From cdedd0fc7cf8c0fee29ab790c66b86290d8b25c0 Mon Sep 17 00:00:00 2001 From: Brett Smith Date: Sat, 30 Dec 2017 10:14:08 -0500 Subject: [PATCH] importers: Add CSVImporterBase documentation for 0f4f83e. --- import2ledger/importers/_csv.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/import2ledger/importers/_csv.py b/import2ledger/importers/_csv.py index 368471c..47ea683 100644 --- a/import2ledger/importers/_csv.py +++ b/import2ledger/importers/_csv.py @@ -12,10 +12,27 @@ class CSVImporterBase: if there's nothing to import from this row. Subclasses may define the following: - * ENTRY_SEED: A dict with the initial entry data. + * ENTRY_SEED: A dict with entry data that can be assumed when it's + coming from this source. * COPIED_FIELDS: A dict that maps column names to data keys. These fields will be copied directly to the entry data dict before _read_row is called. Fields named here must exist in the CSV for it to be imported. + * _read_header(cls, input_file): Some CSVs include "headers" with smaller + rows before they get to the "real" data. This classmethod is expected to + read those rows and return two values: a dict of entry data read from + the headers, and a list of column names for the real data. The method + is expected to leave input_file at the position where the real data + starts, so callers can run ``csv.DictReader(input_file, column_names)`` + after. + The default implementation reads rows until it finds one long enough to + include all of the columns required by NEEDED_FIELDS and COPIED_FIELDS, + then returns ({}, that_row). + * _read_header_row(cls, row): A classmethod that returns either a dict, + or None. The default implementation of _read_header calls this method + on each row. If it returns a dict, those keys and values will be + included in the entry data returned by _read_header. 
If it returns + None, _read_header assumes this is the row with column names for the + real data, and uses it in its return value. """ ENTRY_SEED = {} COPIED_FIELDS = {}