brightfunds: New importer.

This commit adds infrastructure to treat XLS files like CSV files, and import them using the base classes that already exist for that.
2018-01-21 17:04:37 -05:00 · 2018-01-21 17:04:37 -05:00 · 37563ffae0
commit 37563ffae0
parent 2502ca40dd
6 changed files with 188 additions and 2 deletions
--- a/README.rst
+++ b/README.rst
@ -137,6 +137,34 @@ Benevity
  transaction_id   The ID of this specific donation
  ================ ===========================================================

+BrightFunds
+^^^^^^^^^^^
+
+``brightfunds donorreport ledger entry``
+  Imports one transaction per row in donor report XLS files that BrightFunds mails each month to recipients.
+
+  This template can use these variables:
+
+  ================ ===========================================================
+  Name             Contents
+  ================ ===========================================================
+  company_name     The company name as reported in the spreadsheet
+  ---------------- -----------------------------------------------------------
+  corporation      The company name as detected by the importer (this is
+                   usually what you want)
+  ---------------- -----------------------------------------------------------
+  donor_name       The donor name as reported in the spreadsheet (usually you
+                   want to use ``payee`` instead)
+  ---------------- -----------------------------------------------------------
+  donor_email      The donor's e-mail address as reported in the spreadsheet
+  ---------------- -----------------------------------------------------------
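+  on_behalf_of     From the corresponding spreadsheet column
+  ---------------- -----------------------------------------------------------
+  designation      From the corresponding spreadsheet column
+  ---------------- -----------------------------------------------------------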
+  fund             From the corresponding spreadsheet column
+  ---------------- -----------------------------------------------------------
+  type             From the corresponding spreadsheet column
+  ================ ===========================================================
+
 Patreon
 ^^^^^^^

--- a/import2ledger/importers/_csv.py
+++ b/import2ledger/importers/_csv.py
@ -33,9 +33,15 @@ class CSVImporterBase:
      included in the entry data returned by _read_header.  If it returns
      None, _read_header expects this is the row with column names for the
      real data, and uses it in its return value.
+    * Reader: A class that accepts the input source and iterates over rows of
+      formatted data.  Default csv.reader.
+    * DictReader: A class that accepts the input source and iterates over rows
+      of data organized into dictionaries.  Default csv.DictReader.
    """
    ENTRY_SEED = {}
    COPIED_FIELDS = {}
+    Reader = csv.reader
+    DictReader = csv.DictReader

    @classmethod
    def _read_header_row(cls, row):
@ -47,7 +53,7 @@ class CSVImporterBase:
        cls._HEADER_MAX_LEN = len(cls._NEEDED_KEYS)
        header = {}
        row = None
-        for row in csv.reader(input_file):
+        for row in cls.Reader(input_file):
            row_data = cls._read_header_row(row)
            if row_data is None:
                break
@ -62,7 +68,7 @@ class CSVImporterBase:

    def __init__(self, input_file):
        self.entry_seed, fields = self._read_header(input_file)
-        self.in_csv = csv.DictReader(input_file, fields)
+        self.in_csv = self.DictReader(input_file, fields)

    def __iter__(self):
        for row in self.in_csv:
--- a/import2ledger/importers/_xls.py
+++ b/import2ledger/importers/_xls.py
@ -0,0 +1,96 @@
+import mmap
+
+import xlrd
+from . import _csv
+
+class BookFromFile:
+    """Open an xlrd workbook from an already-open file object.
+
+    The file is memory-mapped and the mapping is handed to
+    xlrd.open_workbook as file_contents.  Instances are context managers:
+    leaving the ``with`` block calls close().
+
+    Arguments:
+    * xls_file: An open file object.  Its fileno() is mapped, and its name
+      is passed to xlrd as the workbook filename.
+    * length, access: Passed through to mmap.mmap.
+    * Any other keyword arguments are passed to xlrd.open_workbook.
+    """
+
+    def __init__(self, xls_file, length=0, access=mmap.ACCESS_READ, **kwargs):
+        self.mmap = mmap.mmap(xls_file.fileno(), length, access=access)
+        try:
+            self.book = xlrd.open_workbook(
+                xls_file.name,
+                file_contents=self.mmap,
+                **kwargs,
+            )
+        except BaseException:
+            # If xlrd rejects the file, the caller never receives an object
+            # it could close, so release the mapping here before re-raising.
+            self.mmap.close()
+            raise
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        self.close()
+
+    def close(self):
+        """Release the memory map and drop the workbook reference.
+
+        Safe to call more than once.
+        """
+        self.mmap.close()
+        # A bare `del self.book` would raise AttributeError on a repeated
+        # close(); popping with a default keeps the call idempotent.
+        vars(self).pop('book', None)
+
+
+class RowReader:
+    """Iterate over spreadsheet rows as lists of plain Python values.
+
+    Wraps an iterable of rows, where each row is a sequence of xlrd cells
+    (objects with ``ctype`` and ``value`` attributes), and yields one list of
+    converted cell values per row.
+    """
+
+    def __init__(self, rows):
+        self._rows = iter(rows)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self._format_row(next(self._rows))
+
+    def _format_row(self, row):
+        """Return the converted value of every cell in row, in order."""
+        return [self._format_cell(cell) for cell in row]
+
+    def _format_cell(self, cell):
+        """Convert one xlrd cell to a Python value.
+
+        Empty cells become None and boolean cells become bool.  Every other
+        cell type is returned as the value xlrd reports.
+        """
+        cell_type = cell.ctype
+        # The XL_CELL_* constants are plain integers, so compare by value.
+        # An identity test only works by accident of small-int caching.
+        if cell_type == xlrd.XL_CELL_EMPTY:
+            return None
+        elif cell_type == xlrd.XL_CELL_BOOLEAN:
+            return bool(cell.value)
+        else:
+            return cell.value
+
+
+class DictReader(RowReader):
+    """Iterate over spreadsheet rows as dictionaries keyed by field name.
+
+    Arguments:
+    * rows: An iterable of rows of xlrd cells, as for RowReader.
+    * fieldnames: The keys to use, in column order.  When None, the first
+      row is consumed and its converted values become the field names.
+
+    Keys and values are paired positionally, so pairing stops at whichever
+    of the field names or the row runs out first.
+    """
+
+    def __init__(self, rows, fieldnames=None):
+        super().__init__(rows)
+        # The header must go through the list-producing parent formatter:
+        # our own _format_row needs self.fieldnames, which isn't set yet.
+        self.fieldnames = (
+            super()._format_row(next(self._rows))
+            if fieldnames is None
+            else fieldnames
+        )
+
+    def _format_row(self, row):
+        values = super()._format_row(row)
+        return dict(zip(self.fieldnames, values))
+
+
+class XLSImporterBase(_csv.CSVImporterBase):
+    """Base class for Excel spreadsheet importers.
+
+    The workbook is opened from the input file, and its rows are fed to the
+    CSV base class in place of a CSV file, with RowReader and DictReader
+    standing in for the csv module's readers.
+
+    Subclasses may define the following:
+    * _get_rows: A method that accepts an xlrd.Book object and returns an
+      iterator of rows from it.  The default implementation yields each row
+      from each sheet in order.
+    * BOOK_KWARGS: A dictionary of keyword arguments passed to BookFromFile
+      when opening the workbook.  Default empty.
+    """
+
+    BOOK_KWARGS = {}
+    Reader = RowReader
+    DictReader = DictReader
+
+    @classmethod
+    def _open_book(cls, input_file):
+        """Return a BookFromFile for input_file, opened with BOOK_KWARGS."""
+        return BookFromFile(input_file, **cls.BOOK_KWARGS)
+
+    @classmethod
+    def _get_rows(cls, book):
+        """Yield every row of every sheet in book, in sheet order."""
+        for sheet_index in range(book.nsheets):
+            yield from book.sheet_by_index(sheet_index).get_rows()
+
+    @classmethod
+    def can_import(cls, input_file):
+        """Return whether this importer can handle input_file.
+
+        Returns False when xlrd raises XLRDError while the workbook is
+        opened or read.  Otherwise the decision is delegated to the base
+        class, which is given the workbook's rows.
+        """
+        try:
+            with cls._open_book(input_file) as book_wrapper:
+                return super().can_import(cls._get_rows(book_wrapper.book))
+        except xlrd.biffh.XLRDError:
+            return False
+
+    def __init__(self, input_file):
+        self.wrapper = self._open_book(input_file)
+        try:
+            super().__init__(self._get_rows(self.wrapper.book))
+        except BaseException:
+            # If the base constructor fails, nothing will ever iterate this
+            # importer, so the workbook must be released here.
+            self._close_wrapper()
+            raise
+
+    def _close_wrapper(self):
+        """Close the workbook wrapper, at most once per importer."""
+        if not getattr(self, '_wrapper_closed', False):
+            self._wrapper_closed = True
+            self.wrapper.close()
+
+    def __iter__(self):
+        # try/finally releases the workbook when iteration finishes, when it
+        # raises, and when the consumer abandons the generator early.
+        try:
+            yield from super().__iter__()
+        finally:
+            self._close_wrapper()
--- a/import2ledger/importers/brightfunds.py
+++ b/import2ledger/importers/brightfunds.py
@ -0,0 +1,40 @@
+from . import _xls
+from .. import strparse
+
+class DonorReportImporter(_xls.XLSImporterBase):
+    """Import one entry per row of a BrightFunds donor report spreadsheet."""
+
+    # Forwarded to the workbook opener by XLSImporterBase.
+    BOOK_KWARGS = {'encoding_override': 'utf-8'}
+    ENTRY_SEED = {'currency': 'USD'}
+    # A row is only imported when both of these columns are non-blank.
+    NEEDED_FIELDS = frozenset(['Created', 'Amount'])
+    # Spreadsheet column name -> entry data key.
+    COPIED_FIELDS = {
+        'Company Name': 'company_name',
+        'Donor Name': 'donor_name',
+        'Donor Email': 'donor_email',
+        'On Behalf Of': 'on_behalf_of',
+        'Designation': 'designation',
+        'Fund': 'fund',
+        'Type': 'type',
+    }
+
+    def _cell_is_blank(self, value):
+        """Return True if value is falsy or the '-' placeholder."""
+        return value == '-' or not value
+
+    def _read_row(self, row):
+        """Build entry data from one spreadsheet row.
+
+        Returns None when any NEEDED_FIELDS column is blank (presumably the
+        base class skips such rows; confirm against CSVImporterBase).
+        Otherwise returns a dict with amount, corporation, date and payee,
+        plus an empty string for every COPIED_FIELDS key whose cell is blank.
+
+        The non-blank values of Company Name, Donor Name and On Behalf Of
+        are taken in that order.  The first is the corporation and the second
+        is the payee; when only one is present it is used for both.
+
+        Raises ValueError if all three name columns are blank.
+        """
+        if any(self._cell_is_blank(row[key]) for key in self.NEEDED_FIELDS):
+            return None
+        names = [row[key] for key in ['Company Name', 'Donor Name', 'On Behalf Of']
+                 if not self._cell_is_blank(row[key])]
+        if not names:
+            # Fail with a clear message instead of an IndexError from
+            # indexing an empty list.
+            raise ValueError(
+                "BrightFunds row has no company, donor, or on-behalf-of name: "
+                "{!r}".format(row))
+        corporation = names[0]
+        payee = names[1] if len(names) > 1 else corporation
+        entry_data = {
+            'amount': '{:.2f}'.format(row['Amount']),
+            'corporation': corporation,
+            'date': strparse.date(row['Created'], '%m/%d/%Y'),
+            'payee': payee,
+        }
+        # Blank copied cells (including the '-' placeholder) are reported as
+        # empty strings.
+        entry_data.update((entry_key, '')
+                          for row_key, entry_key in self.COPIED_FIELDS.items()
+                          if self._cell_is_blank(row[row_key]))
+        return entry_data
--- a/tests/data/BrightFunds.xls
+++ b/tests/data/BrightFunds.xls
--- a/tests/data/imports.yml
+++ b/tests/data/imports.yml
@ -247,3 +247,19 @@
      comment: ""
      frequency: Recurring
      transaction_id: 67890TYUIO
+
+- source: BrightFunds.xls
+  importer: brightfunds.DonorReportImporter
+  expect:
+    - date: !!python/object/apply:datetime.date [2017, 10, 20]
+      currency: USD
+      amount: !!python/object/apply:decimal.Decimal [120]
+      payee: Dakota Smith
+      corporation: Company
+      company_name: ""
+      designation: ""
+      donor_name: Company
+      donor_email: ""
+      fund: ""
+      on_behalf_of: Dakota Smith
+      type: Matched Donation