brightfunds: New importer.

This commit adds infrastructure to treat XLS files like CSV files, and
import them using the base classes that already exist for that.
This commit is contained in:
Brett Smith 2018-01-21 17:04:37 -05:00
parent 2502ca40dd
commit 37563ffae0
6 changed files with 188 additions and 2 deletions

View file

@ -137,6 +137,34 @@ Benevity
transaction_id The ID of this specific donation
================ ===========================================================
BrightFunds
^^^^^^^^^^^
``brightfunds donorreport ledger entry``
Imports one transaction per row in donor report XLS files that BrightFunds mails each month to recipients.
This template can use these variables:
================ ===========================================================
Name Contents
================ ===========================================================
company_name The company name as reported in the spreadsheet
---------------- -----------------------------------------------------------
corporation The company name as detected by the importer (this is
usually what you want)
---------------- -----------------------------------------------------------
donor_name The donor name as reported in the spreadsheet (usually you
want to use ``payee`` instead)
---------------- -----------------------------------------------------------
donor_email The donor's e-mail address as reported in the spreadsheet
---------------- -----------------------------------------------------------
on_behalf_of From the corresponding spreadsheet column
---------------- -----------------------------------------------------------
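designation From the corresponding spreadsheet column
---------------- -----------------------------------------------------------
fund From the corresponding spreadsheet column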
---------------- -----------------------------------------------------------
type From the corresponding spreadsheet column
================ ===========================================================
Patreon
^^^^^^^

View file

@ -33,9 +33,15 @@ class CSVImporterBase:
included in the entry data returned by _read_header. If it returns
None, _read_header expects this is the row with column names for the
real data, and uses it in its return value.
* Reader: A class that accepts the input source and iterates over rows of
formatted data. Default csv.reader.
* DictReader: A class that accepts the input source and iterates over rows
of data organized into dictionaries. Default csv.DictReader.
"""
ENTRY_SEED = {}
COPIED_FIELDS = {}
Reader = csv.reader
DictReader = csv.DictReader
@classmethod
def _read_header_row(cls, row):
@ -47,7 +53,7 @@ class CSVImporterBase:
cls._HEADER_MAX_LEN = len(cls._NEEDED_KEYS)
header = {}
row = None
for row in csv.reader(input_file):
for row in cls.Reader(input_file):
row_data = cls._read_header_row(row)
if row_data is None:
break
@ -62,7 +68,7 @@ class CSVImporterBase:
def __init__(self, input_file):
self.entry_seed, fields = self._read_header(input_file)
self.in_csv = csv.DictReader(input_file, fields)
self.in_csv = self.DictReader(input_file, fields)
def __iter__(self):
for row in self.in_csv:

View file

@ -0,0 +1,96 @@
import mmap
import xlrd
from . import _csv
class BookFromFile:
    """Open an xlrd workbook from an already-open file object.

    The file is memory-mapped and the mapping is handed to xlrd as the
    workbook contents.  Use it as a context manager, or call close() once
    the workbook is no longer needed.

    Arguments:
    * xls_file: An open file object.  It must provide fileno() and a name
      attribute.
    * length, access: Passed to mmap.mmap.  The default length of 0 maps
      the whole file.
    * kwargs: Passed through to xlrd.open_workbook.
    """

    def __init__(self, xls_file, length=0, access=mmap.ACCESS_READ, **kwargs):
        self.mmap = mmap.mmap(xls_file.fileno(), length, access=access)
        try:
            self.book = xlrd.open_workbook(
                xls_file.name,
                file_contents=self.mmap,
                **kwargs,
            )
        except BaseException:
            # Nobody else holds a reference to the mapping yet, so release
            # it here instead of leaking it when the file is not a workbook.
            self.mmap.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.close()

    def close(self):
        """Release the memory map and drop the workbook reference.

        Safe to call more than once.
        """
        self.mmap.close()
        # Tolerate repeated calls: the attribute is already gone after the
        # first close().
        try:
            del self.book
        except AttributeError:
            pass
class RowReader:
    """Iterate over spreadsheet rows as lists of plain Python values.

    Arguments:
    * rows: An iterable of rows.  Each row is an iterable of cell objects
      with ``ctype`` and ``value`` attributes.
    """

    def __init__(self, rows):
        self._rows = iter(rows)

    def __iter__(self):
        return self

    def __next__(self):
        return self._format_row(next(self._rows))

    def _format_row(self, row):
        """Return a list with the converted value of each cell in row."""
        return [self._format_cell(cell) for cell in row]

    def _format_cell(self, cell):
        """Return the Python value for one cell.

        Empty cells become None, boolean cells become bool, and every other
        cell type yields its value unchanged.
        """
        cell_type = cell.ctype
        # Cell types are integer codes, so compare by value.  Identity
        # comparison only works by accident of small-integer caching.
        if cell_type == xlrd.XL_CELL_EMPTY:
            return None
        elif cell_type == xlrd.XL_CELL_BOOLEAN:
            return bool(cell.value)
        else:
            return cell.value
class DictReader(RowReader):
    """Iterate over spreadsheet rows as dictionaries keyed by field name.

    Arguments:
    * rows: An iterable of rows, as accepted by RowReader.
    * fieldnames: The keys to use, in column order.  If None, the first row
      is consumed immediately and its formatted values become the keys.
    """

    def __init__(self, rows, fieldnames=None):
        super().__init__(rows)
        # The header row must go through the list formatter of the parent
        # class: the dict formatter below needs self.fieldnames to exist.
        self.fieldnames = (
            super()._format_row(next(self._rows))
            if fieldnames is None
            else fieldnames
        )

    def _format_row(self, row):
        """Return a dict pairing each field name with the cell value."""
        keys = self.fieldnames
        values = super()._format_row(row)
        return dict(zip(keys, values))
class XLSImporterBase(_csv.CSVImporterBase):
    """Base class for Excel spreadsheet importers.

    Subclasses may define the following:

    * BOOK_KWARGS: A dict of extra keyword arguments used when opening the
      workbook.  Default empty.
    * _get_rows: A method that accepts an xlrd.Book object and returns an
      iterator of rows from it. The default implementation yields each row
      from each sheet in order.
    """
    BOOK_KWARGS = {}
    Reader = RowReader
    DictReader = DictReader

    @classmethod
    def _open_book(cls, input_file):
        """Return a BookFromFile wrapper for input_file."""
        return BookFromFile(input_file, **cls.BOOK_KWARGS)

    @classmethod
    def _get_rows(cls, book):
        """Yield every row of every sheet in book, in sheet order."""
        for sheet_index in range(book.nsheets):
            yield from book.sheet_by_index(sheet_index).get_rows()

    @classmethod
    def can_import(cls, input_file):
        """Return whether this importer can handle input_file.

        Returns False when the file cannot be opened as a workbook;
        otherwise defers to the base class check on the workbook's rows.
        """
        try:
            book_wrapper = cls._open_book(input_file)
        except (ValueError, xlrd.biffh.XLRDError):
            # ValueError covers input that cannot be memory-mapped at all,
            # such as an empty file.  Such a file is simply not ours to
            # import, so answer False rather than crash the probe.
            return False
        with book_wrapper:
            try:
                return super().can_import(cls._get_rows(book_wrapper.book))
            except xlrd.biffh.XLRDError:
                return False

    def __init__(self, input_file):
        self.wrapper = self._open_book(input_file)
        try:
            super().__init__(self._get_rows(self.wrapper.book))
        except BaseException:
            # Do not leak the open workbook if the header cannot be read.
            self.wrapper.close()
            raise

    def __iter__(self):
        # Close the workbook even if the consumer stops early or a row
        # raises, not only after the last row has been yielded.
        try:
            yield from super().__iter__()
        finally:
            self.wrapper.close()

View file

@ -0,0 +1,40 @@
from . import _xls
from .. import strparse
class DonorReportImporter(_xls.XLSImporterBase):
    """Importer for BrightFunds donor report spreadsheets.

    Produces entry data for each row that has both a creation date and an
    amount.
    """
    BOOK_KWARGS = {'encoding_override': 'utf-8'}
    ENTRY_SEED = {'currency': 'USD'}
    NEEDED_FIELDS = frozenset(['Created', 'Amount'])
    COPIED_FIELDS = {
        'Company Name': 'company_name',
        'Donor Name': 'donor_name',
        'Donor Email': 'donor_email',
        'On Behalf Of': 'on_behalf_of',
        'Designation': 'designation',
        'Fund': 'fund',
        'Type': 'type',
    }

    def _cell_is_blank(self, value):
        """Return whether value is a lone dash or otherwise falsy."""
        if value == '-':
            return True
        return not value

    def _read_row(self, row):
        """Return the entry data for one row, or None to skip the row.

        A row is skipped when any of NEEDED_FIELDS is blank.  Raises
        IndexError if the company, donor, and on-behalf-of columns are all
        blank.
        """
        is_blank = self._cell_is_blank
        for needed in self.NEEDED_FIELDS:
            if is_blank(row[needed]):
                return None

        # Collect the names that are present, in priority order.
        names = []
        for column in ('Company Name', 'Donor Name', 'On Behalf Of'):
            name = row[column]
            if not is_blank(name):
                names.append(name)

        # The first name found is the corporation and the second is the
        # payee.  With a single name, it serves as both.
        corporation = names[0]
        payee = names[1] if len(names) > 1 else corporation

        amount = '{:.2f}'.format(row['Amount'])
        date = strparse.date(row['Created'], '%m/%d/%Y')
        entry_data = {
            'amount': amount,
            'corporation': corporation,
            'date': date,
            'payee': payee,
        }

        # Blank copied columns are reported as empty strings.
        for row_key, entry_key in self.COPIED_FIELDS.items():
            if is_blank(row[row_key]):
                entry_data[entry_key] = ''
        return entry_data

BIN
tests/data/BrightFunds.xls Normal file

Binary file not shown.

View file

@ -247,3 +247,19 @@
comment: ""
frequency: Recurring
transaction_id: 67890TYUIO
- source: BrightFunds.xls
importer: brightfunds.DonorReportImporter
expect:
- date: !!python/object/apply:datetime.date [2017, 10, 20]
currency: USD
amount: !!python/object/apply:decimal.Decimal [120]
payee: Dakota Smith
corporation: Company
company_name: ""
designation: ""
donor_name: Company
donor_email: ""
fund: ""
on_behalf_of: Dakota Smith
type: Matched Donation