irs990scheduleA: New PDF extractor.
This commit is contained in:
parent
1908358c30
commit
1c95c1b1b1
2 changed files with 85 additions and 1 deletions
83
conservancy_beancount/pdfforms/extract/irs990scheduleA.py
Normal file
83
conservancy_beancount/pdfforms/extract/irs990scheduleA.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
"""irs990scheduleA.py - Extract IRS 990 Schedule A form data from the prior FY"""
|
||||||
|
# Copyright © 2021 Brett Smith
|
||||||
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||||
|
#
|
||||||
|
# Full copyright and licensing details can be found at toplevel file
|
||||||
|
# LICENSE.txt in the repository.
|
||||||
|
|
||||||
|
import collections
|
||||||
|
import functools
|
||||||
|
import itertools
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from . import FormExtractor, main
|
||||||
|
from .. import fields as fieldmod
|
||||||
|
from ... import cliutil
|
||||||
|
|
||||||
|
from typing import (
|
||||||
|
Iterable,
|
||||||
|
Iterator,
|
||||||
|
Optional,
|
||||||
|
Tuple,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Name passed to cliutil.make_entry_point() at the bottom of this module.
PROGNAME = 'pdfform-extract-irs990scheduleA'

# Module-level logger, registered under this module's dotted import path.
logger = logging.getLogger(
    'conservancy_beancount.pdfforms.extract.irs990scheduleA',
)
|
||||||
|
|
||||||
|
def _make_shifts(
|
||||||
|
key_fmt: str,
|
||||||
|
start_count: int,
|
||||||
|
shift_count: int=4,
|
||||||
|
clear_count: int=2,
|
||||||
|
) -> Iterator[Tuple[str, Optional[str]]]:
|
||||||
|
for index in range(start_count, start_count + shift_count):
|
||||||
|
yield (key_fmt.format(index), key_fmt.format(index + 1))
|
||||||
|
index += 1
|
||||||
|
for index in range(index, index + clear_count):
|
||||||
|
yield (key_fmt.format(index), None)
|
||||||
|
|
||||||
|
class IRS990ScheduleAExtractor(FormExtractor):
    """Extractor that reassigns values among IRS 990 Schedule A page 2 fields.

    ``_FIELD_SOURCES`` maps the key of each field to rewrite onto the key of
    the field whose current value it receives. A source of ``None`` means
    the field's value is set to ``None``.
    """

    # Fields whose value is always set to None.
    _BLANK_FIELDS = [
        'topmostSubform[0].Page2[0].Table_SectionA[0].Line5[0].f2_25[0]',
        'topmostSubform[0].Page2[0].Table_SectionA[0].Line6[0].f2_26[0]',
        'topmostSubform[0].Page2[0].Table_SectionB[0].Line11[0].f2_51[0]',
        'topmostSubform[0].Page2[0].f2_52[0]',
        'topmostSubform[0].Page2[0].f2_53[0]',
        'topmostSubform[0].Page2[0].c2_2[0]',
        'topmostSubform[0].Page2[0].c2_4[0]',
    ]

    # One _make_shifts() run per table line, followed by the blanked fields.
    _FIELD_SOURCES = dict(itertools.chain(
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line1[0].f2_{}[0]', 1),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line2[0].f2_{}[0]', 7),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line3[0].f2_{}[0]', 13),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line4[0].f2_{}[0]', 19),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line7[0].f2_{}[0]', 27),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line8[0].f2_{}[0]', 33),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line9[0].f2_{}[0]', 39),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line10[0].f2_{}[0]', 45),
        zip(_BLANK_FIELDS, itertools.repeat(None)),
    ))
    # Part II Section C
    _FIELD_SOURCES.update({
        'topmostSubform[0].Page2[0].f2_54[0]': 'topmostSubform[0].Page2[0].f2_53[0]',
        'topmostSubform[0].Page2[0].c2_3[0]': 'topmostSubform[0].Page2[0].c2_2[0]',
        'topmostSubform[0].Page2[0].c2_5[0]': 'topmostSubform[0].Page2[0].c2_4[0]',
    })

    def _transform_fields(self, fields: Iterable[fieldmod.FormField]) -> None:
        """Apply ``_FIELD_SOURCES`` to ``fields`` in place.

        Every key named in ``_FIELD_SOURCES``, as a target or as a source,
        must be present among the pairs produced by the fields'
        ``as_mapping()``; a missing key raises ``KeyError``.
        """
        # Index every field by key.
        by_key = {}
        for form_field in fields:
            for key, node in form_field.as_mapping():
                by_key[key] = node

        # Read every replacement value before writing any of them: several
        # fields are both a target and the source for another target, so
        # writing as we go would propagate already-overwritten values.
        pending = []
        for target_key, source_key in self._FIELD_SOURCES.items():
            if source_key is None:
                replacement = None
            else:
                replacement = by_key[source_key].value()
            pending.append((target_key, replacement))

        for target_key, replacement in pending:
            by_key[target_key].set_value(replacement)
|
||||||
|
|
||||||
|
|
||||||
|
# Rebind the package's shared main() so it runs with this module's extractor
# class by default.
main = functools.partial(main, extract_cls=IRS990ScheduleAExtractor)
# setup.py registers this name as the `pdfform-extract-irs990scheduleA`
# console script.
entry_point = cliutil.make_entry_point(__name__, PROGNAME)

if __name__ == '__main__':
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the `site` module for interactive use and is not defined
    # when the interpreter is started without it (e.g. `python -S`).
    raise SystemExit(entry_point())
|
3
setup.py
3
setup.py
|
@ -5,7 +5,7 @@ from setuptools import setup
|
||||||
setup(
|
setup(
|
||||||
name='conservancy_beancount',
|
name='conservancy_beancount',
|
||||||
description="Plugin, library, and reports for reading Conservancy's books",
|
description="Plugin, library, and reports for reading Conservancy's books",
|
||||||
version='1.15.2',
|
version='1.15.3',
|
||||||
author='Software Freedom Conservancy',
|
author='Software Freedom Conservancy',
|
||||||
author_email='info@sfconservancy.org',
|
author_email='info@sfconservancy.org',
|
||||||
license='GNU AGPLv3+',
|
license='GNU AGPLv3+',
|
||||||
|
@ -50,6 +50,7 @@ setup(
|
||||||
'ledger-report = conservancy_beancount.reports.ledger:entry_point',
|
'ledger-report = conservancy_beancount.reports.ledger:entry_point',
|
||||||
'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
|
'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
|
||||||
'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
|
'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
|
||||||
|
'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point',
|
||||||
'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
|
'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
|
||||||
'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
|
'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
|
||||||
],
|
],
|
||||||
|
|
Loading…
Add table
Reference in a new issue