irs990scheduleA: New PDF extractor.
This commit is contained in:
parent
1908358c30
commit
1c95c1b1b1
2 changed files with 85 additions and 1 deletions
83
conservancy_beancount/pdfforms/extract/irs990scheduleA.py
Normal file
83
conservancy_beancount/pdfforms/extract/irs990scheduleA.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
"""irs990scheduleA.py - Extract IRS 990 Schedule A form data from the prior FY"""
|
||||
# Copyright © 2021 Brett Smith
|
||||
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||
#
|
||||
# Full copyright and licensing details can be found at toplevel file
|
||||
# LICENSE.txt in the repository.
|
||||
|
||||
import collections
|
||||
import functools
|
||||
import itertools
|
||||
import logging
|
||||
|
||||
from . import FormExtractor, main
|
||||
from .. import fields as fieldmod
|
||||
from ... import cliutil
|
||||
|
||||
from typing import (
|
||||
Iterable,
|
||||
Iterator,
|
||||
Optional,
|
||||
Tuple,
|
||||
)
|
||||
|
||||
# Command name passed to cliutil.make_entry_point() at the bottom of this
# module; setup.py registers a console script under the same name.
PROGNAME = 'pdfform-extract-irs990scheduleA'
|
||||
# Module logger, named with this module's dotted import path.
# NOTE(review): presumably hard-coded instead of __name__ so the logger name
# stays the same when the file is run as __main__ — confirm.
logger = logging.getLogger('conservancy_beancount.pdfforms.extract.irs990scheduleA')
|
||||
|
||||
def _make_shifts(
|
||||
key_fmt: str,
|
||||
start_count: int,
|
||||
shift_count: int=4,
|
||||
clear_count: int=2,
|
||||
) -> Iterator[Tuple[str, Optional[str]]]:
|
||||
for index in range(start_count, start_count + shift_count):
|
||||
yield (key_fmt.format(index), key_fmt.format(index + 1))
|
||||
index += 1
|
||||
for index in range(index, index + clear_count):
|
||||
yield (key_fmt.format(index), None)
|
||||
|
||||
class IRS990ScheduleAExtractor(FormExtractor):
    """Form extractor for IRS 990 Schedule A data carried over from the prior FY.

    ``_FIELD_SOURCES`` maps each form field key that gets rewritten to the
    key of the field whose value it should receive, or to ``None`` when the
    field should be set to ``None``.
    """

    # Fields that are always set to None (they have no source field).
    _BLANK_FIELDS = [
        'topmostSubform[0].Page2[0].Table_SectionA[0].Line5[0].f2_25[0]',
        'topmostSubform[0].Page2[0].Table_SectionA[0].Line6[0].f2_26[0]',
        'topmostSubform[0].Page2[0].Table_SectionB[0].Line11[0].f2_51[0]',
        'topmostSubform[0].Page2[0].f2_52[0]',
        'topmostSubform[0].Page2[0].f2_53[0]',
        'topmostSubform[0].Page2[0].c2_2[0]',
        'topmostSubform[0].Page2[0].c2_4[0]',
    ]

    # Every table line below contributes one run of numbered fields from
    # _make_shifts(): each field is sourced from the next-numbered field and
    # the trailing fields of the run have no source.
    # NOTE(review): presumably each run is one row of per-year columns, so
    # this moves every amount one column over — confirm against the form.
    _FIELD_SOURCES = dict(itertools.chain.from_iterable([
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line1[0].f2_{}[0]', 1),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line2[0].f2_{}[0]', 7),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line3[0].f2_{}[0]', 13),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionA[0].Line4[0].f2_{}[0]', 19),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line7[0].f2_{}[0]', 27),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line8[0].f2_{}[0]', 33),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line9[0].f2_{}[0]', 39),
        _make_shifts('topmostSubform[0].Page2[0].Table_SectionB[0].Line10[0].f2_{}[0]', 45),
        zip(_BLANK_FIELDS, itertools.repeat(None)),
    ]))
    # Part II Section C
    _FIELD_SOURCES.update({
        'topmostSubform[0].Page2[0].f2_54[0]': 'topmostSubform[0].Page2[0].f2_53[0]',
        'topmostSubform[0].Page2[0].c2_3[0]': 'topmostSubform[0].Page2[0].c2_2[0]',
        'topmostSubform[0].Page2[0].c2_5[0]': 'topmostSubform[0].Page2[0].c2_4[0]',
    })

    def _transform_fields(self, fields: Iterable[fieldmod.FormField]) -> None:
        """Rewrite field values in place according to ``_FIELD_SOURCES``.

        Raises ``KeyError`` if a key named in ``_FIELD_SOURCES`` is not among
        the keys produced by the fields' ``as_mapping()``.
        """
        # Flatten the (key, field) pairs of every field into one lookup table.
        fields_map = dict(itertools.chain.from_iterable(
            field.as_mapping() for field in fields
        ))
        # First pass: read every replacement value before anything is
        # written, so a field that is both a source and a destination still
        # hands over its original value.
        new_values = {}
        for dest_key, src_key in self._FIELD_SOURCES.items():
            if src_key is None:
                new_values[dest_key] = None
            else:
                new_values[dest_key] = fields_map[src_key].value()
        # Second pass: apply the collected values.
        for dest_key, value in new_values.items():
            fields_map[dest_key].set_value(value)
|
||||
|
||||
|
||||
# Rebind the imported ``main`` with this module's extractor class preset as
# its ``extract_cls`` argument.
main = functools.partial(main, extract_cls=IRS990ScheduleAExtractor)
# setup.py points the 'pdfform-extract-irs990scheduleA' console script at
# this ``entry_point`` name.
entry_point = cliutil.make_entry_point(__name__, PROGNAME)

if __name__ == '__main__':
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the ``site`` module for interactive use and is missing when
    # Python runs with -S. The exit status is unchanged.
    raise SystemExit(entry_point())
|
3
setup.py
3
setup.py
|
@ -5,7 +5,7 @@ from setuptools import setup
|
|||
setup(
|
||||
name='conservancy_beancount',
|
||||
description="Plugin, library, and reports for reading Conservancy's books",
|
||||
version='1.15.2',
|
||||
version='1.15.3',
|
||||
author='Software Freedom Conservancy',
|
||||
author_email='info@sfconservancy.org',
|
||||
license='GNU AGPLv3+',
|
||||
|
@ -50,6 +50,7 @@ setup(
|
|||
'ledger-report = conservancy_beancount.reports.ledger:entry_point',
|
||||
'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
|
||||
'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
|
||||
'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point',
|
||||
'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
|
||||
'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
|
||||
],
|
||||
|
|
Loading…
Reference in a new issue