pdfforms: Initial module and tool to extract PDF form data to YAML.
Next steps: (1) a tool to fill the PDF form based on values written to that YAML; (2) an extension to fill some of those values with numbers queried from the books (which is why we need something more involved than FDF).
This commit is contained in:
parent
1b7fdf4f3b
commit
13c66e8ce2
10 changed files with 1007 additions and 0 deletions
0
conservancy_beancount/pdfforms/__init__.py
Normal file
0
conservancy_beancount/pdfforms/__init__.py
Normal file
18
conservancy_beancount/pdfforms/errors.py
Normal file
18
conservancy_beancount/pdfforms/errors.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
"""errors.py - Exception classes for PDF reporting errors"""
|
||||||
|
# Copyright © 2021 Brett Smith
|
||||||
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||||
|
#
|
||||||
|
# Full copyright and licensing details can be found at toplevel file
|
||||||
|
# LICENSE.txt in the repository.
|
||||||
|
|
||||||
|
class PDFError(Exception):
    """Common base class for the errors defined in this module."""
|
||||||
|
|
||||||
|
class PDFKeyError(KeyError, PDFError):
    """A PDFError that can also be caught as a plain KeyError."""
|
||||||
|
|
||||||
|
class PDFSpecError(ValueError, PDFError):
    """A PDFError that can also be caught as a plain ValueError."""
|
||||||
|
|
||||||
|
class NoFormDataError(ValueError, PDFError):
    """A PDFError/ValueError for documents where no form data was found."""
|
179
conservancy_beancount/pdfforms/extract.py
Normal file
179
conservancy_beancount/pdfforms/extract.py
Normal file
|
@ -0,0 +1,179 @@
|
||||||
|
"""extract.py - Extract form data from PDF files"""
|
||||||
|
# Copyright © 2021 Brett Smith
|
||||||
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||||
|
#
|
||||||
|
# Full copyright and licensing details can be found at toplevel file
|
||||||
|
# LICENSE.txt in the repository.
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import contextlib
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from . import fields as fieldmod
|
||||||
|
from . import utils as pdfutils
|
||||||
|
from .. import cliutil
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from pdfminer.pdfdocument import PDFDocument # type:ignore[import]
|
||||||
|
from pdfminer.pdfparser import PDFParser # type:ignore[import]
|
||||||
|
from pdfminer.pdftypes import resolve1 # type:ignore[import]
|
||||||
|
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
BinaryIO,
|
||||||
|
Iterator,
|
||||||
|
Mapping,
|
||||||
|
Optional,
|
||||||
|
Sequence,
|
||||||
|
TextIO,
|
||||||
|
)
|
||||||
|
|
||||||
|
PROGNAME = 'pdfform-extract'
|
||||||
|
logger = logging.getLogger('conservancy_beancount.pdfforms.extract')
|
||||||
|
|
||||||
|
class FormExtractor:
    """Extract fillable form fields from a parsed PDF or FDF document.

    Instances carry three attributes, all set by the constructor:

    * ``document`` - the parsed ``PDFDocument``
    * ``form_key`` - the document catalog key that holds the form data
    * ``source`` - a label for where the document came from (the alternate
      constructors pass the file's base name), or None when unknown
    """

    def __init__(
            self,
            pdf: PDFDocument,
            form_key: Optional[str]=None,
            source: Optional[str]=None,
    ) -> None:
        """Wrap an already-parsed document.

        When ``form_key`` is None, it is guessed by calling
        ``pdfutils.guess_form_key(pdf)``.
        """
        if form_key is None:
            form_key = pdfutils.guess_form_key(pdf)
        self.document = pdf
        self.form_key = form_key
        self.source = source

    @classmethod
    @contextlib.contextmanager
    def from_path(
            cls,
            path: Path,
            form_key: Optional[str]=None,
    ) -> Iterator['FormExtractor']:
        """Context manager that opens ``path`` and yields an extractor for it.

        The file stays open for the duration of the ``with`` block and is
        closed on exit, whether or not an exception was raised.
        """
        with path.open('rb') as pdf_file:
            yield cls.from_file(pdf_file, form_key, path)

    @classmethod
    def from_file(
            cls,
            source: BinaryIO,
            form_key: Optional[str]=None,
            source_path: Optional[Path]=None,
    ) -> 'FormExtractor':
        """Build an extractor from an open binary file.

        The extractor's ``source`` label is the base name of ``source_path``
        when given, otherwise the base name of ``source.name``. The caller
        keeps ownership of ``source`` and is responsible for closing it.
        """
        if source_path is not None:
            source_name: Optional[str] = source_path.name
        else:
            # Not every binary stream has a usable name: io.BytesIO has no
            # ``name`` attribute at all, and a file opened from a descriptor
            # reports an int. Fall back to "unknown" instead of crashing.
            raw_name = getattr(source, 'name', None)
            source_name = Path(raw_name).name if isinstance(raw_name, str) else None
        parser = PDFParser(source)
        pdf_doc = PDFDocument(parser)
        return cls(pdf_doc, form_key, source_name)

    def _extract_field(
            self,
            field: fieldmod.FormField,
            name_prefix: str='',
    ) -> Iterator[Mapping[str, Any]]:
        """Yield a description of ``field`` and then of all its descendants.

        A field is reported only when it is not read-only and has a valid
        field type. Its kids are always visited, with this field's full name
        plus ``.`` as their name prefix.
        """
        name = name_prefix + field.name()
        yield_this = not field.is_readonly()
        try:
            field_type = field.field_type().name
        except ValueError:
            # No usable field type (e.g. a node that only groups kids):
            # nothing to fill in here, but keep descending.
            yield_this = False
        if yield_this:
            retval = {
                'fdf': {
                    'type': field_type,
                    'name': name,
                },
                'description': f'{field_type} {name}',
                'value': field.fill_value(),
            }
            if isinstance(field, fieldmod.CheckboxField):
                retval['fdf']['options'] = field.options()
            yield retval
        name += '.'
        for kid in field.kids():
            yield from self._extract_field(kid, name)

    def extract(self) -> Mapping[str, Any]:
        """Return the form as a mapping ready to be dumped to YAML.

        The result has the keys ``from file``, ``form key``, and ``fields``;
        the last is the flat list of everything ``_extract_field`` yields for
        the top-level fields, in document order.
        """
        form = resolve1(self.document.catalog[self.form_key])
        return {
            'from file': self.source,
            'form key': self.form_key,
            'fields': [
                field
                for field_source in form['Fields']
                for field in self._extract_field(fieldmod.FormField.by_type(resolve1(field_source)))
            ],
        }
|
||||||
|
|
||||||
|
|
||||||
|
class FormYAMLDumper(yaml.dumper.SafeDumper):
    """SafeDumper variant that writes mappings in block style, unsorted."""

    def represent_mapping(self, tag: Any, value: Any, flow_style: Any=None) -> Any:
        """Represent ``value`` without flowing it (by default) or re-sorting it."""
        # When the parent method is able to call value.items() itself, it
        # sorts what it gets back. Handing it the items view up front skips
        # that code path and keeps the original key order.
        with contextlib.suppress(AttributeError):
            value = value.items()
        # Mappings are never flowed unless the caller explicitly asks for it.
        style = False if flow_style is None else flow_style
        return super().represent_mapping(tag, value, style)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
    """Parse the pdfform-extract command line.

    ``arglist`` is handed straight to ``ArgumentParser.parse_args``. The
    returned namespace has ``form_key``, ``output_file``, and ``document``,
    plus whatever the shared version and loglevel helpers add.
    """
    argparser = argparse.ArgumentParser(prog=PROGNAME)
    for add_shared_argument in (
            cliutil.add_version_argument,
            cliutil.add_loglevel_argument,
    ):
        add_shared_argument(argparser)
    argparser.add_argument(
        '--form-key', '-f',
        metavar='KEY',
        help="""Key in the document catalog with form data.
        Default is guessed by examining the document.
        """)
    argparser.add_argument(
        '--output-file', '-O',
        metavar='PATH',
        type=Path,
        help="""Write output YAML to this file, or stdout when PATH is `-`.
        Default stdout.
        """)
    argparser.add_argument(
        'document',
        type=Path,
        help="""PDF or FDF file to extract form data from.
        Use `-` to read from stdin.
        """)
    return argparser.parse_args(arglist)
|
||||||
|
|
||||||
|
def main(arglist: Optional[Sequence[str]]=None,
         stdout: TextIO=sys.stdout,
         stderr: TextIO=sys.stderr,
) -> int:
    """Run pdfform-extract: read a PDF/FDF form and write it out as YAML.

    ``arglist`` is the command line to parse (see ``parse_arguments``).
    ``stdout`` is where the YAML goes unless ``--output-file`` names a file.
    ``stderr`` is accepted but not used by this function itself.
    Returns 0 once the YAML has been written.
    """
    # Local import: only needed to buffer a non-seekable standard input.
    import io

    args = parse_arguments(arglist)
    cliutil.set_loglevel(logger, args.loglevel)
    with contextlib.ExitStack() as exit_stack:
        if args.document == cliutil.STDSTREAM_PATH:
            stdin = sys.stdin.buffer
            if stdin.seekable():
                extractor = FormExtractor.from_file(stdin, args.form_key)
            else:
                # The PDF parser seeks around its input, which a pipe cannot
                # do. Read the whole document into memory first, and pass the
                # stream's name along because BytesIO does not have one.
                extractor = FormExtractor.from_file(
                    io.BytesIO(stdin.read()),
                    args.form_key,
                    Path(stdin.name),
                )
        else:
            extractor = exit_stack.enter_context(
                FormExtractor.from_path(args.document, args.form_key),
            )
        # Extract while the input file is still open.
        extracted_form = extractor.extract()
    with contextlib.ExitStack() as exit_stack:
        out_file = cliutil.text_output(args.output_file, stdout)
        # Only close files we opened ourselves, never the caller's stdout.
        if out_file is not stdout:
            exit_stack.enter_context(out_file)
        yaml.dump(extracted_form, out_file, Dumper=FormYAMLDumper)
    return 0
|
||||||
|
|
||||||
|
# Console-script entry point, built by the shared CLI helper.
entry_point = cliutil.make_entry_point(__name__, PROGNAME)

if __name__ == '__main__':
    # Use sys.exit rather than the bare ``exit`` helper: the latter is
    # installed by the site module for interactive use and is not available
    # when Python runs without it (e.g. ``python -S``).
    sys.exit(entry_point())
|
245
conservancy_beancount/pdfforms/fields.py
Normal file
245
conservancy_beancount/pdfforms/fields.py
Normal file
|
@ -0,0 +1,245 @@
|
||||||
|
"""fields.py - Python classes to read and write PDF form data"""
|
||||||
|
# Copyright © 2020 Brett Smith
|
||||||
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||||
|
#
|
||||||
|
# Full copyright and licensing details can be found at toplevel file
|
||||||
|
# LICENSE.txt in the repository.
|
||||||
|
|
||||||
|
import enum
|
||||||
|
import functools
|
||||||
|
|
||||||
|
from pdfminer.pdftypes import resolve1 # type:ignore[import]
|
||||||
|
from pdfminer import psparser # type:ignore[import]
|
||||||
|
from . import utils as pdfutils
|
||||||
|
from .errors import PDFKeyError, PDFSpecError
|
||||||
|
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
Iterator,
|
||||||
|
Optional,
|
||||||
|
Mapping,
|
||||||
|
MutableMapping,
|
||||||
|
Sequence,
|
||||||
|
Tuple,
|
||||||
|
Union,
|
||||||
|
)
|
||||||
|
|
||||||
|
FieldSource = MutableMapping[str, Any]
|
||||||
|
|
||||||
|
class FieldFlags(enum.IntFlag):
    """Bit flags that can appear in a form field's ``Ff`` value.

    Each member is a single bit. ``RadiosInUnison`` and ``RichText`` share
    the same bit, so the one defined later (``RichText``) is an alias of the
    first.
    """
    # Flags for all fields
    ReadOnly = 1 << 0
    Required = 1 << 1
    NoExport = 1 << 2
    # Flags for buttons
    NoToggleToOff = 1 << 14
    Radio = 1 << 15
    Pushbutton = 1 << 16
    RadiosInUnison = 1 << 25
    # Flags for text
    Multiline = 1 << 12
    Password = 1 << 13
    FileSelect = 1 << 20
    DoNotSpellCheck = 1 << 22
    DoNotScroll = 1 << 23
    Comb = 1 << 24
    RichText = 1 << 25
|
||||||
|
|
||||||
|
|
||||||
|
class FieldType(enum.Enum):
    """Form field types, keyed by the name used in a field's ``FT`` entry.

    The short members (``Btn``, ``Ch``, ``Sig``, ``Tx``) are the canonical
    ones; each upper-case member repeats a short member's value and is
    therefore an alias for it.
    """
    Btn = 'Btn'
    BUTTON = 'Btn'
    Ch = 'Ch'
    CHOICE = 'Ch'
    Sig = 'Sig'
    SIG = 'Sig'
    SIGNATURE = 'Sig'
    Tx = 'Tx'
    TEXT = 'Tx'
|
||||||
|
|
||||||
|
|
||||||
|
class FormField:
    """Generic wrapper around one form field dictionary.

    The wrapped mapping is kept in ``_source`` and read on demand; nothing is
    cached. Keys listed in ``INHERITABLE`` are looked up through the chain of
    ``Parent`` entries when the field itself does not define them.
    """
    __slots__ = ['_source']
    # Marks "no default given" in _get_value, since None is a valid default.
    _SENTINEL = object()
    # What fill_value() reports when the field has no 'V' of its own.
    DEFAULT_FILL: object = None
    INHERITABLE = frozenset(['DV', 'Ff', 'FT', 'MaxLen', 'Opt', 'V'])

    def __init__(self, source: FieldSource) -> None:
        self._source = source

    @classmethod
    def by_type(cls, source: FieldSource) -> 'FormField':
        """Wrap ``source`` in the most specific field class available.

        Text fields become ``TextField``; buttons that are neither radio
        buttons nor pushbuttons become ``CheckboxField``. Everything else,
        including fields without a valid type, stays an instance of ``cls``.
        """
        field = cls(source)
        try:
            field_type = field.field_type()
        except ValueError:
            return field
        flags = field.flags()
        if field_type is FieldType.TEXT:
            field.__class__ = TextField
        elif field_type is FieldType.BUTTON:
            is_other_button = flags & (FieldFlags.Radio | FieldFlags.Pushbutton)
            if not is_other_button:
                field.__class__ = CheckboxField
        return field

    def _get_value(self, key: str, default: Any=_SENTINEL) -> Any:
        """Return the resolved value of ``key``, honoring inheritance.

        Raises PDFKeyError when the key is not found and no ``default`` was
        supplied.
        """
        inherits = key in self.INHERITABLE
        node: Optional[FieldSource] = self._source
        while node is not None:
            try:
                return resolve1(node[key])
            except KeyError:
                if not inherits:
                    break
                # Climb one level; a missing Parent resolves to None and
                # ends the search.
                node = resolve1(node.get('Parent'))
        if default is not self._SENTINEL:
            return default
        raise PDFKeyError(key)

    def field_type(self) -> FieldType:
        """Return this field's type; raise PDFSpecError if absent or unknown."""
        try:
            raw_type = self._get_value('FT')
        except KeyError:
            raise PDFSpecError("field does not specify a field type") from None
        try:
            return FieldType[raw_type.name]
        except (AttributeError, KeyError):
            raise PDFSpecError(f"field has invalid field type {raw_type!r}") from None

    def kids(self) -> Iterator['FormField']:
        """Yield a typed wrapper for each entry under ``Kids``."""
        kid_sources = self._get_value('Kids', ())
        yield from map(self.by_type, map(resolve1, kid_sources))

    def parent(self) -> Optional['FormField']:
        """Return a typed wrapper for ``Parent``, or None if there is none."""
        try:
            return self.by_type(self._get_value('Parent'))
        except KeyError:
            return None

    def is_terminal(self) -> bool:
        """Return True when the field has no kids."""
        return not self._get_value('Kids', None)

    def flags(self) -> int:
        """Return the field's ``Ff`` value, 0 when unset."""
        return self._get_value('Ff', 0)  # type:ignore[no-any-return]

    def is_readonly(self) -> bool:
        """Return True when the ReadOnly bit is set in the flags."""
        return (self.flags() & FieldFlags.ReadOnly) != 0

    def name(self) -> str:
        """Return the decoded ``T`` entry, or an empty string without one."""
        return pdfutils.decode_text(self._get_value('T', b''))

    def value(self) -> Any:
        """Return the (possibly inherited) ``V`` entry, or None."""
        return self._get_value('V', None)  # type:ignore[no-any-return]

    def set_value(self, value: Any) -> None:
        """Store ``value`` as this field's own ``V`` entry."""
        self._source['V'] = value

    def fill_value(self) -> Any:
        """Return this field's own ``V`` entry, or ``DEFAULT_FILL``.

        Unlike ``value()``, this never looks at parent fields.
        """
        return resolve1(self._source.get('V', self.DEFAULT_FILL))

    def as_filled_fdf(self) -> Mapping[str, Any]:
        """Return a nested mapping of names, fill values, and kids.

        ``T`` is included when the field has one, ``V`` when the fill value
        is not None, and ``Kids`` when there is at least one kid.
        """
        fdf: FieldSource = {}
        try:
            fdf['T'] = pdfutils.decode_text(self._source['T'])
        except KeyError:
            pass
        fill = self.fill_value()
        if fill is not None:
            fdf['V'] = fill
        kid_fdfs = [kid.as_filled_fdf() for kid in self.kids()]
        if kid_fdfs:
            fdf['Kids'] = kid_fdfs
        return fdf

    def as_mapping(self, name_prefix: str='') -> Iterator[Tuple[str, 'FormField']]:
        """Yield ``(full name, field)`` for this field and every descendant.

        Full names join each level's name with ``.``, parents first.
        """
        full_name = name_prefix + self.name()
        yield (full_name, self)
        kid_prefix = full_name + '.'
        for kid in self.kids():
            yield from kid.as_mapping(kid_prefix)
|
||||||
|
|
||||||
|
|
||||||
|
class CheckboxField(FormField):
    """A button field treated as a two-state checkbox.

    Values are exposed as booleans (or None when unset) and translated
    to and from the names of the field's on and off states.
    """
    __slots__: Sequence[str] = []
    # State names assumed when the field does not define its own.
    OFF = 'Off'
    ON = 'Yes'

    # Deliberately not wrapped in functools.lru_cache: a cache on a method
    # is keyed on ``self``, so it keeps field objects alive and hands every
    # caller the same mutable list. The computation is cheap.
    def options(self) -> Sequence[str]:
        """Return ``[on_name, off_name]`` for this checkbox.

        The names come from the keys of the field's ``AP``/``N`` dictionary.
        Missing names are filled in from ``ON`` and ``OFF``. Raises
        PDFSpecError when more than two states are defined, or when two
        states are defined and neither is named ``OFF`` or ``ON``.
        """
        try:
            # Either level may be an indirect reference; resolve them like
            # every other lookup in this module. resolve1 passes plain
            # dictionaries through unchanged.
            appearances = resolve1(self._source['AP'])
            keys: Tuple[str, ...] = tuple(resolve1(appearances['N']))
        except KeyError:
            keys = ()
        count = len(keys)
        if count == 0:
            return [self.ON, self.OFF]
        elif count == 1:
            return [keys[0], self.OFF]
        elif count > 2:
            raise PDFSpecError("checkbox has more than two states available")
        try:
            off_index = keys.index(self.OFF)
        except ValueError:
            # No state is literally named OFF: if one is named ON, the other
            # one must be the off state.
            try:
                off_index = 0 if keys.index(self.ON) else 1
            except ValueError:
                raise PDFSpecError("checkbox defines two on states") from None
        return [keys[0 if off_index else 1], keys[off_index]]

    def _bool_value(self, literal_value: Optional[psparser.PSLiteral]) -> Optional[bool]:
        """Translate a stored state name into True, False, or None.

        Raises PDFSpecError when the value has no ``name`` attribute or names
        a state that is neither of this checkbox's options.
        """
        if literal_value is None:
            return None
        try:
            value = literal_value.name
        except AttributeError:
            # Suppress the AttributeError context, matching the other
            # PDFSpecError raises in this module.
            raise PDFSpecError("checkbox value is not a PSLiteral") from None
        on, off = self.options()
        if value == on:
            return True
        elif value == off:
            return False
        else:
            raise PDFSpecError(f"checkbox has unknown value {value!r}")

    def value(self) -> Optional[bool]:
        """Return the checkbox state as a boolean, or None when unset."""
        return self._bool_value(super().value())

    def set_value(self, value: Optional[bool]) -> None:
        """Store True/False as the matching state name; None stores None."""
        if value is None:
            literal_value: Optional[psparser.PSLiteral] = None
        else:
            on, off = self.options()
            literal_value = psparser.PSLiteralTable.intern(on if value else off)
        super().set_value(literal_value)
|
||||||
|
|
||||||
|
|
||||||
|
class TextField(FormField):
    """A text field whose values are exposed as ``str`` instead of bytes."""
    __slots__: Sequence[str] = []
    DEFAULT_FILL = b''

    def _decode(self, value: Any) -> Optional[str]:
        """Decode stored bytes to text, passing None through.

        Raises PDFSpecError for anything that is neither None nor bytes.
        """
        if value is None:
            return None
        if not isinstance(value, bytes):
            raise PDFSpecError("text field value is not bytes")
        return pdfutils.decode_text(value)

    def value(self) -> Optional[str]:
        """Return the field's value as text, or None when unset."""
        raw_value = super().value()
        return self._decode(raw_value)

    def set_value(self, value: Optional[str]) -> None:
        """Encode ``value`` for storage; None is stored as None."""
        encoded = None if value is None else pdfutils.encode_text(value)
        super().set_value(encoded)

    def fill_value(self) -> Optional[str]:
        """Return the field's own fill value as text."""
        raw_fill = super().fill_value()
        return self._decode(raw_fill)
|
53
conservancy_beancount/pdfforms/utils.py
Normal file
53
conservancy_beancount/pdfforms/utils.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
"""utils.py - Utility methods for working with PDFs"""
|
||||||
|
# Copyright © 2020 Brett Smith
|
||||||
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||||
|
#
|
||||||
|
# Full copyright and licensing details can be found at toplevel file
|
||||||
|
# LICENSE.txt in the repository.
|
||||||
|
|
||||||
|
from codecs import BOM_UTF16_BE
|
||||||
|
|
||||||
|
import pdfminer.utils # type:ignore[import]
|
||||||
|
|
||||||
|
from . import errors as pdferrors
|
||||||
|
|
||||||
|
from pdfminer.pdfdocument import PDFDocument # type:ignore[import]
|
||||||
|
from pdfminer.pdftypes import resolve1 # type:ignore[import]
|
||||||
|
|
||||||
|
from typing import (
|
||||||
|
Callable,
|
||||||
|
)
|
||||||
|
|
||||||
|
decode_text: Callable[[bytes], str] = pdfminer.utils.decode_text
|
||||||
|
|
||||||
|
def encode_text(s: str) -> bytes:
    """Return ``s`` as bytes suitable for a PDF string.

    Pure-ASCII text is encoded as ASCII, which keeps the output readable and
    compact. Anything else is encoded as UTF-16BE with a leading byte order
    mark.
    """
    try:
        encoded = s.encode('ascii')
    except UnicodeEncodeError:
        encoded = BOM_UTF16_BE + s.encode('utf-16be')
    return encoded
|
||||||
|
|
||||||
|
def guess_form_key(pdf: PDFDocument) -> str:
    """Guess and return the PDF document catalog key with form data

    This function knows common catalog keys that hold PDF form data,
    searches the given document for form data, and returns the first of them
    whose value contains a ``Fields`` entry.
    Raises NoFormDataError (a ValueError) when none of them does.
    """
    catalog = pdf.catalog
    for key in [
            'AcroForm',
            'FDF',
    ]:
        try:
            has_fields = 'Fields' in resolve1(catalog[key])
        except (KeyError, TypeError):
            # KeyError: the catalog lacks this key. TypeError: its value
            # does not support membership tests. Either way, move on.
            continue
        # The membership result must actually be checked: a key that exists
        # but holds no Fields is not form data.
        if has_fields:
            return key
    raise pdferrors.NoFormDataError("could not find catalog key with form data")
|
3
setup.py
3
setup.py
|
@ -16,6 +16,7 @@ setup(
|
||||||
'GitPython>=2.0', # Debian:python3-git
|
'GitPython>=2.0', # Debian:python3-git
|
||||||
# 1.4.1 crashes when trying to save some documents.
|
# 1.4.1 crashes when trying to save some documents.
|
||||||
'odfpy>=1.4.0,!=1.4.1', # Debian:python3-odf
|
'odfpy>=1.4.0,!=1.4.1', # Debian:python3-odf
|
||||||
|
'pdfminer.six>=20200101',
|
||||||
'PyYAML>=3.0', # Debian:python3-yaml
|
'PyYAML>=3.0', # Debian:python3-yaml
|
||||||
'regex', # Debian:python3-regex
|
'regex', # Debian:python3-regex
|
||||||
'rt>=2.0',
|
'rt>=2.0',
|
||||||
|
@ -31,6 +32,7 @@ setup(
|
||||||
|
|
||||||
packages=[
|
packages=[
|
||||||
'conservancy_beancount',
|
'conservancy_beancount',
|
||||||
|
'conservancy_beancount.pdfforms',
|
||||||
'conservancy_beancount.plugin',
|
'conservancy_beancount.plugin',
|
||||||
'conservancy_beancount.reports',
|
'conservancy_beancount.reports',
|
||||||
'conservancy_beancount.tools',
|
'conservancy_beancount.tools',
|
||||||
|
@ -46,6 +48,7 @@ setup(
|
||||||
'fund-report = conservancy_beancount.reports.fund:entry_point',
|
'fund-report = conservancy_beancount.reports.fund:entry_point',
|
||||||
'ledger-report = conservancy_beancount.reports.ledger:entry_point',
|
'ledger-report = conservancy_beancount.reports.ledger:entry_point',
|
||||||
'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
|
'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
|
||||||
|
'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
|
||||||
'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
|
'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
72
tests/pdfforms/form1.fdf
Normal file
72
tests/pdfforms/form1.fdf
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
%FDF-1.2
|
||||||
|
%âãÏÓ
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/FDF
|
||||||
|
<<
|
||||||
|
/Fields [
|
||||||
|
<<
|
||||||
|
/T (topform)
|
||||||
|
/Kids [
|
||||||
|
<<
|
||||||
|
/T (text1_0)
|
||||||
|
/FT /Tx
|
||||||
|
/V ()
|
||||||
|
>>
|
||||||
|
<<
|
||||||
|
/T (button1)
|
||||||
|
/Kids [
|
||||||
|
<<
|
||||||
|
/FT /Btn
|
||||||
|
/T (button1_0)
|
||||||
|
/AP << /N << /1 1 0 R >> >>
|
||||||
|
>>
|
||||||
|
<<
|
||||||
|
/FT /Btn
|
||||||
|
/T (button1_1)
|
||||||
|
/AP << /N << /2 1 0 R >> >>
|
||||||
|
>>
|
||||||
|
]
|
||||||
|
>>
|
||||||
|
<<
|
||||||
|
/T (text1_1)
|
||||||
|
/FT /Tx
|
||||||
|
/V ()
|
||||||
|
>>
|
||||||
|
<<
|
||||||
|
/T (text2_0)
|
||||||
|
/FT /Tx
|
||||||
|
/V ()
|
||||||
|
>>
|
||||||
|
<<
|
||||||
|
/T (button2)
|
||||||
|
/Kids [
|
||||||
|
<<
|
||||||
|
/FT /Btn
|
||||||
|
/T (button2_0)
|
||||||
|
/AP << /N << /1 1 0 R >> >>
|
||||||
|
>>
|
||||||
|
<<
|
||||||
|
/FT /Btn
|
||||||
|
/T (button2_1)
|
||||||
|
/AP << /N << /2 1 0 R >> >>
|
||||||
|
>>
|
||||||
|
]
|
||||||
|
>>
|
||||||
|
<<
|
||||||
|
% Readonly
|
||||||
|
/T (text2_R)
|
||||||
|
/FT /Tx
|
||||||
|
/Ff 1
|
||||||
|
>>
|
||||||
|
]
|
||||||
|
>>]
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
trailer
|
||||||
|
|
||||||
|
<<
|
||||||
|
/Root 1 0 R
|
||||||
|
>>
|
||||||
|
%%EOF
|
25
tests/pdfforms/form1.yml
Normal file
25
tests/pdfforms/form1.yml
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
- fdf:
|
||||||
|
type: Tx
|
||||||
|
name: topform.text1_0
|
||||||
|
- fdf:
|
||||||
|
type: Btn
|
||||||
|
name: topform.button1.button1_0
|
||||||
|
options: ['1', 'Off']
|
||||||
|
- fdf:
|
||||||
|
type: Btn
|
||||||
|
name: topform.button1.button1_1
|
||||||
|
options: ['2', 'Off']
|
||||||
|
- fdf:
|
||||||
|
type: Tx
|
||||||
|
name: topform.text1_1
|
||||||
|
- fdf:
|
||||||
|
type: Tx
|
||||||
|
name: topform.text2_0
|
||||||
|
- fdf:
|
||||||
|
type: Btn
|
||||||
|
name: topform.button2.button2_0
|
||||||
|
options: ['1', 'Off']
|
||||||
|
- fdf:
|
||||||
|
type: Btn
|
||||||
|
name: topform.button2.button2_1
|
||||||
|
options: ['2', 'Off']
|
62
tests/test_pdfforms_extract.py
Normal file
62
tests/test_pdfforms_extract.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
"""test_pdfforms_extract.py - Unit tests for PDF form extractor"""
|
||||||
|
# Copyright © 2020 Brett Smith
|
||||||
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||||
|
#
|
||||||
|
# Full copyright and licensing details can be found at toplevel file
|
||||||
|
# LICENSE.txt in the repository.
|
||||||
|
|
||||||
|
import io
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from . import testutil
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from conservancy_beancount.pdfforms import extract as extractmod
|
||||||
|
|
||||||
|
def compare_to_yaml(actual, yaml_path, from_file, form_key):
    """Assert that an extracted form matches the expectations in a YAML file.

    ``yaml_path`` may be a path object or a file name under the
    ``pdfforms`` test data directory. Only the keys present in each expected
    field are compared against the corresponding actual field.
    """
    if isinstance(yaml_path, str):
        yaml_path = testutil.test_path(f'pdfforms/{yaml_path}')
    with yaml_path.open() as yaml_file:
        expected_fields = yaml.safe_load(yaml_file)
    assert actual.get('from file') == from_file
    assert actual.get('form key') == form_key
    actual_fields = actual.get('fields', ())
    # zip_longest pads the shorter side with None, so a difference in the
    # number of fields makes the inner loop fail instead of being ignored.
    for got, want in itertools.zip_longest(actual_fields, expected_fields):
        for key, want_value in want.items():
            assert got[key] == want_value
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_extract_from_path(fdf_filename, form_key, fields_yaml):
    """Extraction through the from_path context manager."""
    source = testutil.test_path(f'pdfforms/{fdf_filename}')
    with extractmod.FormExtractor.from_path(source) as extractor:
        extracted = extractor.extract()
    compare_to_yaml(extracted, fields_yaml, fdf_filename, form_key)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_extract_from_file(fdf_filename, form_key, fields_yaml):
    """Extraction from an already-open binary file."""
    source = testutil.test_path(f'pdfforms/{fdf_filename}')
    with source.open('rb') as fdf_file:
        # Extract while the file is still open.
        extracted = extractmod.FormExtractor.from_file(fdf_file).extract()
    compare_to_yaml(extracted, fields_yaml, fdf_filename, form_key)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_main(fdf_filename, form_key, fields_yaml):
    """Running the command line tool writes the expected YAML to stdout."""
    source = testutil.test_path(f'pdfforms/{fdf_filename}')
    out_stream = io.StringIO()
    err_stream = io.StringIO()
    exit_code = extractmod.main([str(source)], out_stream, err_stream)
    assert exit_code == 0
    assert not err_stream.getvalue()
    # Rewind so the YAML loader reads what main() just wrote.
    out_stream.seek(0)
    extracted = yaml.safe_load(out_stream)
    compare_to_yaml(extracted, fields_yaml, fdf_filename, form_key)
|
350
tests/test_pdfforms_fields.py
Normal file
350
tests/test_pdfforms_fields.py
Normal file
|
@ -0,0 +1,350 @@
|
||||||
|
"""test_pdfforms_fields.py - Unit tests for PDF forms manipulation"""
|
||||||
|
# Copyright © 2020 Brett Smith
|
||||||
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||||
|
#
|
||||||
|
# Full copyright and licensing details can be found at toplevel file
|
||||||
|
# LICENSE.txt in the repository.
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from pdfminer.psparser import PSLiteral
|
||||||
|
|
||||||
|
from conservancy_beancount.pdfforms import fields as fieldsmod
|
||||||
|
|
||||||
|
def field_source(
        name=None,
        value=None,
        field_type=None,
        flags=None,
        parent=None,
        kids=None,
        *,
        literal=None,
):
    """Build a field dictionary for tests; None arguments are left out.

    A ``str`` name is ASCII-encoded. The value is wrapped in a PSLiteral when
    ``literal`` is true; when ``literal`` is None that happens for any field
    type other than ``Tx``.
    """
    source = {}
    if name is not None:
        source['T'] = name.encode('ascii') if isinstance(name, str) else name
    if value is not None:
        if literal is None:
            literal = field_type and field_type != 'Tx'
        source['V'] = PSLiteral(value) if literal else value
    if field_type is not None:
        source['FT'] = PSLiteral(field_type)
    for key, item in (('Ff', flags), ('Parent', parent)):
        if item is not None:
            source[key] = item
    if kids is not None:
        # Materialize so that iterators can be passed in.
        source['Kids'] = list(kids)
    return source
|
||||||
|
|
||||||
|
def appearance_states(*names):
    """Map each non-None name to a fresh placeholder object."""
    states = {}
    for key in names:
        if key is not None:
            states[key] = object()
    return states
|
||||||
|
|
||||||
|
def test_empty_field():
    """A field with an empty source reports defaults and has no type."""
    field = fieldsmod.FormField(field_source())
    assert not field.name()
    assert field.value() is None
    assert field.parent() is None
    assert list(field.kids()) == []
    assert field.flags() == 0
    assert field.is_terminal()
    with pytest.raises(ValueError):
        field.field_type()
|
||||||
|
|
||||||
|
def test_text_field_base():
    """The base class reports a text field's raw bytes value."""
    field = fieldsmod.FormField(field_source(b's', b'string of text', 'Tx'))
    assert field.field_type() is fieldsmod.FieldType.TEXT
    assert field.name() == 's'
    assert field.value() == b'string of text'
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('value', ['Off', 'Yes', 'On'])
def test_checkbox_field_base(value):
    """The base class reports a button's raw literal value."""
    field = fieldsmod.FormField(field_source(b'cb', value, 'Btn', literal=True))
    assert field.field_type() is fieldsmod.FieldType.BUTTON
    assert field.name() == 'cb'
    assert field.value().name == value
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('flags', range(4))
def test_readonly_flag(flags):
    """is_readonly() follows the lowest bit of the flags."""
    field = fieldsmod.FormField(field_source(flags=flags))
    assert field.flags() == flags
    assert field.is_readonly() == flags % 2
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('kid_count', range(3))
def test_kids(kid_count):
    """kids() yields one field per Kids entry, in order."""
    kid_sources = [
        field_source(f'kid{n}', field_type='Ch')
        for n in range(kid_count)
    ]
    field = fieldsmod.FormField(field_source(kids=iter(kid_sources)))
    got_kids = list(field.kids())
    assert len(got_kids) == len(kid_sources)
    assert field.is_terminal() == (not kid_sources)
    for got, want in zip(got_kids, kid_sources):
        assert got.name() == want['T'].decode('ascii')
|
||||||
|
|
||||||
|
def test_kids_by_type():
    """kids() wraps each kid in the class matching its own field type."""
    kid_sources = [field_source(field_type='Tx'), field_source(field_type='Btn')]
    top = fieldsmod.FormField.by_type(field_source('topform', kids=iter(kid_sources)))
    kids = top.kids()
    assert isinstance(next(kids), fieldsmod.TextField)
    assert isinstance(next(kids), fieldsmod.CheckboxField)
    assert next(kids, None) is None
|
||||||
|
|
||||||
|
def test_inheritance():
    """A kid inherits type, value, and flags from its parent, but not kids."""
    parent_source = field_source(b'parent', 'parent value', 'Tx', 17)
    kid_source = field_source('kid', parent=parent_source)
    parent_source['Kids'] = [kid_source]
    kid = fieldsmod.FormField(kid_source)
    parent = kid.parent()
    assert parent is not None
    assert parent.name() == 'parent'
    assert not parent.is_terminal()
    assert kid.is_terminal()
    assert kid.name() == 'kid'
    assert kid.field_type() is fieldsmod.FieldType.TEXT
    assert kid.value() == 'parent value'
    assert kid.flags() == 17
    assert list(kid.kids()) == []
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('field_type,value', [
    ('Tx', b'new value'),
    ('Btn', PSLiteral('Yes')),
])
def test_set_value(field_type, value):
    """The base class stores and returns values unchanged."""
    field = fieldsmod.FormField(field_source(field_type=field_type))
    assert field.value() is None
    field.set_value(value)
    assert field.value() == value
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('field_type,expected', [
    ('Tx', fieldsmod.TextField),
    ('Btn', fieldsmod.CheckboxField),
])
def test_by_type(field_type, expected):
    """by_type() picks the dedicated class for supported field types."""
    field = fieldsmod.FormField.by_type(field_source(field_type=field_type))
    assert isinstance(field, expected)
|
||||||
|
|
||||||
|
def test_container_by_type():
    """A field with kids but no type of its own stays a plain FormField."""
    kids = [field_source(field_type='Tx'), field_source(field_type='Btn')]
    source = field_source('topform', kids=iter(kids))
    field = fieldsmod.FormField.by_type(source)
    # An isinstance() check would also pass for TextField or CheckboxField,
    # so compare the exact type, like the other "unsupported" tests do.
    assert type(field) is fieldsmod.FormField
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('flag', [
    # If you add dedicated classes for these types of buttons, you can remove
    # their test cases.
    fieldsmod.FieldFlags.Radio,
    fieldsmod.FieldFlags.Pushbutton,
])
def test_unsupported_button_by_type(flag):
    """Buttons without a dedicated class stay plain FormField objects."""
    field = fieldsmod.FormField.by_type(field_source(field_type='Btn', flags=flag))
    assert type(field) is fieldsmod.FormField
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('field_type', [
    # If you add dedicated classes for these types of fields, you can remove
    # their test cases.
    'Ch',
    'Sig',
])
def test_unsupported_field_by_type(field_type):
    """Field types without a dedicated class get the plain FormField class."""
    built = fieldsmod.FormField.by_type(field_source(field_type=field_type))
    # Exact type check on purpose: a subclass instance must not pass.
    assert type(built) is fieldsmod.FormField
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('value', [None, 'Off', 'Yes'])
def test_checkbox_value(value):
    """CheckboxField.value() is None when unset, else whether it reads 'Yes'."""
    checkbox_source = field_source('cb', value, 'Btn', literal=True)
    checkbox = fieldsmod.CheckboxField(checkbox_source)
    # None short-circuits to None; the other cases compare against 'Yes'.
    expected = value and value == 'Yes'
    assert checkbox.value() == expected
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('value,expected', [
    (None, None),
    (False, 'Off'),
    (True, 'Yes'),
])
def test_checkbox_set_value(value, expected):
    """Setting None/False/True on a checkbox stores None/'Off'/'Yes'."""
    checkbox = fieldsmod.CheckboxField(field_source('cb', field_type='Btn'))
    checkbox.set_value(value)
    # Go through the base-class accessor so any CheckboxField override of
    # value() is bypassed and the stored object itself is inspected.
    stored = fieldsmod.FormField.value(checkbox)
    if expected is not None:
        assert stored.name == expected
    else:
        assert stored is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('on_key,off_key', itertools.product(
    ['1', '2', 'On', 'Yes'],
    ['Off', None],
))
def test_checkbox_options(on_key, off_key):
    """options() reports the on key followed by 'Off', given or not."""
    checkbox_source = field_source('cb', field_type='Btn')
    states = appearance_states(on_key, off_key)
    checkbox_source['AP'] = {'N': states}
    checkbox = fieldsmod.CheckboxField(checkbox_source)
    assert checkbox.options() == [on_key, 'Off']
|
||||||
|
|
||||||
|
def test_checkbox_options_yes_no():
    """options() reports a 'Yes'/'No' pair of appearance states as given."""
    # I'm not sure this is actually allowed under the spec, but…
    on_key, off_key = 'Yes', 'No'
    checkbox_source = field_source('cb', field_type='Btn')
    checkbox_source['AP'] = {'N': appearance_states(on_key, off_key)}
    checkbox = fieldsmod.CheckboxField(checkbox_source)
    assert checkbox.options() == [on_key, off_key]
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('on_key,off_key,set_value', itertools.product(
    ['1', '2', 'On', 'Yes'],
    ['Off', None],
    [True, False, None],
))
def test_checkbox_set_custom_value(on_key, off_key, set_value):
    """set_value() stores the checkbox's own on key, 'Off', or nothing."""
    checkbox_source = field_source('cb', field_type='Btn')
    checkbox_source['AP'] = {'N': appearance_states(on_key, off_key)}
    checkbox = fieldsmod.CheckboxField(checkbox_source)
    checkbox.set_value(set_value)
    # Inspect the stored object through the base-class accessor.
    stored = fieldsmod.FormField.value(checkbox)
    if set_value is None:
        assert stored is None
        return
    wanted_name = (on_key or 'Yes') if set_value else 'Off'
    assert stored.name == wanted_name
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('encoding,prefix', [
    ('ascii', b''),
    ('utf-16be', codecs.BOM_UTF16_BE),
])
def test_text_value(encoding, prefix):
    """TextField.value() decodes plain and BOM-prefixed UTF-16BE bytes."""
    expected = f'{encoding} encoding test'
    raw_bytes = prefix + expected.encode(encoding)
    text_field = fieldsmod.TextField(field_source('t', raw_bytes, 'Tx'))
    assert text_field.value() == expected
|
||||||
|
|
||||||
|
def test_text_value_none():
    """A text field built without a value reports None."""
    text_field = fieldsmod.TextField(field_source(field_type='Tx'))
    result = text_field.value()
    assert result is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('text,bprefix', [
    ('ASCII test', b''),
    ('UTF—16 test', codecs.BOM_UTF16_BE),
])
def test_text_set_value(text, bprefix):
    """set_value() round-trips text and stores ASCII or BOM + UTF-16BE bytes."""
    # A BOM prefix in the parameters marks the case expected to use UTF-16BE.
    stored_encoding = 'utf-16be' if bprefix else 'ascii'
    text_field = fieldsmod.TextField(field_source(field_type='Tx'))
    text_field.set_value(text)
    assert text_field.value() == text
    # The base-class accessor exposes the stored bytes, not the decoded text.
    raw_bytes = fieldsmod.FormField.value(text_field)
    assert raw_bytes == bprefix + text.encode(stored_encoding)
|
||||||
|
|
||||||
|
def test_text_set_value_none():
    """Setting None on a text field that had a value clears the stored value."""
    text_field = fieldsmod.TextField(field_source('t', b'set None test', 'Tx'))
    text_field.set_value(None)
    stored = fieldsmod.FormField.value(text_field)
    assert stored is None
|
||||||
|
|
||||||
|
def test_empty_as_filled_fdf():
    """A field built from a default source exports as an empty mapping."""
    blank_field = fieldsmod.FormField(field_source())
    exported = blank_field.as_filled_fdf()
    assert exported == {}
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('field_type,field_class,set_value', [
    ('Btn', fieldsmod.CheckboxField, True),
    ('Btn', fieldsmod.CheckboxField, False),
    ('Ch', fieldsmod.FormField, None),
    ('Tx', fieldsmod.TextField, 'export test'),
    ('Tx', fieldsmod.TextField, 'UTF—16 export'),
])
def test_as_filled_fdf_after_set_value(field_type, field_class, set_value):
    """as_filled_fdf() exports 'T' plus a 'V' reflecting the value just set."""
    # The field type string doubles as the field's name in this test.
    form_field = field_class(field_source(field_type, field_type=field_type))
    form_field.set_value(set_value)
    exported = form_field.as_filled_fdf()
    assert exported['T'] == field_type
    # Only 'T' is expected when no value was set; 'T' and 'V' otherwise.
    expected_size = 1 if set_value is None else 2
    if set_value is None:
        assert 'V' not in exported
    elif field_class is fieldsmod.CheckboxField:
        on_or_off = 'Yes' if set_value else 'Off'
        assert exported['V'].name == on_or_off
    else:
        assert exported['V'] == set_value
    assert len(exported) == expected_size
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('field_type,expected', [
    ('Btn', None),
    ('Tx', ''),
])
def test_as_filled_fdf_default_value(field_type, expected):
    """With nothing set, a button exports no 'V' (or None) and text exports ''."""
    form_field = fieldsmod.FormField.by_type(field_source(field_type=field_type))
    exported = form_field.as_filled_fdf()
    assert exported.get('V') == expected
|
||||||
|
|
||||||
|
def test_as_filled_fdf_recursion():
    """as_filled_fdf() nests each level's kids under 'Kids', in source order."""
    button_sources = [field_source(f'bt{n}', field_type='Btn') for n in (1, 2)]
    pair_source = field_source('Buttons', kids=iter(button_sources))
    text_source = field_source('tx', field_type='Tx')
    top_source = field_source('topform', kids=[text_source, pair_source])
    top_fdf = fieldsmod.FormField(top_source).as_filled_fdf()

    # Top level: named container with no value of its own.
    assert top_fdf['T'] == 'topform'
    assert 'V' not in top_fdf

    # First kid is the text field, second is the nested button container.
    top_kids = iter(top_fdf['Kids'])
    assert next(top_kids)['T'] == 'tx'
    pair_fdf = next(top_kids)
    assert pair_fdf['T'] == 'Buttons'
    assert 'V' not in pair_fdf

    # The nested container holds exactly the two buttons.
    button_kids = iter(pair_fdf['Kids'])
    assert next(button_kids)['T'] == 'bt1'
    assert next(button_kids)['T'] == 'bt2'
    assert next(button_kids, None) is None
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('name,value,field_type', [
    (None, None, None),
    ('mt', 'mapping text', 'Tx'),
    ('mb', 'Yes', 'Btn'),
])
def test_simple_as_mapping(name, value, field_type):
    """as_mapping() on a field without kids yields one (name, field) pair."""
    form_field = fieldsmod.FormField(field_source(name, value, field_type))
    pairs = form_field.as_mapping()
    first_key, first_field = next(pairs)
    # A missing name is expected to map to the empty string.
    assert first_key == (name or '')
    assert first_field is form_field
    assert next(pairs, None) is None
|
||||||
|
|
||||||
|
def test_recursive_as_mapping():
    """as_mapping() yields dotted keys, each parent before its kids."""
    button_sources = [field_source(f'btn{n}', field_type='Btn') for n in (1, 2)]
    buttons_source = field_source('buttons', kids=iter(button_sources))
    text_sources = [field_source(f'tx{n}', field_type='Tx') for n in (1, 2)]
    texts_source = field_source('texts', kids=iter(text_sources))
    root_source = field_source('root', kids=[texts_source, buttons_source])
    pairs = fieldsmod.FormField(root_source).as_mapping()

    expected_keys = (
        'root',
        'root.texts',
        'root.texts.tx1',
        'root.texts.tx2',
        'root.buttons',
        'root.buttons.btn1',
        'root.buttons.btn2',
    )
    for expected_key in expected_keys:
        # next() without a default, so a short mapping fails loudly here.
        key, mapped_field = next(pairs)
        assert key == expected_key
        # The field's own name is the last dotted component of its key.
        leaf_name = expected_key.rsplit('.', 1)[-1]
        assert mapped_field.name() == leaf_name
    assert next(pairs, None) is None
|
Loading…
Reference in a new issue