conservancy_beancount/tests/test_pdfforms_extract.py
Brett Smith 13c66e8ce2 pdfforms: Initial module and tool to extract PDF form data to YAML.
Next steps:

* A tool to fill the PDF form based on values written to that YAML.
* An extension to fill some of those values with numbers queried from the
  books (which is why we need something more involved than FDF).
2021-01-09 10:09:08 -05:00

62 lines
2.3 KiB
Python

"""test_pdfforms_extract.py - Unit tests for PDF form extractor"""
# Copyright © 2020 Brett Smith
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
#
# Full copyright and licensing details can be found at toplevel file
# LICENSE.txt in the repository.
import io
import itertools
import pytest
import yaml
from . import testutil
from pathlib import Path
from conservancy_beancount.pdfforms import extract as extractmod
def compare_to_yaml(actual, yaml_path, from_file, form_key):
    """Assert that extracted form data matches a YAML list of expected fields.

    ``actual`` is the mapping to check. ``yaml_path`` is either an object
    with an ``open()`` method, or a string naming a file that is looked up
    under ``pdfforms/`` with ``testutil.test_path``. ``from_file`` and
    ``form_key`` are the values expected under the ``'from file'`` and
    ``'form key'`` keys of ``actual``.

    Fields are compared pairwise, in order. Only the keys listed in each
    expected field are checked; additional keys in the actual field are
    ignored. A difference in the number of fields fails the assertion.
    """
    if isinstance(yaml_path, str):
        yaml_path = testutil.test_path(f'pdfforms/{yaml_path}')
    with yaml_path.open() as yaml_file:
        expect_fields = yaml.safe_load(yaml_file)
    assert actual.get('from file') == from_file
    assert actual.get('form key') == form_key
    actual_fields = actual.get('fields', ())
    # zip_longest pads the shorter sequence with None. Check for that padding
    # explicitly so a count mismatch is reported as a readable assertion
    # failure instead of an AttributeError/TypeError raised on None (or, for
    # an empty expected mapping, not being reported at all).
    paired_fields = itertools.zip_longest(actual_fields, expect_fields)
    for index, (act_f, exp_f) in enumerate(paired_fields):
        assert exp_f is not None, f"unexpected extra field #{index}: {act_f!r}"
        assert act_f is not None, f"missing expected field #{index}: {exp_f!r}"
        for key, exp_value in exp_f.items():
            assert act_f[key] == exp_value
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_extract_from_path(fdf_filename, form_key, fields_yaml):
    """Extract through ``FormExtractor.from_path`` and compare with the YAML."""
    source = testutil.test_path(f'pdfforms/{fdf_filename}')
    extractor_context = extractmod.FormExtractor.from_path(source)
    # The object returned by from_path is used as a context manager; the
    # extraction happens while that context is active.
    with extractor_context as form_extractor:
        extracted = form_extractor.extract()
    compare_to_yaml(extracted, fields_yaml, fdf_filename, form_key)
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_extract_from_file(fdf_filename, form_key, fields_yaml):
    """Extract through ``FormExtractor.from_file`` and compare with the YAML."""
    fixture = testutil.test_path(f'pdfforms/{fdf_filename}')
    # The fixture is opened in binary mode and stays open during extraction.
    with fixture.open('rb') as stream:
        extracted = extractmod.FormExtractor.from_file(stream).extract()
    compare_to_yaml(extracted, fields_yaml, fdf_filename, form_key)
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_main(fdf_filename, form_key, fields_yaml):
    """Call ``extractmod.main`` on a fixture and check what it writes."""
    argv = [str(testutil.test_path(f'pdfforms/{fdf_filename}'))]
    out_stream, err_stream = io.StringIO(), io.StringIO()
    exit_status = extractmod.main(argv, out_stream, err_stream)
    assert exit_status == 0
    assert not err_stream.getvalue()
    # Everything written to the stdout buffer is parsed as YAML and then
    # checked the same way as the directly extracted data.
    parsed = yaml.safe_load(out_stream.getvalue())
    compare_to_yaml(parsed, fields_yaml, fdf_filename, form_key)