2021-01-04 21:10:11 +00:00
|
|
|
"""test_pdfforms_extract.py - Unit tests for PDF form extractor"""
|
|
|
|
# Copyright © 2020 Brett Smith
|
|
|
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
|
|
|
#
|
|
|
|
# Full copyright and licensing details can be found at toplevel file
|
|
|
|
# LICENSE.txt in the repository.
|
|
|
|
|
|
|
|
import io
|
|
|
|
import itertools
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
import yaml
|
|
|
|
|
|
|
|
from . import testutil
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from conservancy_beancount.pdfforms import extract as extractmod
|
|
|
|
|
|
|
|
def compare_to_yaml(actual, yaml_path, from_file, form_key):
    """Assert that extracted form data matches expectations stored in YAML.

    Only the keys listed in each expected field are compared, so the YAML
    file may describe a subset of each extracted field's attributes.

    Args:
        actual: Mapping of extracted data. Its ``'from file'``,
            ``'form key'`` and ``'fields'`` entries are checked.
        yaml_path: Either a ``str`` file name, which is looked up as
            ``pdfforms/<name>`` through ``testutil.test_path``, or an object
            with an ``open()`` method, which is used as-is.
        from_file: Expected value of ``actual['from file']``.
        form_key: Expected value of ``actual['form key']``.

    Raises:
        AssertionError: On any mismatch, including when the number of
            extracted fields differs from the number of expected fields.
    """
    if isinstance(yaml_path, str):
        yaml_path = testutil.test_path(f'pdfforms/{yaml_path}')
    with yaml_path.open() as yaml_file:
        expect_fields = yaml.safe_load(yaml_file)['fields']
    assert actual.get('from file') == from_file
    assert actual.get('form key') == form_key
    # Pad the shorter side with a private sentinel rather than None so a
    # length mismatch is reported as a readable assertion failure instead of
    # an AttributeError/TypeError (or passing silently when the unmatched
    # expected field happens to have no keys).
    missing = object()
    paired = itertools.zip_longest(
        actual.get('fields', ()), expect_fields, fillvalue=missing,
    )
    for index, (act_f, exp_f) in enumerate(paired):
        assert act_f is not missing, (
            f"field #{index} missing from extracted data; expected {exp_f!r}"
        )
        assert exp_f is not missing, (
            f"unexpected extra field #{index} in extracted data: {act_f!r}"
        )
        for key, exp_value in exp_f.items():
            assert act_f[key] == exp_value, f"field #{index}, key {key!r}"
|
|
|
|
|
|
|
|
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_extract_from_path(fdf_filename, form_key, fields_yaml):
    """Extract a form through ``FormExtractor.from_path`` and check the result.

    The extractor is built from the path of the FDF test file and used as a
    context manager; the extracted data is then compared with the expected
    fields from the YAML file.
    """
    source_path = testutil.test_path(f'pdfforms/{fdf_filename}')
    with extractmod.FormExtractor.from_path(source_path) as form_extractor:
        extracted = form_extractor.extract()
    compare_to_yaml(
        extracted,
        fields_yaml,
        fdf_filename,
        form_key,
    )
|
|
|
|
|
|
|
|
# The two parameter sets below used to live in two separate functions that
# were both named test_extract_from_file.  The second definition rebound the
# name, so the 'form1.fdf' case was never collected.  Both cases share the
# same body, so they are expressed as one parametrized test.
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
    ('form1_fill.fdf', 'FDF', 'form1_fill.yml'),
])
def test_extract_from_file(fdf_filename, form_key, fields_yaml):
    """Extract a form through ``FormExtractor.from_file`` and check the result.

    The FDF test file is opened in binary mode, handed to the extractor as an
    open file object, and the extracted data is compared with the expected
    fields from the YAML file.
    """
    with testutil.test_path(f'pdfforms/{fdf_filename}').open('rb') as fdf_file:
        extractor = extractmod.FormExtractor.from_file(fdf_file)
        actual = extractor.extract()
    compare_to_yaml(actual, fields_yaml, fdf_filename, form_key)
|
|
|
|
|
2021-01-04 21:10:11 +00:00
|
|
|
@pytest.mark.parametrize('fdf_filename,form_key,fields_yaml', [
    ('form1.fdf', 'FDF', 'form1.yml'),
])
def test_main(fdf_filename, form_key, fields_yaml):
    """Run the extractor's ``main`` entry point on an FDF file.

    Checks that ``main`` returns 0, writes nothing to its error stream, and
    writes YAML to its output stream that matches the expected fields.
    """
    out_stream = io.StringIO()
    err_stream = io.StringIO()
    cli_args = [str(testutil.test_path(f'pdfforms/{fdf_filename}'))]
    exit_status = extractmod.main(cli_args, out_stream, err_stream)
    assert exit_status == 0
    assert not err_stream.getvalue()
    # Rewind so the YAML parser reads everything main() wrote.
    out_stream.seek(0)
    parsed_output = yaml.safe_load(out_stream)
    compare_to_yaml(
        parsed_output,
        fields_yaml,
        fdf_filename,
        form_key,
    )
|