2cf4fdcfad
After you edit the YAML generated by pdfform-extract, you can re-fill the original PDF with pdfform-fill.
381 lines
12 KiB
Python
381 lines
12 KiB
Python
"""test_pdfforms_writer.py - Unit tests for PDF writer"""
|
|
# Copyright © 2020 Brett Smith
|
|
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
|
#
|
|
# Full copyright and licensing details can be found at toplevel file
|
|
# LICENSE.txt in the repository.
|
|
|
|
import codecs
|
|
import io
|
|
import logging
|
|
import re
|
|
import shutil
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from . import testutil
|
|
from pdfminer.pdfdocument import PDFDocument
|
|
from pdfminer.pdfparser import PDFParser
|
|
from pdfminer.pdftypes import resolve1
|
|
from pdfminer.psparser import PSLiteral
|
|
|
|
from conservancy_beancount.pdfforms import fill as fillmod
|
|
|
|
# Path of the pdftk executable as found on PATH, or None when it is absent.
PDFTK = shutil.which('pdftk')
# Per the PDF spec, 7.2.2 "Character Set" Table 1
WHITESPACE = bytes([0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20])
# Matches one or more consecutive bytes drawn from WHITESPACE.
WHITESPACE_RE = re.compile(b'[%b]+' % WHITESPACE)
|
|
|
|
@pytest.fixture(scope='module')
def writer():
    """Module-scoped fixture: a ``fillmod.PDFWriter`` built with no arguments."""
    pdf_writer = fillmod.PDFWriter()
    return pdf_writer
|
|
|
|
def expected_re(expected):
    """Turn literal PDF source bytes into a whitespace-tolerant regex.

    ``expected`` is escaped with ``re.escape`` and then relaxed by a fixed
    sequence of substitutions, applied in order.  The resulting pattern
    (bytes) is returned uncompiled.
    """
    relaxations = (
        # Unescape some things that don't strictly need to be escaped.
        (re.compile(rb'\\(<|>| )'), rb'\1'),
        # Allow arbitrary whitespace around punctuation tokens.
        (re.compile(rb'(<<|>>|\\\[|\\\])'), rb'\\s*\1\\s*'),
        # Allow any kind of whitespace where any is required.
        (WHITESPACE_RE, rb'\\s+'),
    )
    pattern = re.escape(expected)
    for regex, replacement in relaxations:
        pattern = regex.sub(replacement, pattern)
    return pattern
|
|
|
|
def utf16_str(s):
    """Return ``s`` as bytes: ``(`` + UTF-16BE BOM + UTF-16BE text + ``)``."""
    encoded = codecs.BOM_UTF16_BE + s.encode('utf-16be')
    return b'(' + encoded + b')'
|
|
|
|
def open_pdf(source):
    """Parse ``source`` with pdfminer and return the ``PDFDocument``.

    ``source`` is either a ``Path``, which is opened for binary reading, or
    an already-open seekable stream, which is rewound to its start first.
    A file opened here is not closed by this function.
    """
    if isinstance(source, Path):
        stream = source.open('rb')
    else:
        source.seek(0)
        stream = source
    return PDFDocument(PDFParser(stream))
|
|
|
|
def merge_form(yaml_fills, form_filename='form1.fdf', form_key='FDF'):
    """Merge ``yaml_fills`` into the fields of a test form file.

    Loads ``pdfforms/<form_filename>`` from the test data directory, pulls
    the ``Fields`` entry out of the catalog dictionary stored under
    ``form_key``, and returns whatever ``fillmod.merge_form`` returns for
    those fields (the tests unpack it as ``(fields, errors)``).
    """
    # The rest of this module treats test_path() results as pathlib.Path
    # objects (.open(), isinstance(source, Path)).  Entering a Path in a
    # ``with`` statement has done nothing since Python 3.9 and raises on
    # 3.13+, so use the path directly instead.
    fdf_path = testutil.test_path(f'pdfforms/{form_filename}')
    pdf = open_pdf(fdf_path)
    pdf_fields = resolve1(pdf.catalog[form_key])['Fields']
    return fillmod.merge_form(yaml_fills, pdf_fields)
|
|
|
|
@pytest.mark.parametrize('source,expected', [
    (None, b'null'),
    (True, b'true'),
    (False, b'false'),
    (0, b'0'),
    (1, b'1'),
    (345, b'345'),
    (34.56, b'34.56'),
    ('', b'()'),
    ('ascii', b'(ascii)'),
    (')parens(', br'(\)parens\()'),
    ('UTF—16', utf16_str('UTF—16')),
    (')¤(', utf16_str(r'\)¤\(')),
    (PSLiteral('lit'), b'/lit'),
    (PSLiteral('# header'), b'/#23#20header'),
])
def test_write_scalar(writer, source, expected):
    """Each scalar is emitted as ``expected``, ignoring surrounding whitespace."""
    chunks = writer.emit(source)
    emitted = b''.join(chunks)
    assert emitted.strip(WHITESPACE) == expected
|
|
|
|
@pytest.mark.parametrize('source,expected', [
    ([], b'[]'),
    ([1, 2, 3], b'[1 2 3]'),
    ([[1, 3], [2, 4], []], b'[[1 3][2 4][]]'),
    ({}, b'<<>>'),
    ({'Yes': True, 'No': False}, b'<</Yes true /No false>>'),
    ({'Kids': [1, 2, 3]}, b'<</Kids [1 2 3]>>'),
])
def test_write_compound(writer, source, expected):
    """Lists and dicts are emitted as ``expected``, modulo flexible whitespace."""
    tolerant_pattern = expected_re(expected)
    emitted = b''.join(writer.emit(source))
    assert re.fullmatch(tolerant_pattern, emitted)
|
|
|
|
def test_write_document(writer):
    """A two-field FDF structure written by the writer parses back intact."""
    fields = [
        {'FT': PSLiteral('Tx'), 'T': 'text'},
        {'FT': PSLiteral('Btn'), 'T': 'check'},
    ]
    buffer = io.BytesIO()
    writer.write_document({'FDF': {'Fields': fields}}, buffer)
    pdf = open_pdf(buffer)
    # The catalog holds nothing but the FDF entry, and that entry holds
    # nothing but its Fields.
    assert len(pdf.catalog) == 1
    fdf = resolve1(pdf.catalog['FDF'])
    assert len(fdf) == 1
    text_field, check_field = fdf['Fields']
    assert text_field['FT'].name == 'Tx'
    assert text_field['T'] == b'text'
    assert check_field['FT'].name == 'Btn'
    assert check_field['T'] == b'check'
|
|
|
|
def test_merge():
    """Merging form1_fill.yml into form1.fdf sets every expected field value."""
    yaml_path = testutil.test_path('pdfforms/form1_fill.yml')
    with yaml_path.open() as yaml_file:
        form_yaml = yaml.safe_load(yaml_file)['fields']
    actual, errors = merge_form(form_yaml)
    assert not errors
    expected = {
        'text1_0': 'text 1.0',
        'button1_0': PSLiteral('1'),
        'button1_1': None,
        'text1_1': 'text 1.1',
        'text2_0': 'text 2.0',
        'button2_0': None,
        'button2_1': PSLiteral('2'),
    }
    # Walk the field tree breadth-first: ``actual`` doubles as the work
    # queue, so each visited field's Kids are appended and visited later.
    position = 0
    while position < len(actual):
        field = actual[position]
        position += 1
        name = field.get('T')
        if name in expected:
            expect_value = expected.pop(name)
            actual_value = field.get('V')
            if isinstance(expect_value, PSLiteral):
                # Literals are checked by name.
                assert actual_value.name == expect_value.name
            else:
                assert actual_value == expect_value
        actual.extend(field.get('Kids', ()))
    assert not expected, "not all expected fields found in filled form data"
|
|
|
|
@pytest.mark.parametrize('name', [None, 'nonesuchfield'])
def test_merge_bad_name(name):
    """A fill with a missing or ``nonesuchfield`` name yields exactly one ERROR."""
    fdf_spec = {} if name is None else {'name': name}
    _, errors = merge_form([{'fdf': fdf_spec}])
    [error] = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == name
|
|
|
|
@pytest.mark.parametrize('name,yaml_type', [
    ('topform.text1_0', 'Btn'),
    ('topform.button1.button1_0', 'Tx'),
])
def test_merge_yaml_wrong_type(name, yaml_type):
    """A fill declaring ``yaml_type`` for ``name`` yields one WARNING-or-worse."""
    fdf_spec = {'name': name, 'type': yaml_type}
    _, errors = merge_form([{'fdf': fdf_spec}])
    [error] = errors
    assert error.level >= logging.WARNING
    assert error.yaml_index == 0
    assert error.name == name
|
|
|
|
@pytest.mark.parametrize('value', ['', ' ', 'readwrite'])
def test_merge_readonly_field(value):
    """Giving ``topform.text2_R`` any value yields one WARNING-or-worse."""
    field_name = 'topform.text2_R'
    entry = {'fdf': {'name': field_name}, 'value': value}
    _, errors = merge_form([entry])
    [error] = errors
    assert error.level >= logging.WARNING
    assert error.yaml_index == 0
    assert error.name == field_name
|
|
|
|
@pytest.mark.parametrize('value', [None, True, 'Yes'])
def test_merge_nonterminal_field(value):
    """Filling ``topform.button1`` is accepted only when the value is None."""
    field_name = 'topform.button1'
    entry = {'fdf': {'name': field_name}, 'value': value}
    _, errors = merge_form([entry])
    if value is None:
        assert not errors
        return
    # Any real value must produce exactly one warning-or-worse.
    [error] = errors
    assert error.level >= logging.WARNING
    assert error.yaml_index == 0
    assert error.name == field_name
|
|
|
|
@pytest.mark.parametrize('value', [None, True, 'Yes'])
def test_merge_unsupported_field_type(value):
    """Filling ``topform.submit`` is accepted only when the value is None."""
    field_name = 'topform.submit'
    entry = {'fdf': {'name': field_name, 'type': 'Btn'}, 'value': value}
    _, errors = merge_form([entry])
    if value is None:
        assert not errors
        return
    # Any real value must produce exactly one warning-or-worse.
    [error] = errors
    assert error.level >= logging.WARNING
    assert error.yaml_index == 0
    assert error.name == field_name
|
|
|
|
@pytest.mark.parametrize('value', [True, False, [], {}])
def test_merge_unsupported_text_value(value):
    """Boolean or container values for ``topform.text1_0`` yield one ERROR."""
    field_name = 'topform.text1_0'
    entry = {'fdf': {'name': field_name}, 'value': value}
    _, errors = merge_form([entry])
    [error] = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == field_name
|
|
|
|
@pytest.mark.parametrize('value', ['', 'Off', 'Yes', [], {}])
def test_merge_unsupported_checkbox_value(value):
    """String or container values for ``button1_0`` yield exactly one ERROR."""
    field_name = 'topform.button1.button1_0'
    entry = {'fdf': {'name': field_name}, 'value': value}
    _, errors = merge_form([entry])
    [error] = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == field_name
|
|
|
|
def test_generate():
    """Two dotted names sharing a prefix generate one root with two kids."""
    text_fill = {'fdf': {'name': 'form.text', 'type': 'Tx'}, 'value': 'generated'}
    button_fill = {'fdf': {'name': 'form.button', 'type': 'Btn'}, 'value': True}
    actual, errors = fillmod.generate_form([text_fill, button_fill])
    assert not errors
    [form_root] = actual
    # The root carries the shared prefix as its name and no value.
    assert form_root['T'] == 'form'
    assert 'V' not in form_root
    text, checkbox = form_root['Kids']
    assert text['T'] == 'text'
    assert text['V'] == 'generated'
    assert not text.get('Kids')
    assert checkbox['T'] == 'button'
    assert checkbox['V'].name == 'Yes'
    assert not checkbox.get('Kids')
|
|
|
|
@pytest.mark.parametrize('options,value', [
    (['1'], True),
    (['1'], False),
    (['On', 'Off'], True),
    (['On', 'Off'], False),
])
def test_generate_checkbox_with_options(options, value):
    """A true checkbox takes its first option as value; a false one is ``Off``."""
    fdf_spec = {'name': 'cbox', 'type': 'Btn', 'options': options}
    actual, errors = fillmod.generate_form([{'fdf': fdf_spec, 'value': value}])
    assert not errors
    if value:
        wanted = options[0]
    else:
        wanted = 'Off'
    assert actual[0]['V'].name == wanted
|
|
|
|
@pytest.mark.parametrize('yaml_type', [None, 'Ch', 'Sig'])
def test_generate_unsupported_field_type(yaml_type):
    """An absent, ``Ch`` or ``Sig`` field type yields exactly one ERROR."""
    fdf_spec = {'name': 'badtype'}
    # None stands for "no type key at all", not for an explicit null type.
    if yaml_type is not None:
        fdf_spec['type'] = yaml_type
    entry = {'fdf': fdf_spec, 'value': 'unsupported type value'}
    _, errors = fillmod.generate_form([entry])
    [error] = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == 'badtype'
|
|
|
|
def test_generate_invalid_field_type():
    """Type ``<unknown>`` yields only ERRORs, one of which quotes the type."""
    entry = {
        'fdf': {'name': 'badtype', 'type': '<unknown>'},
        'value': 'unsupported type value',
    }
    _, errors = fillmod.generate_form([entry])
    assert errors
    found_msg = False
    for error in errors:
        assert error.level >= logging.ERROR
        assert error.yaml_index == 0
        assert error.name == 'badtype'
        # Stop inspecting descriptions once one has mentioned the type.
        if not found_msg:
            found_msg = '<unknown>' in error.errdesc
    assert found_msg, "no errors mentioned unknown field type"
|
|
|
|
@pytest.mark.parametrize('value', [True, False, [], {}])
def test_generate_unsupported_text_value(value):
    """Boolean or container values for a ``Tx`` field yield exactly one ERROR."""
    field_name = 'badtext'
    entry = {'fdf': {'name': field_name, 'type': 'Tx'}, 'value': value}
    _, errors = fillmod.generate_form([entry])
    [error] = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == field_name
|
|
|
|
@pytest.mark.parametrize('value', ['', 'Off', 'Yes', [], {}])
def test_generate_unsupported_checkbox_value(value):
    """String or container values for a ``Btn`` field yield exactly one ERROR."""
    field_name = 'badbutton'
    entry = {'fdf': {'name': field_name, 'type': 'Btn'}, 'value': value}
    _, errors = fillmod.generate_form([entry])
    [error] = errors
    assert error.level >= logging.ERROR
    assert error.yaml_index == 0
    assert error.name == field_name
|
|
|
|
def test_main_generate_fdf():
    """``main`` with only a YAML argument writes the expected FDF to stdout.

    The expected name/value tokens must appear in order, each on a later
    line than the previous match.
    """
    yaml_path = testutil.test_path('pdfforms/form1_fill.yml')
    arglist = ['--output-file=-', str(yaml_path)]
    stdout = io.BytesIO()
    stderr = io.StringIO()
    retcode = fillmod.main(arglist, stdout, stderr)
    assert retcode == 0
    assert not stderr.getvalue()
    patterns = [expected_re(p) for p in [
        b'/T (text1_0)',
        b'/V (text 1.0)',
        b'/T (button1_0)',
        b'/V /1',
        b'/T (text2_0)',
        b'/V (text 2.0)',
        b'/T (button2_1)',
        b'/V /2',
    ]]
    # ``wanted`` indexes the next pattern still waiting for a matching line.
    wanted = 0
    stdout.seek(0)
    for line in stdout:
        if re.search(patterns[wanted], line):
            wanted += 1
            if wanted == len(patterns):
                break
    else:
        # Output ran out before every pattern had been matched.
        pytest.fail(f"pattern {patterns[wanted]!r} not found in FDF output")
|
|
|
|
# ``skipUnless`` is a unittest decorator name; as a pytest mark it is just an
# unregistered custom mark that never skips (and is rejected under
# --strict-markers).  ``skipif`` is the pytest built-in that does the job.
@pytest.mark.skipif(not PDFTK, reason="need pdftk installed")
@pytest.mark.xfail(reason="`pdftk fill_form` expects a full PDF")
def test_main_fill_pdf():
    """``main`` with a YAML file and a form file fills the form via pdftk.

    Skipped when pdftk is not installed.  Expected to fail for now: the
    fixture passed as the form is an FDF, not a full PDF.  The expected
    name/value tokens must appear in the output in order, each on a later
    line than the previous match.
    """
    arglist = [
        '--pdftk', PDFTK,
        '--output-file', '-',
        str(testutil.test_path('pdfforms/form1_fill.yml')),
        str(testutil.test_path('pdfforms/form1.fdf')),
    ]
    stdout = io.BytesIO()
    stderr = io.StringIO()
    retcode = fillmod.main(arglist, stdout, stderr)
    assert retcode == 0
    assert not stderr.getvalue()
    patterns = iter(expected_re(p) for p in [
        b'/T (text1_0)',
        b'/V (text 1.0)',
        b'/T (button1_0)',
        b'/V /1',
        b'/T (text2_0)',
        b'/V (text 2.0)',
        b'/T (button2_1)',
        b'/V /2',
    ])
    pattern = next(patterns)
    stdout.seek(0)
    for line in stdout:
        if re.search(pattern, line):
            try:
                pattern = next(patterns)
            except StopIteration:
                # Every pattern has been matched.
                break
    else:
        # Output ran out while ``pattern`` was still unmatched.
        pytest.fail(f"pattern {pattern!r} not found in FDF output")
|