meta_entity: More battle testing.
See the test cases for examples of real entities in the books that we should accept for now.
This commit is contained in:
parent
072937eff5
commit
56b644f1db
2 changed files with 81 additions and 28 deletions
|
@ -25,39 +25,51 @@ from . import core
|
|||
from .. import data
|
||||
from .. import errors as errormod
|
||||
from ..beancount_types import (
|
||||
MetaKey,
|
||||
MetaValue,
|
||||
Transaction,
|
||||
)
|
||||
|
||||
from typing import (
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Pattern,
|
||||
Tuple,
|
||||
)
|
||||
|
||||
class MetaEntity(core.TransactionHook):
|
||||
METADATA_KEY = 'entity'
|
||||
HOOK_GROUPS = frozenset(['posting', 'metadata', METADATA_KEY])
|
||||
|
||||
# alnum is the set of characters we always accept in entity metadata:
|
||||
# letters and digits, minus the Latin 1 supplement (i.e., Roman letters
|
||||
# with diacritics: áÁàÀâÂåÅäÄãà çÇ ðÐ ñÑ øØ ß etc.)
|
||||
# chars is the set of characters we always accept in entity metadata:
|
||||
# letters, digits, and ASCII punctuation, except `-` and the Latin 1 supplement
|
||||
# (i.e., Roman letters with diacritics: áÁàÀâÂåÅäÄãà çÇ ðÐ ñÑ øØ ß etc.)
|
||||
# See the tests for specific cases.
|
||||
alnum = r'\p{Letter}\p{Digit}--\p{Block=Latin_1_Supplement}'
|
||||
# A regexp that would be reasonably stricter would be:
|
||||
# f'^[{alnum}][.{alnum}]*(?:-[.{alnum}])*$'
|
||||
# However, current producers fail that regexp in a few different ways.
|
||||
# See the tests for specific cases.
|
||||
ENTITY_RE: Pattern[str] = regex.compile(f'^[{alnum}][-.{alnum}]*$', regex.VERSION1)
|
||||
del alnum
|
||||
chars = r'\u0021-\u002c\u002e-\u007e\p{Letter}\p{Digit}--\p{Block=Latin_1_Supplement}'
|
||||
ENTITY_RE: Pattern[str] = regex.compile(f'^[{chars}][-{chars}]*$', regex.VERSION1)
|
||||
ANONYMOUS_RE: Pattern[str] = regex.compile(r'^[-_.?!\s]*$', regex.VERSION1)
|
||||
del chars
|
||||
|
||||
def _check_entity(self,
|
||||
meta: MutableMapping[MetaKey, MetaValue],
|
||||
default: Optional[str]=None,
|
||||
) -> Tuple[Optional[str], Optional[bool]]:
|
||||
entity = meta.get(self.METADATA_KEY, default)
|
||||
if entity is None:
|
||||
return None, None
|
||||
elif not isinstance(entity, str):
|
||||
return None, False
|
||||
elif self.ANONYMOUS_RE.match(entity):
|
||||
entity = 'Anonymous'
|
||||
meta[self.METADATA_KEY] = entity
|
||||
return entity, True
|
||||
else:
|
||||
return entity, self.ENTITY_RE.match(entity) is not None
|
||||
|
||||
def run(self, txn: Transaction) -> errormod.Iter:
|
||||
if data.is_opening_balance_txn(txn):
|
||||
return
|
||||
txn_entity = txn.meta.get(self.METADATA_KEY, txn.payee)
|
||||
if txn_entity is None:
|
||||
txn_entity_ok = None
|
||||
elif isinstance(txn_entity, str):
|
||||
txn_entity_ok = bool(self.ENTITY_RE.match(txn_entity))
|
||||
else:
|
||||
txn_entity_ok = False
|
||||
txn_entity, txn_entity_ok = self._check_entity(txn.meta, txn.payee)
|
||||
if txn_entity_ok is False:
|
||||
yield errormod.InvalidMetadataError(txn, self.METADATA_KEY, txn_entity)
|
||||
for post in data.Posting.from_txn(txn):
|
||||
|
@ -68,10 +80,8 @@ class MetaEntity(core.TransactionHook):
|
|||
'Liabilities:Payable',
|
||||
):
|
||||
continue
|
||||
entity = post.meta.get(self.METADATA_KEY)
|
||||
if entity is None:
|
||||
yield errormod.InvalidMetadataError(txn, self.METADATA_KEY, entity, post)
|
||||
elif entity is txn_entity:
|
||||
entity, entity_ok = self._check_entity(post.meta, txn_entity)
|
||||
if entity is txn_entity and entity is not None:
|
||||
pass
|
||||
elif not self.ENTITY_RE.match(entity):
|
||||
elif not entity_ok:
|
||||
yield errormod.InvalidMetadataError(txn, self.METADATA_KEY, entity, post)
|
||||
|
|
|
@ -37,8 +37,14 @@ VALID_VALUES = {
|
|||
'スミスダコタ',
|
||||
'スミス-ダコタ',
|
||||
'Яшин-Данила',
|
||||
# The PayPal importer produces . in entity metadata
|
||||
# Governments, using : as a hierarchy separator
|
||||
'BE',
|
||||
'US:KY',
|
||||
'CA:ON',
|
||||
# The PayPal importer allows ASCII punctuation in entity metadata
|
||||
'Du-Bois-W.-E.-B.',
|
||||
"O'Malley-Thomas",
|
||||
'O`Malley-Thomas',
|
||||
# import2ledger produces entities that end with -
|
||||
# That's probably a bug, but allow it for now.
|
||||
'foo-',
|
||||
|
@ -47,7 +53,6 @@ VALID_VALUES = {
|
|||
INVALID_VALUES = {
|
||||
# Starting with a - is not allowed
|
||||
'-foo',
|
||||
'-',
|
||||
# Names that can be reduced to ASCII should be
|
||||
# Producers should change this to Uberentity or Ueberentity
|
||||
# I am not wild about this rule and would like to relax it—it's mostly
|
||||
|
@ -56,14 +61,25 @@ INVALID_VALUES = {
|
|||
# mangling producers are expected to do. But it's the rule for today.
|
||||
'Überentity',
|
||||
# Whitespace is never allowed
|
||||
' ',
|
||||
'Alex Smith',
|
||||
'田中\u00A0流星', # Non-breaking space
|
||||
# The only punctuation allowed is - and .
|
||||
'スミス_ダコタ',
|
||||
# Non-ASCII punctuation is not allowed
|
||||
'Яшин—Данила', # em dash
|
||||
# An empty string is not valid
|
||||
'O’Malley-Thomas', # Right-angled apostrophe
|
||||
'Du-Bois-W。-E。-B。', # Japanese period
|
||||
}
|
||||
|
||||
ANONYMOUS_VALUES = {
|
||||
# Values produced by various importers that should be translated to
|
||||
# Anonymous.
|
||||
'',
|
||||
' ',
|
||||
'-',
|
||||
'--',
|
||||
'-----',
|
||||
'_',
|
||||
' _ ',
|
||||
'.',
|
||||
}
|
||||
|
||||
TEST_KEY = 'entity'
|
||||
|
@ -81,6 +97,15 @@ def test_valid_values_on_postings(hook, src_value):
|
|||
])
|
||||
assert not any(hook.run(txn))
|
||||
|
||||
@pytest.mark.parametrize('src_value', ANONYMOUS_VALUES)
|
||||
def test_anonymous_values_on_postings(hook, src_value):
|
||||
txn = testutil.Transaction(postings=[
|
||||
('Assets:Cash', -25),
|
||||
('Expenses:General', 25, {TEST_KEY: src_value}),
|
||||
])
|
||||
assert not any(hook.run(txn))
|
||||
assert txn.postings[-1].meta[TEST_KEY] == 'Anonymous'
|
||||
|
||||
@pytest.mark.parametrize('src_value', INVALID_VALUES)
|
||||
def test_invalid_values_on_postings(hook, src_value):
|
||||
txn = testutil.Transaction(postings=[
|
||||
|
@ -99,6 +124,15 @@ def test_valid_values_on_transactions(hook, src_value):
|
|||
])
|
||||
assert not any(hook.run(txn))
|
||||
|
||||
@pytest.mark.parametrize('src_value', ANONYMOUS_VALUES)
|
||||
def test_anonymous_values_on_transactions(hook, src_value):
|
||||
txn = testutil.Transaction(**{TEST_KEY: src_value}, postings=[
|
||||
('Assets:Cash', -25),
|
||||
('Expenses:General', 25),
|
||||
])
|
||||
assert not any(hook.run(txn))
|
||||
assert txn.meta[TEST_KEY] == 'Anonymous'
|
||||
|
||||
@pytest.mark.parametrize('src_value', INVALID_VALUES)
|
||||
def test_invalid_values_on_transactions(hook, src_value):
|
||||
txn = testutil.Transaction(**{TEST_KEY: src_value}, postings=[
|
||||
|
@ -118,6 +152,15 @@ def test_valid_values_on_payee(hook, src_value):
|
|||
])
|
||||
assert not any(hook.run(txn))
|
||||
|
||||
@pytest.mark.parametrize('src_value', ANONYMOUS_VALUES)
|
||||
def test_anonymous_values_on_payee(hook, src_value):
|
||||
txn = testutil.Transaction(payee=src_value, postings=[
|
||||
('Assets:Cash', -25),
|
||||
('Expenses:General', 25),
|
||||
])
|
||||
assert not any(hook.run(txn))
|
||||
assert txn.meta[TEST_KEY] == 'Anonymous'
|
||||
|
||||
@pytest.mark.parametrize('src_value', INVALID_VALUES)
|
||||
def test_invalid_values_on_payee(hook, src_value):
|
||||
txn = testutil.Transaction(payee=src_value, postings=[
|
||||
|
|
Loading…
Reference in a new issue