meta_entity: Adjust what entities are allowed based on today's books.
See the comments throughout for more discussion about what cases are or aren't allowed, and why.
This commit is contained in:
parent
90a58ef112
commit
499f18ff62
3 changed files with 52 additions and 3 deletions
|
@ -14,7 +14,12 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import re
|
# Type stubs aren't available for regex.
|
||||||
|
# Fortunately, we're using it in a way that's API-compatible with the re
|
||||||
|
# module. We mitigate the lack of type stubs by providing type declarations
|
||||||
|
# for returned objects. This way, the only thing that isn't type checked are
|
||||||
|
# the calls to regex functions.
|
||||||
|
import regex # type:ignore[import]
|
||||||
|
|
||||||
from . import core
|
from . import core
|
||||||
from .. import data
|
from .. import data
|
||||||
|
@ -23,10 +28,25 @@ from ..beancount_types import (
|
||||||
Transaction,
|
Transaction,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from typing import (
|
||||||
|
Pattern,
|
||||||
|
)
|
||||||
|
|
||||||
class MetaEntity(core.TransactionHook):
|
class MetaEntity(core.TransactionHook):
|
||||||
METADATA_KEY = 'entity'
|
METADATA_KEY = 'entity'
|
||||||
HOOK_GROUPS = frozenset(['posting', 'metadata', METADATA_KEY])
|
HOOK_GROUPS = frozenset(['posting', 'metadata', METADATA_KEY])
|
||||||
ENTITY_RE = re.compile(r'^[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*$')
|
|
||||||
|
# alnum is the set of characters we always accept in entity metadata:
|
||||||
|
# letters and digits, minus the Latin 1 supplement (i.e., Roman letters
|
||||||
|
# with diacritics: áÁàÀâÂåÅäÄãà çÇ ðÐ ñÑ øØ ß etc.)
|
||||||
|
# See the tests for specific cases.
|
||||||
|
alnum = r'\p{Letter}\p{Digit}--\p{Block=Latin_1_Supplement}'
|
||||||
|
# A regexp that would be reasonably stricter would be:
|
||||||
|
# f'^[{alnum}][.{alnum}]*(?:-[.{alnum}]+)*$'
|
||||||
|
# However, current producers fail that regexp in a few different ways.
|
||||||
|
# See the tests for specific cases.
|
||||||
|
ENTITY_RE: Pattern[str] = regex.compile(f'^[{alnum}][-.{alnum}]*$', regex.VERSION1)
|
||||||
|
del alnum
|
||||||
|
|
||||||
def run(self, txn: Transaction) -> errormod.Iter:
|
def run(self, txn: Transaction) -> errormod.Iter:
|
||||||
txn_entity = txn.meta.get(self.METADATA_KEY)
|
txn_entity = txn.meta.get(self.METADATA_KEY)
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -12,6 +12,7 @@ setup(
|
||||||
|
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'beancount>=2.2',
|
'beancount>=2.2',
|
||||||
|
'regex',
|
||||||
'rt>=2.0',
|
'rt>=2.0',
|
||||||
],
|
],
|
||||||
setup_requires=[
|
setup_requires=[
|
||||||
|
|
|
@ -21,19 +21,47 @@ from . import testutil
|
||||||
from conservancy_beancount.plugin import meta_entity
|
from conservancy_beancount.plugin import meta_entity
|
||||||
|
|
||||||
VALID_VALUES = {
|
VALID_VALUES = {
|
||||||
|
# Classic entity: LastName-FirstName
|
||||||
'Smith-Alex',
|
'Smith-Alex',
|
||||||
|
# Various people and companies have one-word names
|
||||||
|
# Digits are allowed, as part of a name or standalone
|
||||||
'Company19',
|
'Company19',
|
||||||
|
'Company-19',
|
||||||
|
# No case requirements
|
||||||
'boyd-danah',
|
'boyd-danah',
|
||||||
|
# No limit on the number of parts of the name
|
||||||
'B-van-der-A',
|
'B-van-der-A',
|
||||||
|
# Names that have no ASCII are allowed, with or without dash separators
|
||||||
|
'田中流星',
|
||||||
|
'田中-流星',
|
||||||
|
'スミスダコタ',
|
||||||
|
'スミス-ダコタ',
|
||||||
|
'Яшин-Данила',
|
||||||
|
# The PayPal importer produces . in entity metadata
|
||||||
|
'Du-Bois-W.-E.-B.',
|
||||||
|
# import2ledger produces entities that end with -
|
||||||
|
# That's probably a bug, but allow it for now.
|
||||||
|
'foo-',
|
||||||
}
|
}
|
||||||
|
|
||||||
INVALID_VALUES = {
|
INVALID_VALUES = {
|
||||||
|
# Starting with a - is not allowed
|
||||||
'-foo',
|
'-foo',
|
||||||
'foo-',
|
|
||||||
'-',
|
'-',
|
||||||
|
# Names that can be reduced to ASCII should be reduced to ASCII.
|
||||||
|
# Producers should change this to Uberentity or Ueberentity
|
||||||
|
# I am not wild about this rule and would like to relax it—it's mostly
|
||||||
|
# based on an expectation that entities are typed in by a human. That's true
|
||||||
|
# less and less and it seems like we should reduce the amount of mangling
|
||||||
|
# producers are expected to do. But it's the rule for today.
|
||||||
'Überentity',
|
'Überentity',
|
||||||
|
# Whitespace is never allowed
|
||||||
'Alex Smith',
|
'Alex Smith',
|
||||||
|
'田中 流星',
|
||||||
|
'スミス ダコタ',
|
||||||
|
'Яшин Данила',
|
||||||
' ',
|
' ',
|
||||||
|
# An empty string is not valid
|
||||||
'',
|
'',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue