From f888b13c56b0b2a6bb92ffbcd624c6dad002d83d Mon Sep 17 00:00:00 2001
From: Brett Smith
Date: Sun, 22 Oct 2017 14:05:56 -0400
Subject: [PATCH] hooks.add_entity: Better handle common name prefix parts.

Keep these with the name they're attached to, rather than breaking the
name in the middle.
---
 import2ledger/hooks/add_entity.py | 7 +++++++
 tests/test_hooks.py               | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/import2ledger/hooks/add_entity.py b/import2ledger/hooks/add_entity.py
index d0f4791..d4f6191 100644
--- a/import2ledger/hooks/add_entity.py
+++ b/import2ledger/hooks/add_entity.py
@@ -2,6 +2,11 @@ import re
 import unicodedata
 
 class AddEntityHook:
+    NAME_PREFIXES = frozenset([
+        'da',
+        'de',
+        'van',
+    ])
     NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
     NONALNUM_RE = re.compile(r'[^-\w]')
     OPEN_PARENS = ['\\(', '\\[', '\\{']
@@ -41,6 +46,8 @@ class AddEntityHook:
         if not parts:
             return ''
         parts.insert(0, parts.pop())
+        if parts[-1].lower() in self.NAME_PREFIXES:
+            parts.insert(0, parts.pop())
         return '-'.join(parts)
 
     def run(self, data):
diff --git a/tests/test_hooks.py b/tests/test_hooks.py
index d9f68a3..270209f 100644
--- a/tests/test_hooks.py
+++ b/tests/test_hooks.py
@@ -19,6 +19,9 @@ def test_load_all():
     ('Alex(Nickname) Smith', 'Smith-Alex'),
     ('稲荷', '稲荷'),
     ('Pøweł', 'Powel'),
+    ('Elyse Jan Smith', 'Smith-Elyse-Jan'),
+    ('Jan van Smith', 'van-Smith-Jan'),
+    ('Francis da Silva', 'da-Silva-Francis'),
 ])
 def test_add_entity(payee, expected):
     data = {'payee': payee}