hooks.add_entity: Better handle common name prefix parts.

Keep prefixes such as "da", "de", and "van" with the surname they're
attached to, rather than breaking the name in the middle (for example,
"Jan van Smith" becomes "van-Smith-Jan").
This commit is contained in:
Brett Smith 2017-10-22 14:05:56 -04:00
parent f0dafcaed9
commit f888b13c56
2 changed files with 10 additions and 0 deletions

View file

@ -2,6 +2,11 @@ import re
import unicodedata import unicodedata
class AddEntityHook: class AddEntityHook:
NAME_PREFIXES = frozenset([
'da',
'de',
'van',
])
NONASCII_RE = re.compile(r'[^-A-Za-z0-9]') NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
NONALNUM_RE = re.compile(r'[^-\w]') NONALNUM_RE = re.compile(r'[^-\w]')
OPEN_PARENS = ['\\(', '\\[', '\\{'] OPEN_PARENS = ['\\(', '\\[', '\\{']
@ -41,6 +46,8 @@ class AddEntityHook:
if not parts: if not parts:
return '' return ''
parts.insert(0, parts.pop()) parts.insert(0, parts.pop())
if parts[-1].lower() in self.NAME_PREFIXES:
parts.insert(0, parts.pop())
return '-'.join(parts) return '-'.join(parts)
def run(self, data): def run(self, data):

View file

@ -19,6 +19,9 @@ def test_load_all():
('Alex(Nickname) Smith', 'Smith-Alex'), ('Alex(Nickname) Smith', 'Smith-Alex'),
('稲荷', '稲荷'), ('稲荷', '稲荷'),
('Pøweł', 'Powel'), ('Pøweł', 'Powel'),
('Elyse Jan Smith', 'Smith-Elyse-Jan'),
('Jan van Smith', 'van-Smith-Jan'),
('Francis da Silva', 'da-Silva-Francis'),
]) ])
def test_add_entity(payee, expected): def test_add_entity(payee, expected):
data = {'payee': payee} data = {'payee': payee}