hooks.add_entity: Better handle common name prefix parts.
Keep each prefix (such as 'da', 'de', or 'van') with the name it is attached to, rather than breaking the name in the middle.
This commit is contained in:
parent
f0dafcaed9
commit
f888b13c56
2 changed files with 10 additions and 0 deletions
|
@ -2,6 +2,11 @@ import re
|
|||
import unicodedata
|
||||
|
||||
class AddEntityHook:
|
||||
NAME_PREFIXES = frozenset([
|
||||
'da',
|
||||
'de',
|
||||
'van',
|
||||
])
|
||||
NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
|
||||
NONALNUM_RE = re.compile(r'[^-\w]')
|
||||
OPEN_PARENS = ['\\(', '\\[', '\\{']
|
||||
|
@ -41,6 +46,8 @@ class AddEntityHook:
|
|||
if not parts:
|
||||
return ''
|
||||
parts.insert(0, parts.pop())
|
||||
if parts[-1].lower() in self.NAME_PREFIXES:
|
||||
parts.insert(0, parts.pop())
|
||||
return '-'.join(parts)
|
||||
|
||||
def run(self, data):
|
||||
|
|
|
@ -19,6 +19,9 @@ def test_load_all():
|
|||
('Alex(Nickname) Smith', 'Smith-Alex'),
|
||||
('稲荷', '稲荷'),
|
||||
('Pøweł', 'Powel'),
|
||||
('Elyse Jan Smith', 'Smith-Elyse-Jan'),
|
||||
('Jan van Smith', 'van-Smith-Jan'),
|
||||
('Francis da Silva', 'da-Silva-Francis'),
|
||||
])
|
||||
def test_add_entity(payee, expected):
|
||||
data = {'payee': payee}
|
||||
|
|
Loading…
Reference in a new issue