hooks.add_entity: Better handle common name prefix parts.
Keep these with the name they're attached to, rather than breaking the name in the middle.
This commit is contained in:
parent
f0dafcaed9
commit
f888b13c56
2 changed files with 10 additions and 0 deletions
|
@@ -2,6 +2,11 @@ import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
class AddEntityHook:
|
class AddEntityHook:
|
||||||
|
NAME_PREFIXES = frozenset([
|
||||||
|
'da',
|
||||||
|
'de',
|
||||||
|
'van',
|
||||||
|
])
|
||||||
NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
|
NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
|
||||||
NONALNUM_RE = re.compile(r'[^-\w]')
|
NONALNUM_RE = re.compile(r'[^-\w]')
|
||||||
OPEN_PARENS = ['\\(', '\\[', '\\{']
|
OPEN_PARENS = ['\\(', '\\[', '\\{']
|
||||||
|
@@ -41,6 +46,8 @@ class AddEntityHook:
|
||||||
if not parts:
|
if not parts:
|
||||||
return ''
|
return ''
|
||||||
parts.insert(0, parts.pop())
|
parts.insert(0, parts.pop())
|
||||||
|
if parts[-1].lower() in self.NAME_PREFIXES:
|
||||||
|
parts.insert(0, parts.pop())
|
||||||
return '-'.join(parts)
|
return '-'.join(parts)
|
||||||
|
|
||||||
def run(self, data):
|
def run(self, data):
|
||||||
|
|
|
@@ -19,6 +19,9 @@ def test_load_all():
|
||||||
('Alex(Nickname) Smith', 'Smith-Alex'),
|
('Alex(Nickname) Smith', 'Smith-Alex'),
|
||||||
('稲荷', '稲荷'),
|
('稲荷', '稲荷'),
|
||||||
('Pøweł', 'Powel'),
|
('Pøweł', 'Powel'),
|
||||||
|
('Elyse Jan Smith', 'Smith-Elyse-Jan'),
|
||||||
|
('Jan van Smith', 'van-Smith-Jan'),
|
||||||
|
('Francis da Silva', 'da-Silva-Francis'),
|
||||||
])
|
])
|
||||||
def test_add_entity(payee, expected):
|
def test_add_entity(payee, expected):
|
||||||
data = {'payee': payee}
|
data = {'payee': payee}
|
||||||
|
|
Loading…
Reference in a new issue