hooks.add_entity: ASCIIfy stroke characters.
This commit is contained in:
parent
5c73c40bcc
commit
f0dafcaed9
2 changed files with 12 additions and 0 deletions
|
@ -19,6 +19,16 @@ class AddEntityHook:
|
||||||
s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
|
s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
|
||||||
return s if s else last_s
|
return s if s else last_s
|
||||||
|
|
||||||
|
def _destroke_chr(self, c):
|
||||||
|
name = unicodedata.name(c, '')
|
||||||
|
if name.endswith(' WITH STROKE'):
|
||||||
|
return unicodedata.lookup(name[:-12])
|
||||||
|
else:
|
||||||
|
return c
|
||||||
|
|
||||||
|
def _destroke(self, s):
|
||||||
|
return ''.join(self._destroke_chr(c) for c in s)
|
||||||
|
|
||||||
def _entity_parts(self, s, trim_re):
|
def _entity_parts(self, s, trim_re):
|
||||||
for word in s.split():
|
for word in s.split():
|
||||||
word = unicodedata.normalize('NFKD', word)
|
word = unicodedata.normalize('NFKD', word)
|
||||||
|
@ -36,6 +46,7 @@ class AddEntityHook:
|
||||||
def run(self, data):
|
def run(self, data):
|
||||||
if ('payee' in data) and ('entity' not in data):
|
if ('payee' in data) and ('entity' not in data):
|
||||||
payee = self._remove_parens(data['payee'])
|
payee = self._remove_parens(data['payee'])
|
||||||
|
payee = self._destroke(payee)
|
||||||
entity = self._str2entity(payee, self.NONASCII_RE)
|
entity = self._str2entity(payee, self.NONASCII_RE)
|
||||||
if not entity:
|
if not entity:
|
||||||
entity = self._str2entity(payee, self.NONALNUM_RE)
|
entity = self._str2entity(payee, self.NONALNUM_RE)
|
||||||
|
|
|
@ -18,6 +18,7 @@ def test_load_all():
|
||||||
('Fran Doe-Smith', 'Doe-Smith-Fran'),
|
('Fran Doe-Smith', 'Doe-Smith-Fran'),
|
||||||
('Alex(Nickname) Smith', 'Smith-Alex'),
|
('Alex(Nickname) Smith', 'Smith-Alex'),
|
||||||
('稲荷', '稲荷'),
|
('稲荷', '稲荷'),
|
||||||
|
('Pøweł', 'Powel'),
|
||||||
])
|
])
|
||||||
def test_add_entity(payee, expected):
|
def test_add_entity(payee, expected):
|
||||||
data = {'payee': payee}
|
data = {'payee': payee}
|
||||||
|
|
Loading…
Reference in a new issue