hooks.add_entity: ASCIIfy stroke characters.

This commit is contained in:
Brett Smith 2017-10-22 13:58:18 -04:00
parent 5c73c40bcc
commit f0dafcaed9
2 changed files with 12 additions and 0 deletions

View file

@@ -19,6 +19,16 @@ class AddEntityHook:
s = re.sub(open_c + self.NO_PARENS + close_c, '', s) s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
return s if s else last_s return s if s else last_s
def _destroke_chr(self, c):
name = unicodedata.name(c, '')
if name.endswith(' WITH STROKE'):
return unicodedata.lookup(name[:-12])
else:
return c
def _destroke(self, s):
return ''.join(self._destroke_chr(c) for c in s)
def _entity_parts(self, s, trim_re): def _entity_parts(self, s, trim_re):
for word in s.split(): for word in s.split():
word = unicodedata.normalize('NFKD', word) word = unicodedata.normalize('NFKD', word)
@@ -36,6 +46,7 @@ class AddEntityHook:
def run(self, data): def run(self, data):
if ('payee' in data) and ('entity' not in data): if ('payee' in data) and ('entity' not in data):
payee = self._remove_parens(data['payee']) payee = self._remove_parens(data['payee'])
payee = self._destroke(payee)
entity = self._str2entity(payee, self.NONASCII_RE) entity = self._str2entity(payee, self.NONASCII_RE)
if not entity: if not entity:
entity = self._str2entity(payee, self.NONALNUM_RE) entity = self._str2entity(payee, self.NONALNUM_RE)

View file

@@ -18,6 +18,7 @@ def test_load_all():
('Fran Doe-Smith', 'Doe-Smith-Fran'), ('Fran Doe-Smith', 'Doe-Smith-Fran'),
('Alex(Nickname) Smith', 'Smith-Alex'), ('Alex(Nickname) Smith', 'Smith-Alex'),
('稲荷', '稲荷'), ('稲荷', '稲荷'),
('Pøweł', 'Powel'),
]) ])
def test_add_entity(payee, expected): def test_add_entity(payee, expected):
data = {'payee': payee} data = {'payee': payee}