From f0dafcaed9e9c68964d34145863c3d1a97d0bebc Mon Sep 17 00:00:00 2001
From: Brett Smith
Date: Sun, 22 Oct 2017 13:58:18 -0400
Subject: [PATCH] hooks.add_entity: ASCIIfy stroke characters.

Characters whose name ends in "WITH STROKE" are replaced by the character
named by the rest of that name.  When no such character exists, the
original character is kept instead of raising KeyError.
---
 import2ledger/hooks/add_entity.py | 23 +++++++++++++++++++++++
 tests/test_hooks.py               |  1 +
 2 files changed, 24 insertions(+)

diff --git a/import2ledger/hooks/add_entity.py b/import2ledger/hooks/add_entity.py
index b6583f1..d0f4791 100644
--- a/import2ledger/hooks/add_entity.py
+++ b/import2ledger/hooks/add_entity.py
@@ -19,6 +19,28 @@ class AddEntityHook:
                 s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
         return s if s else last_s
 
+    def _destroke_chr(self, c):
+        """Return c with its stroke removed, or c itself if it has none.
+
+        A character whose Unicode name ends in ' WITH STROKE' (such as
+        'ø' or 'ł') is replaced by the character named by the rest of
+        that name.  Any other character is returned unchanged.
+        """
+        suffix = ' WITH STROKE'
+        name = unicodedata.name(c, '')
+        if not name.endswith(suffix):
+            return c
+        try:
+            return unicodedata.lookup(name[:-len(suffix)])
+        except KeyError:
+            # Some stroked characters have no unstroked counterpart
+            # (e.g. U+01BB LATIN LETTER TWO WITH STROKE): keep them as-is.
+            return c
+
+    def _destroke(self, s):
+        """Return s with each stroked character replaced by its base."""
+        return ''.join(self._destroke_chr(c) for c in s)
+
     def _entity_parts(self, s, trim_re):
         for word in s.split():
             word = unicodedata.normalize('NFKD', word)
@@ -36,6 +58,7 @@ class AddEntityHook:
     def run(self, data):
         if ('payee' in data) and ('entity' not in data):
             payee = self._remove_parens(data['payee'])
+            payee = self._destroke(payee)
             entity = self._str2entity(payee, self.NONASCII_RE)
             if not entity:
                 entity = self._str2entity(payee, self.NONALNUM_RE)
diff --git a/tests/test_hooks.py b/tests/test_hooks.py
index 1a7aa3c..d9f68a3 100644
--- a/tests/test_hooks.py
+++ b/tests/test_hooks.py
@@ -18,6 +18,7 @@ def test_load_all():
     ('Fran Doe-Smith', 'Doe-Smith-Fran'),
     ('Alex(Nickname) Smith', 'Smith-Alex'),
     ('稲荷', '稲荷'),
+    ('Pøweł', 'Powel'),
 ])
 def test_add_entity(payee, expected):
     data = {'payee': payee}