import2ledger/import2ledger/hooks/add_entity.py

72 lines
2.2 KiB
Python

import re
import unicodedata
class AddEntityHook:
    """Hook that derives hyphen-joined entity strings from names.

    ``run`` reads ``payee`` and ``corporation`` from an entry mapping and
    stores the derived identifiers under ``entity`` and ``corp_entity``.
    Values already present under those output keys are left untouched.
    """

    # Lowercase words that stay attached to the words following them when a
    # name is reordered (e.g. "Vincent van Gogh" -> "van-Gogh-Vincent").
    NAME_PREFIXES = frozenset([
        'da',
        'de',
        'der',
        'la',
        'van',
    ])
    # Matches any character that is not an ASCII letter, digit, or hyphen.
    NONASCII_RE = re.compile(r'[^-A-Za-z0-9]')
    # Matches any character that is neither a hyphen nor a regex word character.
    NONALNUM_RE = re.compile(r'[^-\w]')
    # Regex-escaped bracket characters; the two lists are paired by index.
    OPEN_PARENS = ['\\(', '\\[', '\\{']
    CLOSE_PARENS = ['\\)', '\\]', '\\}']
    # Pattern for a (possibly empty) run of characters containing no bracket
    # of any kind, so each substitution only removes an innermost group.
    NO_PARENS = '[^{}]*'.format(''.join(OPEN_PARENS + CLOSE_PARENS))
    # Suffix of Unicode character names that denotes a stroked letter.
    _STROKE_SUFFIX = ' WITH STROKE'

    def __init__(self, config):
        """Create the hook; ``config`` is accepted but not used."""
        pass

    def _remove_parens(self, s):
        """Return ``s`` with all bracketed groups removed.

        Groups delimited by ``()``, ``[]`` or ``{}`` are stripped innermost
        first, repeating until a pass changes nothing, so nested groups are
        removed as well.  If stripping leaves an empty or whitespace-only
        string, the original ``s`` is returned instead so that a name
        consisting only of bracketed text still yields an entity.
        """
        original = s
        last_s = None
        while s != last_s:
            last_s = s
            for open_c, close_c in zip(self.OPEN_PARENS, self.CLOSE_PARENS):
                s = re.sub(open_c + self.NO_PARENS + close_c, '', s)
        # The loop only ends once s == last_s, so last_s cannot serve as a
        # fallback here; the untouched input is the only non-empty candidate.
        return s if s.strip() else original

    def _destroke_chr(self, c):
        """Return the stroke-less counterpart of the single character ``c``.

        Characters whose Unicode name ends in " WITH STROKE" are replaced by
        the character named without that suffix.  Any other character, and
        any stroked character for which no such counterpart exists in the
        Unicode database, is returned unchanged.
        """
        name = unicodedata.name(c, '')
        if not name.endswith(self._STROKE_SUFFIX):
            return c
        try:
            return unicodedata.lookup(name[:-len(self._STROKE_SUFFIX)])
        except KeyError:
            # unicodedata.lookup raises KeyError for unknown names; keep the
            # original character rather than failing the whole conversion.
            return c

    def _destroke(self, s):
        """Return ``s`` with ``_destroke_chr`` applied to every character."""
        return ''.join(self._destroke_chr(c) for c in s)

    def _entity_parts(self, s, trim_re):
        """Yield the non-empty cleaned words of ``s``.

        Each whitespace-separated word is NFKD-normalized and then has every
        match of the compiled pattern ``trim_re`` removed.  Words that end
        up empty are skipped.
        """
        for word in s.split():
            word = unicodedata.normalize('NFKD', word)
            word = trim_re.sub('', word)
            if word:
                yield word

    def _str2entity(self, s, trim_re, name_shifts):
        """Return the cleaned words of ``s`` joined with hyphens.

        When ``name_shifts`` is positive, the last ``name_shifts`` words,
        together with any ``NAME_PREFIXES`` words directly preceding them,
        are moved to the front.  The order is left as-is when there are too
        few words to perform that rotation.
        """
        parts = list(self._entity_parts(s, trim_re))
        if name_shifts > 0:
            # Start at the word just before the ones to be moved and walk
            # backwards over any name prefixes.
            pivot = -name_shifts - 1
            try:
                while parts[pivot].lower() in self.NAME_PREFIXES:
                    pivot -= 1
            except IndexError:
                # Ran off the front of the list: nothing would remain to
                # rotate around, so keep the original order.
                pass
            else:
                pivot += 1
                parts = parts[pivot:] + parts[:pivot]
        return '-'.join(parts)

    def _name2entity(self, name, name_shifts):
        """Convert ``name`` to an entity string.

        Bracketed text and letter strokes are removed first.  The result is
        then built from ASCII letters, digits and hyphens only; if that
        leaves nothing, it is rebuilt keeping all word characters and
        without any reordering.
        """
        name = self._remove_parens(name)
        name = self._destroke(name)
        entity = self._str2entity(name, self.NONASCII_RE, name_shifts)
        if not entity:
            entity = self._str2entity(name, self.NONALNUM_RE, 0)
        return entity

    def run(self, data):
        """Add ``entity`` / ``corp_entity`` to the mapping ``data`` in place.

        ``entity`` is derived from ``data['payee']`` with the last word moved
        to the front; ``corp_entity`` is derived from ``data['corporation']``
        with the word order kept.  Each key is only set when its source key
        is present and the output key is not already in ``data``.
        """
        if ('payee' in data) and ('entity' not in data):
            data['entity'] = self._name2entity(data['payee'], 1)
        if ('corporation' in data) and ('corp_entity' not in data):
            data['corp_entity'] = self._name2entity(data['corporation'], 0)