reconcile: Add special case for payee first word match.
This commit is contained in:
parent
32fc4517a0
commit
a3e60c639f
2 changed files with 27 additions and 2 deletions
|
@ -89,6 +89,7 @@ JUNK_WORDS = [
|
|||
'online',
|
||||
'donation',
|
||||
'usd',
|
||||
'inc',
|
||||
]
|
||||
JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS]
|
||||
ZERO_RE = re.compile('^0+')
|
||||
|
@ -182,6 +183,21 @@ def sort_records(records: List) -> List:
|
|||
return sorted(records, key=lambda x: (x['date'], x['amount']))
|
||||
|
||||
|
||||
def first_word_exact_match(a, b):
    """Score two strings by whether their first words are identical.

    The first whitespace-delimited word of each string is compared
    case-insensitively using ``str.casefold``. When the words are equal the
    score is 0.2 per character of the first word of ``a``, capped at 1.0
    (so a shared first word of five or more characters scores 1.0).

    Args:
        a: First string to compare.
        b: Second string to compare.

    Returns:
        A score between 0 and 1.0; 0 when the first words differ or when
        either string contains no words at all.
    """
    words_a = a.split()
    words_b = b.split()
    # str.split() with no separator returns [] for both empty and
    # whitespace-only strings, so this single check covers inputs such as
    # '' and '   ' -- indexing [0] on the latter would raise IndexError.
    if not words_a or not words_b:
        return 0
    first_a = words_a[0]
    first_b = words_b[0]
    if first_a.casefold() == first_b.casefold():
        # Longer shared first words are stronger evidence of a match.
        return min(1.0, 0.2 * len(first_a))
    return 0
|
||||
|
||||
def payee_match(a, b):
    """Return the better of two similarity scores for a pair of payees.

    One score is ``fuzz.token_set_ratio(a, b)`` divided by 100; the other is
    ``first_word_exact_match(a, b)``. The larger of the two is returned.

    NOTE(review): ``fuzz`` is imported outside this view; the division by
    100 suggests it yields a 0-100 score -- confirm against the library used.
    """
    return max(
        fuzz.token_set_ratio(a, b) / 100.00,
        first_word_exact_match(a, b),
    )
|
||||
|
||||
def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
||||
"""Do these records represent the same transaction?"""
|
||||
|
||||
|
@ -204,7 +220,8 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
|||
else:
|
||||
check_score = 0.0
|
||||
|
||||
payee_score = fuzz.token_set_ratio(r1['payee'], r2['payee']) / 100.00
|
||||
payee_score = payee_match(r1['payee'], r2['payee'])
|
||||
|
||||
if check_score == 1.0 or payee_score > 0.8:
|
||||
payee_message = ''
|
||||
elif payee_score > 0.4:
|
||||
|
@ -387,7 +404,7 @@ def main(args):
|
|||
|
||||
out = io.StringIO()
|
||||
print('-' * 155)
|
||||
print(f'{"Statement transaction":<38} {"Books transaction":<44} Notes')
|
||||
print(f'{"Statement transaction":<52} {"Books transaction":<58} Notes')
|
||||
print('-' * 155)
|
||||
for _, output in sorted(matches):
|
||||
print(output)
|
||||
|
|
|
@ -6,6 +6,7 @@ from conservancy_beancount.reconcile.prototype_amex_reconciler import (
|
|||
remove_payee_junk,
|
||||
date_proximity,
|
||||
remove_duplicate_words,
|
||||
payee_match,
|
||||
)
|
||||
|
||||
S1 = {
|
||||
|
@ -178,3 +179,10 @@ def test_date_proximity():
|
|||
|
||||
def test_remove_duplicate_words():
    """A word repeated later in the string, in a different case, is dropped."""
    assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'
|
||||
|
||||
def test_payee_matches_when_first_word_matches():
    """Payees that share a first word get the expected payee_match score."""
    cases = [
        # Shared first word 'Gandi' (5 characters).
        ('Gandi San Francisco', 'Gandi example.com renewal 1234567', 1.0),
        # Shared first word 'USPS' (4 characters).
        ('USPS 123456789 Portland', 'USPS John Brown', 0.8),
    ]
    for statement_payee, books_payee, expected in cases:
        assert payee_match(statement_payee, books_payee) == expected
|
||||
|
|
Loading…
Reference in a new issue