reconcile: Add special case for payee first word match.
This commit is contained in:
parent
32fc4517a0
commit
a3e60c639f
2 changed files with 27 additions and 2 deletions
|
@ -89,6 +89,7 @@ JUNK_WORDS = [
|
||||||
'online',
|
'online',
|
||||||
'donation',
|
'donation',
|
||||||
'usd',
|
'usd',
|
||||||
|
'inc',
|
||||||
]
|
]
|
||||||
JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS]
|
JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS]
|
||||||
ZERO_RE = re.compile('^0+')
|
ZERO_RE = re.compile('^0+')
|
||||||
|
@ -182,6 +183,21 @@ def sort_records(records: List) -> List:
|
||||||
return sorted(records, key=lambda x: (x['date'], x['amount']))
|
return sorted(records, key=lambda x: (x['date'], x['amount']))
|
||||||
|
|
||||||
|
|
||||||
|
def first_word_exact_match(a, b):
    """Score two payee strings on whether their first words are identical.

    The comparison is case-insensitive (via ``str.casefold``). When the
    first words match, the score grows with the length of the matched word
    at 0.2 per character, capped at 1.0, so a word of five or more
    characters yields a full score.

    Args:
        a: First payee string.
        b: Second payee string.

    Returns:
        0 if either string has no words (empty or whitespace-only) or the
        first words differ; otherwise a float in (0, 1.0].
    """
    words_a = a.split()
    words_b = b.split()
    # Checking the split result (rather than the raw string length) also
    # covers whitespace-only input, which would otherwise raise IndexError
    # when taking the first word.
    if not words_a or not words_b:
        return 0
    # str.split() with no arguments already discards surrounding whitespace,
    # so the tokens need no further stripping.
    first_a = words_a[0]
    first_b = words_b[0]
    if first_a.casefold() == first_b.casefold():
        # Short shared prefixes are weak evidence; scale by word length.
        return min(1.0, 0.2 * len(first_a))
    return 0
|
||||||
|
|
||||||
|
def payee_match(a, b):
    """Return the better of two similarity scores for a pair of payees.

    One score is ``fuzz.token_set_ratio`` divided by 100; the other is the
    special-case score from ``first_word_exact_match``, which lets payees
    that share only a leading word still count as a match.

    NOTE(review): assumes ``fuzz.token_set_ratio`` returns a value on a
    0-100 scale, so the result is comparable to the 0-1 first-word score.
    """
    candidate_scores = (
        fuzz.token_set_ratio(a, b) / 100.00,
        first_word_exact_match(a, b),
    )
    return max(candidate_scores)
|
||||||
|
|
||||||
def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
||||||
"""Do these records represent the same transaction?"""
|
"""Do these records represent the same transaction?"""
|
||||||
|
|
||||||
|
@ -204,7 +220,8 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
||||||
else:
|
else:
|
||||||
check_score = 0.0
|
check_score = 0.0
|
||||||
|
|
||||||
payee_score = fuzz.token_set_ratio(r1['payee'], r2['payee']) / 100.00
|
payee_score = payee_match(r1['payee'], r2['payee'])
|
||||||
|
|
||||||
if check_score == 1.0 or payee_score > 0.8:
|
if check_score == 1.0 or payee_score > 0.8:
|
||||||
payee_message = ''
|
payee_message = ''
|
||||||
elif payee_score > 0.4:
|
elif payee_score > 0.4:
|
||||||
|
@ -387,7 +404,7 @@ def main(args):
|
||||||
|
|
||||||
out = io.StringIO()
|
out = io.StringIO()
|
||||||
print('-' * 155)
|
print('-' * 155)
|
||||||
print(f'{"Statement transaction":<38} {"Books transaction":<44} Notes')
|
print(f'{"Statement transaction":<52} {"Books transaction":<58} Notes')
|
||||||
print('-' * 155)
|
print('-' * 155)
|
||||||
for _, output in sorted(matches):
|
for _, output in sorted(matches):
|
||||||
print(output)
|
print(output)
|
||||||
|
|
|
@ -6,6 +6,7 @@ from conservancy_beancount.reconcile.prototype_amex_reconciler import (
|
||||||
remove_payee_junk,
|
remove_payee_junk,
|
||||||
date_proximity,
|
date_proximity,
|
||||||
remove_duplicate_words,
|
remove_duplicate_words,
|
||||||
|
payee_match,
|
||||||
)
|
)
|
||||||
|
|
||||||
S1 = {
|
S1 = {
|
||||||
|
@ -178,3 +179,10 @@ def test_date_proximity():
|
||||||
|
|
||||||
def test_remove_duplicate_words():
|
def test_remove_duplicate_words():
|
||||||
assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'
|
assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'
|
||||||
|
|
||||||
|
def test_remove_duplicate_words():
|
||||||
|
assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'
|
||||||
|
|
||||||
|
def test_payee_matches_when_first_word_matches():
    """Payees sharing an identical first word score by that word's length."""
    cases = [
        # (statement payee, books payee, expected score)
        ('Gandi San Francisco', 'Gandi example.com renewal 1234567', 1.0),
        ('USPS 123456789 Portland', 'USPS John Brown', 0.8),
    ]
    for statement_payee, books_payee, expected in cases:
        assert payee_match(statement_payee, books_payee) == expected
|
||||||
|
|
Loading…
Reference in a new issue