reconcile: Implement "subset sum" feature matching multiple books entries to a single statement entry.
This commit is contained in:
parent
965aeabde9
commit
405dd553cb
2 changed files with 171 additions and 65 deletions
|
@ -49,6 +49,7 @@ import csv
|
||||||
import datetime
|
import datetime
|
||||||
import decimal
|
import decimal
|
||||||
import io
|
import io
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -57,6 +58,7 @@ from typing import Callable, Dict, List, Tuple, TextIO
|
||||||
|
|
||||||
from beancount import loader
|
from beancount import loader
|
||||||
from beancount.query.query import run_query
|
from beancount.query.query import run_query
|
||||||
|
from colorama import Fore, Style
|
||||||
|
|
||||||
if not sys.warnoptions:
|
if not sys.warnoptions:
|
||||||
import warnings
|
import warnings
|
||||||
|
@ -164,21 +166,26 @@ def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def format_record(records: list[dict]) -> str:
|
def format_record(record: dict) -> str:
|
||||||
if len(records) == 1:
|
if record['payee'] and record['check_id']:
|
||||||
record = records[0]
|
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
|
||||||
|
elif record['payee']:
|
||||||
if record['payee'] and record['check_id']:
|
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:35]}".ljust(59)
|
||||||
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
|
|
||||||
elif record['payee']:
|
|
||||||
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:35]}".ljust(59)
|
|
||||||
else:
|
|
||||||
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} #{record['check_id']}".ljust(59)
|
|
||||||
return output
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} #{record['check_id']}".ljust(59)
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def format_multirecord(r1s, r2s, note):
    """Format one statement entry matched against several books entries.

    The first output row shows the statement entry next to the first books
    entry; each further books entry gets a continuation row marked with "↳".

    Args:
        r1s: List holding exactly one statement record.
        r2s: List holding two or more books records.
        note: Text appended after "✓ Matched" on every row.

    Returns:
        A list of ``[key, text]`` pairs, one per books record. The key is
        built from the statement entry's ISO date and payee; presumably the
        caller sorts on it -- verify against the caller.
    """
    assert len(r1s) == 1
    assert len(r2s) > 1
    statement_entry = r1s[0]
    first_books_entry, *other_books_entries = r2s
    date_text = statement_entry['date'].isoformat()
    payee = statement_entry['payee']

    # NOTE(review): spacing inside these literals is reproduced exactly as
    # found; confirm the continuation rows still line up with the width used
    # by format_record.
    match_output = [[
        date_text + ' ' + payee,
        f'{format_record(statement_entry)} → {format_record(first_books_entry)} ✓ Matched{note}',
    ]]
    for i, r2 in enumerate(other_books_entries):
        match_output.append([
            # The first row's key uses ' ' where these use the row index, and
            # ' ' compares lower than any digit, so the first row orders first.
            date_text + str(i) + payee,
            f'{date_text}: ↳ → {format_record(r2)} ✓ Matched{note}',
        ])
    return match_output
|
||||||
|
|
||||||
def sort_records(records: List) -> List:
    """Return a new list of records ordered by date, then by amount."""
    def date_then_amount(record):
        return record['date'], record['amount']

    ordered = list(records)
    ordered.sort(key=date_then_amount)
    return ordered
|
||||||
|
|
||||||
|
@ -253,13 +260,7 @@ def match_statement_and_books(statement_trans: list, books_trans: list):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
matches = []
|
matches = []
|
||||||
# We need a realised list and should be a copy so we can safely delete
|
remaining_books_trans = []
|
||||||
# items.
|
|
||||||
books_trans = list(books_trans)
|
|
||||||
|
|
||||||
# We can delete the matched books trans, but seems not a good idea to delete
|
|
||||||
# while iterating through statement_trans. Instead pushing onto a separate
|
|
||||||
# list.
|
|
||||||
remaining_statement_trans = []
|
remaining_statement_trans = []
|
||||||
|
|
||||||
for r1 in statement_trans:
|
for r1 in statement_trans:
|
||||||
|
@ -278,12 +279,13 @@ def match_statement_and_books(statement_trans: list, books_trans: list):
|
||||||
if best_match_score <= 0.8:
|
if best_match_score <= 0.8:
|
||||||
best_match_note.append('only one decent match')
|
best_match_note.append('only one decent match')
|
||||||
matches.append(([r1], [books_trans[best_match_index]], best_match_note))
|
matches.append(([r1], [books_trans[best_match_index]], best_match_note))
|
||||||
|
# Don't try to make a second match against this books entry.
|
||||||
del books_trans[best_match_index]
|
del books_trans[best_match_index]
|
||||||
else:
|
else:
|
||||||
matches.append(([r1], [], ['no match']))
|
remaining_statement_trans.append(r1)
|
||||||
for r2 in books_trans:
|
for r2 in books_trans:
|
||||||
matches.append(([], [r2], ['no match']))
|
remaining_books_trans.append(r2)
|
||||||
return matches
|
return matches, remaining_statement_trans, remaining_books_trans
|
||||||
|
|
||||||
|
|
||||||
def format_matches(matches, csv_statement: str, show_reconciled_matches):
|
def format_matches(matches, csv_statement: str, show_reconciled_matches):
|
||||||
|
@ -292,12 +294,15 @@ def format_matches(matches, csv_statement: str, show_reconciled_matches):
|
||||||
note = ', '.join(note)
|
note = ', '.join(note)
|
||||||
note = ': ' + note if note else note
|
note = ': ' + note if note else note
|
||||||
if r1 and r2:
|
if r1 and r2:
|
||||||
if show_reconciled_matches:
|
if show_reconciled_matches and all(x['bank_statement'] for x in r2):
|
||||||
match_output.append([r1[0]['date'], f'{format_record(r1)} → {format_record(r2)} ✓ Matched{note}'])
|
if len(r2) == 1:
|
||||||
|
match_output.append([r1[0]['date'].isoformat() + r1[0]['payee'], f'{format_record(r1[0])} → {format_record(r2[0])} ✓ Matched{note}'])
|
||||||
|
else:
|
||||||
|
match_output.extend(format_multirecord(r1, r2, note))
|
||||||
elif r1:
|
elif r1:
|
||||||
match_output.append([r1[0]['date'], f'{format_record(r1)} → {" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})'])
|
match_output.append([r1[0]['date'].isoformat() + r1[0]['payee'], Fore.RED + Style.BRIGHT + f'{format_record(r1[0])} → {" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})' + Style.RESET_ALL])
|
||||||
else:
|
else:
|
||||||
match_output.append([r2[0]['date'], f'{" ":^59} → {format_record(r2)} ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})'])
|
match_output.append([r2[0]['date'].isoformat() + r2[0]['payee'], Fore.RED + Style.BRIGHT + f'{" ":^59} → {format_record(r2[0])} ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})' + Style.RESET_ALL])
|
||||||
return match_output
|
return match_output
|
||||||
|
|
||||||
|
|
||||||
|
@ -400,6 +405,51 @@ def totals(matches):
|
||||||
total_missing_from_statement += sum(c['amount'] for c in books_entries)
|
total_missing_from_statement += sum(c['amount'] for c in books_entries)
|
||||||
return total_matched, total_missing_from_books, total_missing_from_statement
|
return total_matched, total_missing_from_books, total_missing_from_statement
|
||||||
|
|
||||||
|
|
||||||
|
def subset_match(statement_trans, books_trans):
    """Match groups of books entries against single statement entries.

    Books entries sharing the same date and payee are combined into one
    candidate whose amount is the sum of the group. That candidate is scored
    against every remaining statement entry with ``records_match``; a group
    is accepted when it has a strong match (score above 0.8) or exactly one
    decent match (score above 0.5) without a check-id mismatch.

    Neither input list is modified.

    Args:
        statement_trans: Statement records still unmatched.
        books_trans: Books records still unmatched.

    Returns:
        A ``(matches, remaining_statement_trans, remaining_books_trans)``
        tuple. Each match is ``([statement_record], group_of_books_records,
        notes)``. Books entries of groups that did not match are returned
        individually, exactly as they were passed in.
    """
    matches = []
    remaining_books_trans = []
    # Work on a copy: matched statement entries are removed as we go and the
    # caller's list must not change underneath it.
    remaining_statement_trans = list(statement_trans)

    # Group by (date, payee) in first-seen order. A dict is used rather than
    # itertools.groupby because groupby only merges *adjacent* items, and the
    # input is not guaranteed to be ordered by payee.
    groups = {}
    for entry in books_trans:
        groups.setdefault((entry['date'], entry['payee']), []).append(entry)

    for group_items in groups.values():
        best_match_score = 0
        best_match_index = None
        best_match_note = []
        matches_found = 0

        # Stand-in record for the whole group: the first entry's fields with
        # the group's summed amount. The originals are left untouched.
        combined = {
            **group_items[0],
            'amount': sum(x['amount'] for x in group_items),
        }
        for i, r1 in enumerate(remaining_statement_trans):
            score, note = records_match(r1, combined)
            if score >= 0.5 and score >= best_match_score:
                matches_found += 1
                best_match_score = score
                best_match_index = i
                best_match_note = note

        only_decent_match = (
            best_match_score > 0.5
            and matches_found == 1
            and 'check-id mismatch' not in best_match_note
        )
        if only_decent_match or best_match_score > 0.8:
            if best_match_score <= 0.8:
                best_match_note.append('only one decent match')
            # Popping ensures no second group can match this statement entry.
            matched_statement = remaining_statement_trans.pop(best_match_index)
            matches.append(([matched_statement], group_items, best_match_note))
        else:
            # Hand back the real entries, not the summed stand-in, so each
            # one can still be reported on its own.
            remaining_books_trans.extend(group_items)

    return matches, remaining_statement_trans, remaining_books_trans
|
||||||
|
|
||||||
|
def process_unmatched(statement_trans, books_trans):
    """Wrap leftover records as 'no match' entries in the matches format.

    Statement records come first, each paired with an empty books list;
    books records follow, each paired with an empty statement list.
    """
    statement_only = [([entry], [], ['no match']) for entry in statement_trans]
    books_only = [([], [entry], ['no match']) for entry in books_trans]
    return statement_only + books_only
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
# TODO: Should put in a sanity check to make sure the statement you're feeding
|
# TODO: Should put in a sanity check to make sure the statement you're feeding
|
||||||
# in matches the account you've provided.
|
# in matches the account you've provided.
|
||||||
|
@ -443,10 +493,13 @@ def main(args):
|
||||||
|
|
||||||
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
|
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
|
||||||
|
|
||||||
matches = match_statement_and_books(statement_trans, books_trans)
|
matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans)
|
||||||
match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
|
subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(remaining_statement_trans, remaining_books_trans)
|
||||||
|
matches.extend(subset_matches)
|
||||||
|
unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans)
|
||||||
|
matches.extend(unmatched)
|
||||||
|
|
||||||
# assert books_balance == books_balance_reconciled + total_matched + total_missing_from_statement
|
match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
|
||||||
|
|
||||||
total_matched, total_missing_from_books, total_missing_from_statement = totals(matches)
|
total_matched, total_missing_from_books, total_missing_from_statement = totals(matches)
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ from conservancy_beancount.reconcile.statement_reconciler import (
|
||||||
metadata_for_match,
|
metadata_for_match,
|
||||||
write_metadata_to_books,
|
write_metadata_to_books,
|
||||||
totals,
|
totals,
|
||||||
|
subset_match,
|
||||||
)
|
)
|
||||||
|
|
||||||
# These data structures represent individual transactions as taken from the
|
# These data structures represent individual transactions as taken from the
|
||||||
|
@ -40,6 +41,13 @@ S3 = {
|
||||||
'check_id': '',
|
'check_id': '',
|
||||||
'line': 444,
|
'line': 444,
|
||||||
}
|
}
|
||||||
|
# Statement transaction whose amount (-2260.00) is the sum of the three
# books entries B4A, B4B and B4C.
S4 = dict(
    date=datetime.date(2022, 8, 11),
    amount=decimal.Decimal('-2260.00'),
    payee='Trust 0000000362 210',
    check_id='',
    line=555,
)
|
||||||
|
|
||||||
# Books transaction examples.
|
# Books transaction examples.
|
||||||
B1 = {
|
B1 = {
|
||||||
|
@ -114,81 +122,116 @@ B3_unmatched_check_id = {
|
||||||
'line': 999,
|
'line': 999,
|
||||||
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
||||||
}
|
}
|
||||||
|
def _b4_books_entry(amount):
    """Build one of the same-day, same-payee books entries used for subset matching."""
    return {
        'date': datetime.date(2022, 8, 11),
        'amount': decimal.Decimal(amount),
        'payee': 'TRUST 0000000362 ACH Retirement Plan',
        'check_id': '',
        'line': 1000,
    }


# Three books entries sharing a date and payee; together they total -2260.00.
B4A = _b4_books_entry('-250.00')
B4B = _b4_books_entry('-250.00')
B4C = _b4_books_entry('-1760.00')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_one_exact_match():
|
def test_one_exact_match():
|
||||||
statement = [S1]
|
statement = [S1]
|
||||||
books = [B1]
|
books = [B1]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
# Match, match, notes.
|
# Match, match, notes.
|
||||||
#
|
#
|
||||||
# The matches are a list so we can implement subset-sum matching where
|
# The matches are a list so we can implement subset-sum matching where
|
||||||
# multiple books transactions may match to a single statement
|
# multiple books transactions may match to a single statement
|
||||||
# transaction.
|
# transaction.
|
||||||
([S1], [B1], []),
|
[([S1], [B1], [])],
|
||||||
]
|
[],
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
def test_multiple_exact_matches():
|
def test_multiple_exact_matches():
|
||||||
statement = [S1, S2]
|
statement = [S1, S2]
|
||||||
books = [B1, B2]
|
books = [B1, B2]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S1], [B1], []),
|
[([S1], [B1], []), ([S2], [B2], [])],
|
||||||
([S2], [B2], []),
|
[],
|
||||||
]
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
def test_one_mismatch():
|
def test_one_mismatch():
|
||||||
statement = [S1]
|
statement = [S1]
|
||||||
books = []
|
books = []
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S1], [], ['no match']),
|
[],
|
||||||
]
|
[S1],
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
def test_multiple_mismatches():
|
def test_multiple_mismatches():
|
||||||
statement = [S1]
|
statement = [S1]
|
||||||
books = [B2]
|
books = [B2]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S1], [], ['no match']),
|
[],
|
||||||
([], [B2], ['no match']),
|
[S1],
|
||||||
]
|
[B2],
|
||||||
|
)
|
||||||
|
|
||||||
def test_next_day_matches():
|
def test_next_day_matches():
|
||||||
statement = [S3]
|
statement = [S3]
|
||||||
books = [B3_next_day]
|
books = [B3_next_day]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S3], [B3_next_day], ['+/- 1 days']),
|
[([S3], [B3_next_day], ['+/- 1 days'])],
|
||||||
]
|
[],
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
def test_next_week_matches():
|
def test_next_week_matches():
|
||||||
statement = [S3]
|
statement = [S3]
|
||||||
books = [B3_next_week]
|
books = [B3_next_week]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S3], [B3_next_week], ['+/- 7 days']),
|
[([S3], [B3_next_week], ['+/- 7 days'])],
|
||||||
]
|
[],
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
def test_incorrect_amount_does_not_match():
|
def test_incorrect_amount_does_not_match():
|
||||||
statement = [S3]
|
statement = [S3]
|
||||||
books = [B3_mismatch_amount]
|
books = [B3_mismatch_amount]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S3], [], ['no match']),
|
[],
|
||||||
([], [B3_mismatch_amount], ['no match']),
|
[S3],
|
||||||
]
|
[B3_mismatch_amount],
|
||||||
|
)
|
||||||
|
|
||||||
def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
|
def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
|
||||||
statement = [S3]
|
statement = [S3]
|
||||||
books = [B3_payee_mismatch_1]
|
books = [B3_payee_mismatch_1]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S3], [B3_payee_mismatch_1], ['payee mismatch', 'only one decent match']),
|
[([S3], [B3_payee_mismatch_1], ['payee mismatch', 'only one decent match'])],
|
||||||
]
|
[],
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
|
def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
|
||||||
statement = [S3]
|
statement = [S3]
|
||||||
books = [B3_payee_mismatch_1, B3_payee_mismatch_2]
|
books = [B3_payee_mismatch_1, B3_payee_mismatch_2]
|
||||||
match = match_statement_and_books(statement, books)
|
match = match_statement_and_books(statement, books)
|
||||||
assert match == [
|
assert match == (
|
||||||
([S3], [], ['no match']),
|
[],
|
||||||
([], [B3_payee_mismatch_1], ['no match']),
|
[S3],
|
||||||
([], [B3_payee_mismatch_2], ['no match']),
|
[B3_payee_mismatch_1, B3_payee_mismatch_2],
|
||||||
]
|
)
|
||||||
|
|
||||||
def test_remove_payee_junk():
|
def test_remove_payee_junk():
|
||||||
assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS'
|
assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS'
|
||||||
|
@ -251,7 +294,17 @@ def test_payee_not_considered_if_check_id_present():
|
||||||
# These records match aside from check-id.
|
# These records match aside from check-id.
|
||||||
statement = [S3]
|
statement = [S3]
|
||||||
books = [B3_unmatched_check_id]
|
books = [B3_unmatched_check_id]
|
||||||
assert match_statement_and_books(statement, books) == [
|
assert match_statement_and_books(statement, books) == (
|
||||||
([S3], [], ['no match']),
|
[],
|
||||||
([], [B3_unmatched_check_id], ['no match']),
|
[S3],
|
||||||
]
|
[B3_unmatched_check_id],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_subset_sum_match():
    """Three same-day, same-payee books entries match one statement entry by sum."""
    expected_matches = [([S4], [B4A, B4B, B4C], [])]
    result = subset_match([S4], [B4A, B4B, B4C])
    # Nothing should be left over on either the statement or the books side.
    assert result == (expected_matches, [], [])
|
||||||
|
|
Loading…
Reference in a new issue