reconcile: Implement "subset sum" feature matching multiple books entries to a single statement entry.
This commit is contained in:
		
							parent
							
								
									965aeabde9
								
							
						
					
					
						commit
						405dd553cb
					
				
					 2 changed files with 171 additions and 65 deletions
				
			
		|  | @ -49,6 +49,7 @@ import csv | |||
| import datetime | ||||
| import decimal | ||||
| import io | ||||
| import itertools | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
|  | @ -57,6 +58,7 @@ from typing import Callable, Dict, List, Tuple, TextIO | |||
| 
 | ||||
| from beancount import loader | ||||
| from beancount.query.query import run_query | ||||
| from colorama import Fore, Style | ||||
| 
 | ||||
| if not sys.warnoptions: | ||||
|     import warnings | ||||
|  | @ -164,21 +166,26 @@ def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def] | |||
|     } | ||||
| 
 | ||||
| 
 | ||||
| def format_record(records: list[dict]) -> str: | ||||
|     if len(records) == 1: | ||||
|         record = records[0] | ||||
| 
 | ||||
|         if record['payee'] and record['check_id']: | ||||
|             output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59) | ||||
|         elif record['payee']: | ||||
|             output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:35]}".ljust(59) | ||||
|         else: | ||||
|             output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} #{record['check_id']}".ljust(59) | ||||
|         return output | ||||
def format_record(record: dict) -> str:
    """Render one transaction record as a fixed-width, single-line summary.

    The line always starts with the ISO date and the amount (right-aligned
    in 11 columns, thousands-separated, two decimals). What follows depends
    on which identifying fields are populated:

    * payee and check id: payee truncated to 25 characters, then ``#<check_id>``
    * payee only: payee truncated to 35 characters
    * no payee: ``#<check_id>`` only

    Args:
        record: Mapping with ``date`` (anything exposing ``isoformat()``),
            ``amount``, ``payee`` and ``check_id`` keys.

    Returns:
        The summary left-justified (space padded) to at least 59 characters
        so that the two sides of a match line up in columns.
    """
    prefix = f"{record['date'].isoformat()}: {record['amount']:11,.2f}"
    payee = record['payee']
    check_id = record['check_id']
    if payee and check_id:
        output = f"{prefix} {payee[:25]} #{check_id}"
    elif payee:
        output = f"{prefix} {payee[:35]}"
    else:
        # No payee to show: identify the record by its check id instead.
        # (Previously a stray ``raise NotImplementedError`` made this branch
        # crash and left the formatting line below it unreachable.)
        output = f"{prefix} #{check_id}"
    return output.ljust(59)
| 
 | ||||
| 
 | ||||
def format_multirecord(r1s, r2s, note):
    """Format one statement entry matched against several books entries.

    The first output row shows the statement entry next to the first books
    entry; every further books entry gets its own continuation row (marked
    with ``↳``) under the same statement date.

    Args:
        r1s: List holding exactly one statement record.
        r2s: List of two or more books records matched to that statement
            record.
        note: Already-formatted note suffix appended after "✓ Matched".

    Returns:
        A list of ``[sort_key, text]`` pairs, one per books record. The sort
        key is the statement date plus payee; continuation rows embed their
        index in the key (the first row uses a space in that position).
    """
    # Internal invariants: callers only route one-to-many matches here.
    assert len(r1s) == 1
    assert len(r2s) > 1
    statement = r1s[0]
    date = statement['date'].isoformat()
    payee = statement['payee']
    match_output = [
        [date + ' ' + payee, f'{format_record(statement)}  →  {format_record(r2s[0])}  ✓ Matched{note}'],
    ]
    for i, r2 in enumerate(r2s[1:]):
        match_output.append([
            date + str(i) + payee,
            f'{date}:             ↳                                    →  {format_record(r2)}  ✓ Matched{note}',
        ])
    return match_output
| 
 | ||||
def sort_records(records: List) -> List:
    """Return a new list of the records ordered by date, then by amount."""
    def date_then_amount(record):
        return record['date'], record['amount']

    return sorted(records, key=date_then_amount)
| 
 | ||||
|  | @ -253,13 +260,7 @@ def match_statement_and_books(statement_trans: list, books_trans: list): | |||
| 
 | ||||
|     """ | ||||
|     matches = [] | ||||
|     # We need a realised list and should be a copy so we can safely delete | ||||
|     # items. | ||||
|     books_trans = list(books_trans) | ||||
| 
 | ||||
|     # We can delete the matched books trans, but seems not a good idea to delete | ||||
|     # while iterating through statement_trans. Instead pushing onto a separate | ||||
|     # list. | ||||
|     remaining_books_trans = [] | ||||
|     remaining_statement_trans = [] | ||||
| 
 | ||||
|     for r1 in statement_trans: | ||||
|  | @ -278,12 +279,13 @@ def match_statement_and_books(statement_trans: list, books_trans: list): | |||
|             if best_match_score <= 0.8: | ||||
|                 best_match_note.append('only one decent match') | ||||
|             matches.append(([r1], [books_trans[best_match_index]], best_match_note)) | ||||
|             # Don't try to make a second match against this books entry. | ||||
|             del books_trans[best_match_index] | ||||
|         else: | ||||
|             matches.append(([r1], [], ['no match'])) | ||||
|             remaining_statement_trans.append(r1) | ||||
|     for r2 in books_trans: | ||||
|         matches.append(([], [r2], ['no match'])) | ||||
|     return matches | ||||
|         remaining_books_trans.append(r2) | ||||
|     return matches, remaining_statement_trans, remaining_books_trans | ||||
| 
 | ||||
| 
 | ||||
| def format_matches(matches, csv_statement: str, show_reconciled_matches): | ||||
|  | @ -292,12 +294,15 @@ def format_matches(matches, csv_statement: str, show_reconciled_matches): | |||
|         note = ', '.join(note) | ||||
|         note = ': ' + note if note else note | ||||
|         if r1 and r2: | ||||
|             if show_reconciled_matches: | ||||
|                 match_output.append([r1[0]['date'], f'{format_record(r1)}  →  {format_record(r2)}  ✓ Matched{note}']) | ||||
|             if show_reconciled_matches and all(x['bank_statement'] for x in r2): | ||||
|                 if len(r2) == 1: | ||||
|                     match_output.append([r1[0]['date'].isoformat() + r1[0]['payee'], f'{format_record(r1[0])}  →  {format_record(r2[0])}  ✓ Matched{note}']) | ||||
|                 else: | ||||
|                     match_output.extend(format_multirecord(r1, r2, note)) | ||||
|         elif r1: | ||||
|             match_output.append([r1[0]['date'], f'{format_record(r1)}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})']) | ||||
|             match_output.append([r1[0]['date'].isoformat() + r1[0]['payee'], Fore.RED + Style.BRIGHT + f'{format_record(r1[0])}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})' + Style.RESET_ALL]) | ||||
|         else: | ||||
|             match_output.append([r2[0]['date'], f'{" ":^59}  →  {format_record(r2)}  ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})']) | ||||
|             match_output.append([r2[0]['date'].isoformat() + r2[0]['payee'], Fore.RED + Style.BRIGHT + f'{" ":^59}  →  {format_record(r2[0])}  ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})' + Style.RESET_ALL]) | ||||
|     return match_output | ||||
| 
 | ||||
| 
 | ||||
|  | @ -400,6 +405,51 @@ def totals(matches): | |||
|             total_missing_from_statement += sum(c['amount'] for c in books_entries) | ||||
|     return total_matched, total_missing_from_books, total_missing_from_statement | ||||
| 
 | ||||
| 
 | ||||
def subset_match(statement_trans, books_trans):
    """Match single statement entries against sums of several books entries.

    Books entries that share the same date and payee are grouped and their
    amounts totalled. Each group is then compared, via ``records_match``,
    against every still-unmatched statement entry as if it were one record
    carrying the total amount. A group is accepted when it has either a
    strong match (score above 0.8) or exactly one decent candidate (score
    above 0.5) that is not a check-id mismatch.

    Neither input list is modified.

    Args:
        statement_trans: Statement records left over from one-to-one matching.
        books_trans: Books records left over from one-to-one matching.

    Returns:
        A 3-tuple ``(matches, remaining_statement_trans,
        remaining_books_trans)``. ``matches`` is a list of
        ``([statement_record], [books_record, ...], notes)`` tuples; the
        remaining lists hold the original, unmodified records that were not
        matched.
    """
    matches = []
    remaining_books_trans = []
    # Work on a copy so matched entries can be dropped without touching the
    # caller's list.
    remaining_statement_trans = list(statement_trans)

    # Group by (date, payee) in first-seen order. A dict is used instead of
    # itertools.groupby because groupby only merges *adjacent* items (the
    # books are sorted by date and amount, not payee), and because grouping
    # up front means nothing is removed from a list while it is iterated.
    groups = {}
    for entry in books_trans:
        groups.setdefault((entry['date'], entry['payee']), []).append(entry)

    for group_items in groups.values():
        best_match_score = 0
        best_match_index = None
        best_match_note = []
        matches_found = 0

        # Synthetic stand-in for the whole group: the first entry's details
        # with the group's total amount. Used for scoring only.
        combined = {**group_items[0], 'amount': sum(x['amount'] for x in group_items)}
        for i, r1 in enumerate(remaining_statement_trans):
            score, note = records_match(r1, combined)
            if score >= 0.5 and score >= best_match_score:
                matches_found += 1
                best_match_score = score
                best_match_index = i
                best_match_note = note

        only_decent_match = (
            best_match_score > 0.5
            and matches_found == 1
            and 'check-id mismatch' not in best_match_note
        )
        if only_decent_match or best_match_score > 0.8:
            if best_match_score <= 0.8:
                best_match_note.append('only one decent match')
            matches.append(([remaining_statement_trans[best_match_index]], group_items, best_match_note))
            # Don't try to make a second match against this statement entry.
            del remaining_statement_trans[best_match_index]
        else:
            # Hand back the real books entries, not the summed stand-in, so
            # each keeps its own amount and line number in later reporting.
            remaining_books_trans.extend(group_items)
    return matches, remaining_statement_trans, remaining_books_trans
| 
 | ||||
def process_unmatched(statement_trans, books_trans):
    """Wrap leftover records as one-sided "no match" entries.

    Statement records come first, each as ``([record], [], ['no match'])``,
    followed by books records as ``([], [record], ['no match'])``.
    """
    statement_only = [([entry], [], ['no match']) for entry in statement_trans]
    books_only = [([], [entry], ['no match']) for entry in books_trans]
    return statement_only + books_only
| 
 | ||||
| def main(args): | ||||
|     # TODO: Should put in a sanity check to make sure the statement you're feeding | ||||
|     # in matches the account you've provided. | ||||
|  | @ -443,10 +493,13 @@ def main(args): | |||
| 
 | ||||
|     books_trans = sort_records([standardize_beancount_record(row) for row in result_rows]) | ||||
| 
 | ||||
|     matches = match_statement_and_books(statement_trans, books_trans) | ||||
|     match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches) | ||||
|     matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans) | ||||
|     subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(remaining_statement_trans, remaining_books_trans) | ||||
|     matches.extend(subset_matches) | ||||
|     unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans) | ||||
|     matches.extend(unmatched) | ||||
| 
 | ||||
|     # assert books_balance == books_balance_reconciled + total_matched + total_missing_from_statement | ||||
|     match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches) | ||||
| 
 | ||||
|     total_matched, total_missing_from_books, total_missing_from_statement = totals(matches) | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,6 +13,7 @@ from conservancy_beancount.reconcile.statement_reconciler import ( | |||
|     metadata_for_match, | ||||
|     write_metadata_to_books, | ||||
|     totals, | ||||
|     subset_match, | ||||
| ) | ||||
| 
 | ||||
| # These data structures represent individual transactions as taken from the | ||||
|  | @ -40,6 +41,13 @@ S3 = { | |||
|     'check_id': '', | ||||
|     'line': 444, | ||||
| } | ||||
| S4 = { | ||||
|     'date': datetime.date(2022, 8, 11), | ||||
|     'amount': decimal.Decimal('-2260.00'), | ||||
|     'payee': 'Trust 0000000362 210', | ||||
|     'check_id': '', | ||||
|     'line': 555, | ||||
| } | ||||
| 
 | ||||
| # Books transaction examples. | ||||
| B1 = { | ||||
|  | @ -114,81 +122,116 @@ B3_unmatched_check_id = { | |||
|     'line': 999, | ||||
|     'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf" | ||||
| } | ||||
| B4A = { | ||||
|     'date': datetime.date(2022, 8, 11), | ||||
|     'amount': decimal.Decimal('-250.00'), | ||||
|     'payee': 'TRUST 0000000362 ACH Retirement Plan', | ||||
|     'check_id': '', | ||||
|     'line': 1000, | ||||
| } | ||||
| B4B = { | ||||
|     'date': datetime.date(2022, 8, 11), | ||||
|     'amount': decimal.Decimal('-250.00'), | ||||
|     'payee': 'TRUST 0000000362 ACH Retirement Plan', | ||||
|     'check_id': '', | ||||
|     'line': 1000, | ||||
| } | ||||
| B4C = { | ||||
|     'date': datetime.date(2022, 8, 11), | ||||
|     'amount': decimal.Decimal('-1760.00'), | ||||
|     'payee': 'TRUST 0000000362 ACH Retirement Plan', | ||||
|     'check_id': '', | ||||
|     'line': 1000, | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| def test_one_exact_match(): | ||||
|     statement = [S1] | ||||
|     books = [B1] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         # Match, match, notes. | ||||
|         # | ||||
|         # The matches are a list so we can implement subset-sum matching where | ||||
|         # multiple books transactions may match to a single statement | ||||
|         # transaction. | ||||
|         ([S1], [B1], []), | ||||
|     ] | ||||
|         [([S1], [B1], [])], | ||||
|         [], | ||||
|         [], | ||||
|     ) | ||||
| 
 | ||||
| def test_multiple_exact_matches(): | ||||
|     statement = [S1, S2] | ||||
|     books = [B1, B2] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S1], [B1], []), | ||||
|         ([S2], [B2], []), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [([S1], [B1], []), ([S2], [B2], [])], | ||||
|         [], | ||||
|         [], | ||||
|     ) | ||||
| 
 | ||||
| def test_one_mismatch(): | ||||
|     statement = [S1] | ||||
|     books = [] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S1], [], ['no match']), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [], | ||||
|         [S1], | ||||
|         [], | ||||
|     ) | ||||
| 
 | ||||
| def test_multiple_mismatches(): | ||||
|     statement = [S1] | ||||
|     books = [B2] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S1], [], ['no match']), | ||||
|         ([], [B2], ['no match']), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [], | ||||
|         [S1], | ||||
|         [B2], | ||||
|     ) | ||||
| 
 | ||||
| def test_next_day_matches(): | ||||
|     statement = [S3] | ||||
|     books = [B3_next_day] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S3], [B3_next_day], ['+/- 1 days']), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [([S3], [B3_next_day], ['+/- 1 days'])], | ||||
|         [], | ||||
|         [], | ||||
|     ) | ||||
| 
 | ||||
| def test_next_week_matches(): | ||||
|     statement = [S3] | ||||
|     books = [B3_next_week] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S3], [B3_next_week], ['+/- 7 days']), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [([S3], [B3_next_week], ['+/- 7 days'])], | ||||
|         [], | ||||
|         [], | ||||
|     ) | ||||
| 
 | ||||
| def test_incorrect_amount_does_not_match(): | ||||
|     statement = [S3] | ||||
|     books = [B3_mismatch_amount] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S3], [], ['no match']), | ||||
|         ([], [B3_mismatch_amount], ['no match']), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [], | ||||
|         [S3], | ||||
|         [B3_mismatch_amount], | ||||
|     ) | ||||
| 
 | ||||
| def test_payee_mismatch_ok_when_only_one_that_amount_and_date(): | ||||
|     statement = [S3] | ||||
|     books = [B3_payee_mismatch_1] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S3], [B3_payee_mismatch_1], ['payee mismatch', 'only one decent match']), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [([S3], [B3_payee_mismatch_1], ['payee mismatch', 'only one decent match'])], | ||||
|         [], | ||||
|         [], | ||||
|     ) | ||||
| 
 | ||||
| def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date(): | ||||
|     statement = [S3] | ||||
|     books = [B3_payee_mismatch_1, B3_payee_mismatch_2] | ||||
|     match = match_statement_and_books(statement, books) | ||||
|     assert match == [ | ||||
|         ([S3], [], ['no match']), | ||||
|         ([], [B3_payee_mismatch_1], ['no match']), | ||||
|         ([], [B3_payee_mismatch_2], ['no match']), | ||||
|     ] | ||||
|     assert match == ( | ||||
|         [], | ||||
|         [S3], | ||||
|         [B3_payee_mismatch_1, B3_payee_mismatch_2], | ||||
|     ) | ||||
| 
 | ||||
| def test_remove_payee_junk(): | ||||
|     assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS' | ||||
|  | @ -251,7 +294,17 @@ def test_payee_not_considered_if_check_id_present(): | |||
|     # These records match aside from check-id. | ||||
|     statement = [S3] | ||||
|     books = [B3_unmatched_check_id] | ||||
|     assert match_statement_and_books(statement, books) == [ | ||||
|         ([S3], [], ['no match']), | ||||
|         ([], [B3_unmatched_check_id], ['no match']), | ||||
|     ] | ||||
|     assert match_statement_and_books(statement, books) == ( | ||||
|         [], | ||||
|         [S3], | ||||
|         [B3_unmatched_check_id], | ||||
|     ) | ||||
| 
 | ||||
| def test_subset_sum_match(): | ||||
|     statement = [S4] | ||||
|     books = [B4A, B4B, B4C] | ||||
|     assert subset_match(statement, books) == ( | ||||
|         [([S4], [B4A, B4B, B4C], [])], | ||||
|         [],  # No remaining statement trans. | ||||
|         [],  # No remaining books trans. | ||||
|     ) | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue