From 3f735bec954038aabf5da27f5253c593bdd5d43c Mon Sep 17 00:00:00 2001 From: Ben Sturmfels Date: Wed, 9 Feb 2022 14:03:05 +1100 Subject: [PATCH] reconcile: Show file and line number of mismatches. --- .../reconcile/prototype_amex_reconciler.py | 56 +++++++++++-------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/conservancy_beancount/reconcile/prototype_amex_reconciler.py b/conservancy_beancount/reconcile/prototype_amex_reconciler.py index 84cd638..5bba689 100644 --- a/conservancy_beancount/reconcile/prototype_amex_reconciler.py +++ b/conservancy_beancount/reconcile/prototype_amex_reconciler.py @@ -13,14 +13,14 @@ This tool is still somewhat like an importer in that it needs to extract transaction details from a third-party statement. Instead of creating directives, it just checks to see that similar directives are already present. -Problem this attempts to address: - - errors in the books take hours to find during reconciliation ("you're entering a world of pain" - - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml - - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors) +Problems this attempts to address: + - errors in the books take hours to find during reconciliation ("you're entering a world of pain") - adding statement/reconciliation metadata to books is manual and prone to mistakes + - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors) + - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml - creating reconciliation reports - - normally transactions are entered manually, but potentially could create transaction directives (a.k.a. importing) - jumping to an individual transaction in the books isn't trivial - Emacs grep mode is helpful + - normally transactions are entered manually, but potentially could create transaction directives (a.k.a. importing) Q. How are reconciliation reports created currently? How do you read them? @@ -32,6 +32,7 @@ import argparse import csv import datetime import decimal +import os from typing import Dict, List, Tuple from beancount import loader @@ -41,11 +42,12 @@ from thefuzz import fuzz # type: ignore # NOTE: Statement doesn't seem to give us a running balance or a final total. -def standardize_amex_record(row: Dict) -> Dict: +def standardize_amex_record(row: Dict, line: int) -> Dict: """Turn an AMEX CSV row into a standard dict format representing a transaction.""" return { 'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), 'amount': -1 * decimal.Decimal(row['Amount']), + 'line': line, 'payee': row['Description'] or '', } @@ -61,16 +63,17 @@ def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def] 'statement': row.posting_statement, } -def standardize_fr_record(row: Dict) -> Dict: +def standardize_fr_record(row: Dict, line: int) -> Dict: return { 'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), 'amount': decimal.Decimal(row['Amount']), - 'payee': row['Detail'] or '', + 'payee': row['Detail'] or row['Description'], + 'line': line, } def format_record(record: Dict) -> str: - return f"{record['date'].isoformat()}: {record['amount']:>11} {record['payee'][:20]:<20}" + return f"{record['date'].isoformat()}: {record['amount']:12,.2f} {record['payee'][:20]:<20}" def sort_records(records: List) -> List: @@ -79,14 +82,19 @@ def sort_records(records: List) -> List: def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]: """Do these records represent the same transaction?""" - date_matches = r1['date'] >= r2['date'] - datetime.timedelta(days=1) and r1['date'] <= r2['date'] + datetime.timedelta(days=1) + date_matches_exactly = r1['date'] == r2['date'] + date_matches_loosly = r1['date'] >= r2['date'] - datetime.timedelta(days=3) and r1['date'] <= r2['date'] + datetime.timedelta(days=3) amount_matches = r1['amount'] == r2['amount'] payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee']) payee_matches = payee_match_quality > 50 - if date_matches and amount_matches and payee_matches: + if date_matches_exactly and amount_matches and payee_matches: return True, 'Matched' - elif date_matches and amount_matches: + elif date_matches_loosly and amount_matches and payee_matches: + return True, 'Matched +/- 3 days' + elif date_matches_exactly and amount_matches: return True, f'Matched with low-payee match ({payee_match_quality}%)' + elif date_matches_loosly and amount_matches: + return True, f'Matched +/- 3 days, low-payee match ({payee_match_quality}%)' else: return False, '' @@ -109,7 +117,7 @@ else: with open(args.csv_statement) as f: reader = csv.DictReader(f) - statement_trans = sort_records([standardize_statement_record(row) for row in reader]) + statement_trans = sort_records([standardize_statement_record(row, reader.line_num) for row in reader]) begin_date = statement_trans[0]['date'] end_date = statement_trans[-1]['date'] @@ -132,7 +140,6 @@ result_types, result_rows = run_query( query, ) - books_trans = sort_records([standardize_beancount_record(row) for row in result_rows]) num_statement_records = len(statement_trans) @@ -146,27 +153,32 @@ metadata_to_apply = [] # the books. If found, the books transaction is marked off so that it can only # be matched once. Some transactions will be matched, some will be on the # statement but not the books and some on the books but not the statement. +# +# Doesn't currently find exact matches when there are a bunch of transactions +# for the same amount on the same day. Probably ought to do a first pass through +# the books transactions to find an closely matching payee, then do another pass +# disregarding payee. for r1 in statement_trans: for r2 in books_trans: match, note = records_match(r1, r2) if match: if not r2['statement'] or args.show_reconciled_matches: - matches.append([r2['date'], f'{format_record(r1)} --> {format_record(r2)} ✓ {note}']) + matches.append([r2['date'], f'{format_record(r1)} → {format_record(r2)} ✓ {note}']) if not r2['statement']: - metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{args.csv_statement}"\n')) + metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{os.path.basename(args.csv_statement)}:{r2["line"]}"\n')) books_trans.remove(r2) break else: - matches.append([r1['date'], f'{format_record(r1)} --> {" ":^44} ✗ Not in books']) + matches.append([r1['date'], f'{format_record(r1)} → {" ":^45} ✗ Not in books ({os.path.basename(args.csv_statement)}:{r1["line"]})']) for r2 in books_trans: - matches.append([r2['date'], f'{" ":^44} --> {format_record(r2)} ✗ Not on statement']) + matches.append([r2['date'], f'{" ":^45} → {format_record(r2)} ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})']) -print(f'-----------------------------------------------------------------------------------------------------------------') -print(f'{"STATEMENT":<40} {"BOOKS":<40} NOTES') -print(f'-----------------------------------------------------------------------------------------------------------------') +print('-' * 155) +print(f'{"STATEMENT":<38} {"BOOKS":<44} NOTES') +print('-' * 155) for _, output in sorted(matches): print(output) -print(f'-----------------------------------------------------------------------------------------------------------------') +print('-' * 155) # Write statement metadata back to books if metadata_to_apply: