reconcile: Show file and line number of mismatches.

This commit is contained in:
Ben Sturmfels 2022-02-09 14:03:05 +11:00
parent 6b1ce7d73a
commit 3f735bec95
Signed by: bsturmfels
GPG key ID: 023C05E2C9C068F0

View file

@ -13,14 +13,14 @@ This tool is still somewhat like an importer in that it needs to extract
transaction details from a third-party statement. Instead of creating transaction details from a third-party statement. Instead of creating
directives, it just checks to see that similar directives are already present. directives, it just checks to see that similar directives are already present.
Problem this attempts to address: Problems this attempts to address:
- errors in the books take hours to find during reconciliation ("you're entering a world of pain" - errors in the books take hours to find during reconciliation ("you're entering a world of pain")
- balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
- paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
- adding statement/reconciliation metadata to books is manual and prone to mistakes - adding statement/reconciliation metadata to books is manual and prone to mistakes
- paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
- balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
- creating reconciliation reports - creating reconciliation reports
- normally transactions are entered manually, but potentially could create transaction directives (a.k.a. importing)
- jumping to an individual transaction in the books isn't trivial - Emacs grep mode is helpful - jumping to an individual transaction in the books isn't trivial - Emacs grep mode is helpful
- normally transactions are entered manually, but potentially could create transaction directives (a.k.a. importing)
Q. How are reconciliation reports created currently? How do you read them? Q. How are reconciliation reports created currently? How do you read them?
@ -32,6 +32,7 @@ import argparse
import csv import csv
import datetime import datetime
import decimal import decimal
import os
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
from beancount import loader from beancount import loader
@ -41,11 +42,12 @@ from thefuzz import fuzz # type: ignore
# NOTE: Statement doesn't seem to give us a running balance or a final total. # NOTE: Statement doesn't seem to give us a running balance or a final total.
def standardize_amex_record(row: Dict) -> Dict: def standardize_amex_record(row: Dict, line: int) -> Dict:
"""Turn an AMEX CSV row into a standard dict format representing a transaction.""" """Turn an AMEX CSV row into a standard dict format representing a transaction."""
return { return {
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), 'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
'amount': -1 * decimal.Decimal(row['Amount']), 'amount': -1 * decimal.Decimal(row['Amount']),
'line': line,
'payee': row['Description'] or '', 'payee': row['Description'] or '',
} }
@ -61,16 +63,17 @@ def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
'statement': row.posting_statement, 'statement': row.posting_statement,
} }
def standardize_fr_record(row: Dict) -> Dict: def standardize_fr_record(row: Dict, line: int) -> Dict:
return { return {
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), 'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
'amount': decimal.Decimal(row['Amount']), 'amount': decimal.Decimal(row['Amount']),
'payee': row['Detail'] or '', 'payee': row['Detail'] or row['Description'],
'line': line,
} }
def format_record(record: Dict) -> str: def format_record(record: Dict) -> str:
return f"{record['date'].isoformat()}: {record['amount']:>11} {record['payee'][:20]:<20}" return f"{record['date'].isoformat()}: {record['amount']:12,.2f} {record['payee'][:20]:<20}"
def sort_records(records: List) -> List: def sort_records(records: List) -> List:
@ -79,14 +82,19 @@ def sort_records(records: List) -> List:
def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]: def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
"""Do these records represent the same transaction?""" """Do these records represent the same transaction?"""
date_matches = r1['date'] >= r2['date'] - datetime.timedelta(days=1) and r1['date'] <= r2['date'] + datetime.timedelta(days=1) date_matches_exactly = r1['date'] == r2['date']
date_matches_loosly = r1['date'] >= r2['date'] - datetime.timedelta(days=3) and r1['date'] <= r2['date'] + datetime.timedelta(days=3)
amount_matches = r1['amount'] == r2['amount'] amount_matches = r1['amount'] == r2['amount']
payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee']) payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee'])
payee_matches = payee_match_quality > 50 payee_matches = payee_match_quality > 50
if date_matches and amount_matches and payee_matches: if date_matches_exactly and amount_matches and payee_matches:
return True, 'Matched' return True, 'Matched'
elif date_matches and amount_matches: elif date_matches_loosly and amount_matches and payee_matches:
return True, 'Matched +/- 3 days'
elif date_matches_exactly and amount_matches:
return True, f'Matched with low-payee match ({payee_match_quality}%)' return True, f'Matched with low-payee match ({payee_match_quality}%)'
elif date_matches_loosly and amount_matches:
return True, f'Matched +/- 3 days, low-payee match ({payee_match_quality}%)'
else: else:
return False, '' return False, ''
@ -109,7 +117,7 @@ else:
with open(args.csv_statement) as f: with open(args.csv_statement) as f:
reader = csv.DictReader(f) reader = csv.DictReader(f)
statement_trans = sort_records([standardize_statement_record(row) for row in reader]) statement_trans = sort_records([standardize_statement_record(row, reader.line_num) for row in reader])
begin_date = statement_trans[0]['date'] begin_date = statement_trans[0]['date']
end_date = statement_trans[-1]['date'] end_date = statement_trans[-1]['date']
@ -132,7 +140,6 @@ result_types, result_rows = run_query(
query, query,
) )
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows]) books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
num_statement_records = len(statement_trans) num_statement_records = len(statement_trans)
@ -146,27 +153,32 @@ metadata_to_apply = []
# the books. If found, the books transaction is marked off so that it can only # the books. If found, the books transaction is marked off so that it can only
# be matched once. Some transactions will be matched, some will be on the # be matched once. Some transactions will be matched, some will be on the
# statement but not the books and some on the books but not the statement. # statement but not the books and some on the books but not the statement.
#
# Doesn't currently find exact matches when there are a bunch of transactions
# for the same amount on the same day. Probably ought to do a first pass through
# the books transactions to find a closely matching payee, then do another pass
# disregarding payee.
for r1 in statement_trans: for r1 in statement_trans:
for r2 in books_trans: for r2 in books_trans:
match, note = records_match(r1, r2) match, note = records_match(r1, r2)
if match: if match:
if not r2['statement'] or args.show_reconciled_matches: if not r2['statement'] or args.show_reconciled_matches:
matches.append([r2['date'], f'{format_record(r1)} --> {format_record(r2)}{note}']) matches.append([r2['date'], f'{format_record(r1)} {format_record(r2)}{note}'])
if not r2['statement']: if not r2['statement']:
metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{args.csv_statement}"\n')) metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{os.path.basename(args.csv_statement)}:{r2["line"]}"\n'))
books_trans.remove(r2) books_trans.remove(r2)
break break
else: else:
matches.append([r1['date'], f'{format_record(r1)} --> {" ":^44} ✗ Not in books']) matches.append([r1['date'], f'{format_record(r1)} {" ":^45} ✗ Not in books ({os.path.basename(args.csv_statement)}:{r1["line"]})'])
for r2 in books_trans: for r2 in books_trans:
matches.append([r2['date'], f'{" ":^44} --> {format_record(r2)} ✗ Not on statement']) matches.append([r2['date'], f'{" ":^45} {format_record(r2)} ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})'])
print(f'-----------------------------------------------------------------------------------------------------------------') print('-' * 155)
print(f'{"STATEMENT":<40} {"BOOKS":<40} NOTES') print(f'{"STATEMENT":<38} {"BOOKS":<44} NOTES')
print(f'-----------------------------------------------------------------------------------------------------------------') print('-' * 155)
for _, output in sorted(matches): for _, output in sorted(matches):
print(output) print(output)
print(f'-----------------------------------------------------------------------------------------------------------------') print('-' * 155)
# Write statement metadata back to books # Write statement metadata back to books
if metadata_to_apply: if metadata_to_apply: