"""A prototype AMEX statement reconciler.

Run like this:

$ python3 -m pip install thefuzz
$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py --beancount-file=$HOME/conservancy/beancount/books/2021.beancount --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv
"""

import argparse
import csv
import datetime
import decimal

# The third-party dependencies (beancount, thefuzz) are imported inside the
# functions that use them, so the pure helpers below can be imported and
# tested, and `--help` can run, without those packages installed.

# NOTE: Statement doesn't seem to give us a running balance or a final total.


def standardize_amex_record(row):
    """Convert one AMEX CSV row into a standard record dict.

    `row` is a mapping with 'Date' (MM/DD/YYYY), 'Amount' and 'Description'
    keys.  The result has 'date' (datetime.date), 'amount' (Decimal) and
    'payee' (str) keys.
    """
    return {
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
        # The sign is flipped relative to the CSV; presumably the statement
        # lists charges as positive while the books record them as negative
        # -- TODO confirm.
        'amount': -1 * decimal.Decimal(row['Amount']),
        'payee': row['Description'],
    }


def standardize_beancount_record(row):
    """Convert one query result row into a standard record dict.

    `row` must expose `date`, `number_cost_position`, `payee` and `narration`
    attributes.  The narration is used when the payee is empty or None.
    """
    return {
        'date': row.date,
        'amount': row.number_cost_position,
        'payee': row.payee if row.payee else row.narration,
    }


def format_record(record):
    """Render a record as a fixed-width (41 character) report column."""
    return f"{record['date'].isoformat()}: {record['amount']:>8} {record['payee'][:20]:<20}"


def sort_records(records):
    """Return a new list of records ordered by date, then amount."""
    return sorted(records, key=lambda x: (x['date'], x['amount']))


def _payee_match_quality(payee1, payee2):
    """Return thefuzz's token-set similarity score for two payee strings."""
    from thefuzz import fuzz
    return fuzz.token_set_ratio(payee1, payee2)


def records_match(r1, r2):
    """Do these records represent the same transaction?

    Returns a `(matched, note)` tuple.  Records match when their dates are
    at most one day apart and their amounts are equal.  `note` is non-empty
    only for a match whose payee similarity score is 50 or lower.
    """
    date_matches = abs(r1['date'] - r2['date']) <= datetime.timedelta(days=1)
    amount_matches = r1['amount'] == r2['amount']
    if not (date_matches and amount_matches):
        # Skip the fuzzy comparison: it cannot change a non-match.
        return False, ''
    payee_match_quality = _payee_match_quality(r1['payee'], r2['payee'])
    if payee_match_quality > 50:
        return True, ''
    return True, f'Low payee match ({payee_match_quality}%)'


def match_records(statement_trans, books_trans):
    """Pair statement records with books records and build report lines.

    Each statement record is matched against the first still-unmatched books
    record accepted by `records_match`.  Returns a sorted list of
    `(date, line)` tuples covering matches, statement records missing from
    the books, and books records missing from the statement.  Neither input
    list is modified.
    """
    unmatched_books = list(books_trans)
    matches = []
    for r1 in statement_trans:
        for index, r2 in enumerate(unmatched_books):
            match, note = records_match(r1, r2)
            if match:
                # Matched lines are keyed by the books date.
                matches.append((r2['date'], f'{format_record(r1)} --> {format_record(r2)}  {note.upper()}'))
                # Each books record may be consumed by one statement record only.
                del unmatched_books[index]
                break
        else:
            matches.append((r1['date'], f'{format_record(r1)} --> {" ":^41}  NOT IN BOOKS'))
    for r2 in unmatched_books:
        matches.append((r2['date'], f'{" ":^41} --> {format_record(r2)}  NOT ON STATEMENT'))
    return sorted(matches)


def load_books_records(beancount_file, begin_date, end_date):
    """Load AMEX postings dated within [begin_date, end_date] from the books.

    Uses Beancount Query Language rather than traversing and filtering the
    in-memory entries list.  Returns standard record dicts sorted by date
    and amount.
    """
    from beancount import loader
    from beancount.query.query import run_query

    entries, _, options = loader.load_file(beancount_file)
    _, result_rows = run_query(
        entries,
        options,
        f"SELECT filename, lineno, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}",
    )
    return sort_records([standardize_beancount_record(row) for row in result_rows])


def main(argv=None):
    """Parse arguments, reconcile the statement against the books, and print a report."""
    parser = argparse.ArgumentParser(description='Reconciliation helper')
    parser.add_argument('--beancount-file', required=True)
    parser.add_argument('--amex-csv', required=True)
    # Accepted but not used anywhere in this prototype yet.
    parser.add_argument('--grep-output-filename')
    # parser.add_argument('--report-group-regex')
    args = parser.parse_args(argv)

    # newline='' lets the csv module handle newlines embedded in quoted fields.
    with open(args.amex_csv, newline='') as f:
        reader = csv.DictReader(f)
        statement_trans = sort_records([standardize_amex_record(row) for row in reader])

    if not statement_trans:
        raise SystemExit(f'No transactions found in {args.amex_csv}')

    # Records are sorted by date, so the first and last bound the statement period.
    begin_date = statement_trans[0]['date']
    end_date = statement_trans[-1]['date']

    # NOTE(review): records_match() tolerates a one-day date difference, but
    # this window is exact, so a books entry dated one day outside the
    # statement period can never be matched -- confirm whether that is intended.
    books_trans = load_books_records(args.beancount_file, begin_date, end_date)

    print(f'{"STATEMENT":<40}            {"BOOKS":<40}   NOTES')
    for _, output in match_records(statement_trans, books_trans):
        print(output)


if __name__ == '__main__':
    main()

# Local Variables:
# python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"
# End: