reconcile: Add prototype AMEX statement/books reconciler.

2022-02-01 17:34:15 +11:00 · 2022-02-01 17:34:15 +11:00 · f3e824f68b
commit f3e824f68b
parent 9e6b5b408a
1 changed files with 105 additions and 0 deletions
--- a/conservancy_beancount/reconcile/prototype_amex_reconciler.py
+++ b/conservancy_beancount/reconcile/prototype_amex_reconciler.py
@ -0,0 +1,105 @@
+"""A prototype AMEX statement reconciler.
+
+Run like this:
+
+$ python3 -m pip install thefuzz
+$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py --beancount-file=$HOME/conservancy/beancount/books/2021.beancount --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv
+"""
+
+import argparse
+import csv
+import datetime
+import decimal
+
+from beancount import loader
+from beancount.query.query import run_query
+from thefuzz import fuzz
+
+# NOTE: Statement doesn't seem to give us a running balance or a final total.
+
+def standardize_amex_record(row):
+    """Convert one row of the AMEX CSV export into a common record dict.
+
+    ``row`` is a mapping with ``Date`` (formatted MM/DD/YYYY), ``Amount`` and
+    ``Description`` keys.  The result has ``date`` (a ``datetime.date``),
+    ``amount`` (a ``Decimal`` with the sign of the CSV value flipped) and
+    ``payee`` keys.
+
+    NOTE(review): the sign flip presumably makes statement charges carry the
+    same sign as the books' liability postings -- confirm.
+    """
+    parsed = datetime.datetime.strptime(row['Date'], '%m/%d/%Y')
+    flipped = -1 * decimal.Decimal(row['Amount'])
+    return dict(date=parsed.date(), amount=flipped, payee=row['Description'])
+
+
+def standardize_beancount_record(row):
+    """Convert one Beancount query result row into a common record dict.
+
+    Reads the ``date``, ``number_cost_position``, ``payee`` and ``narration``
+    attributes of ``row``.  The narration stands in for the payee whenever the
+    payee is empty or missing.
+    """
+    record = {'date': row.date, 'amount': row.number_cost_position}
+    record['payee'] = row.payee or row.narration
+    return record
+
+
+def format_record(record):
+    """Render a record as a fixed-width ``date: amount payee`` string.
+
+    The date is written in ISO format, the amount is right-aligned in an
+    8-column field, and the payee is cut to 20 characters and left-aligned
+    in a 20-column field.
+    """
+    day = record['date'].isoformat()
+    amount = format(record['amount'], '>8')
+    payee = format(record['payee'][:20], '<20')
+    return day + ': ' + amount + ' ' + payee
+
+
+def sort_records(records):
+    """Return a new list of the records ordered by date, then by amount."""
+    def date_then_amount(record):
+        return record['date'], record['amount']
+
+    ordered = list(records)
+    ordered.sort(key=date_then_amount)
+    return ordered
+
+
+def records_match(r1, r2):
+    """Do these records represent the same transaction?
+
+    Two records match when their dates are at most one day apart and their
+    amounts are equal.  The payee is only used to grade the match.
+
+    Returns a ``(matched, note)`` tuple.  ``note`` is empty except when the
+    records match on date and amount but the fuzzy payee score is 50 or
+    lower, in which case it reports that score.
+    """
+    one_day = datetime.timedelta(days=1)
+    date_matches = abs(r1['date'] - r2['date']) <= one_day
+    amount_matches = r1['amount'] == r2['amount']
+    # The payee never decides a match, so skip the comparatively expensive
+    # fuzzy comparison for pairs already ruled out by date or amount.
+    if not (date_matches and amount_matches):
+        return False, ''
+    payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee'])
+    if payee_match_quality > 50:
+        return True, ''
+    return True, f'Low payee match ({payee_match_quality}%)'
+
+
+def _reconcile(statement_trans, books_trans):
+    """Pair each statement record with the first matching books record.
+
+    Returns a list of ``[date, text]`` rows: one per matched pair, one per
+    statement record with no counterpart in the books, and one per books
+    record that is still unpaired afterwards.  ``books_trans`` itself is
+    left unmodified.
+    """
+    unmatched_books = list(books_trans)
+    matches = []
+    for r1 in statement_trans:
+        for i, r2 in enumerate(unmatched_books):
+            match, note = records_match(r1, r2)
+            if match:
+                matches.append([r2['date'], f'{format_record(r1)}    -->    {format_record(r2)}  {note.upper()}'])
+                # A books record may be paired with one statement record only.
+                del unmatched_books[i]
+                break
+        else:
+            matches.append([r1['date'], f'{format_record(r1)}    -->    {" ":^41}  NOT IN BOOKS'])
+    for r2 in unmatched_books:
+        matches.append([r2['date'], f'{" ":^41}    -->    {format_record(r2)}  NOT ON STATEMENT'])
+    return matches
+
+
+def main():
+    """Command-line entry point: reconcile an AMEX CSV against the books.
+
+    Reads the statement CSV, queries the Beancount file for AMEX postings
+    within the statement's date range, and prints a side-by-side report of
+    matched and unmatched transactions.
+    """
+    parser = argparse.ArgumentParser(description='Reconciliation helper')
+    parser.add_argument('--beancount-file', required=True)
+    parser.add_argument('--amex-csv', required=True)
+    # Accepted on the command line but not used anywhere yet.
+    parser.add_argument('--grep-output-filename')
+    # parser.add_argument('--report-group-regex')
+    args = parser.parse_args()
+
+    # newline='' lets the csv module handle line breaks inside quoted fields.
+    with open(args.amex_csv, newline='') as f:
+        reader = csv.DictReader(f)
+        statement_trans = sort_records([standardize_amex_record(row) for row in reader])
+
+    # Without statement rows there is no date range to query the books for.
+    if not statement_trans:
+        raise SystemExit(f'No transactions found in {args.amex_csv}')
+
+    begin_date = statement_trans[0]['date']
+    end_date = statement_trans[-1]['date']
+
+    # Do we traverse and filter the in-memory entries list, or do we use
+    # Beancount Query Language to get a list of transactions?
+    entries, _, options = loader.load_file(args.beancount_file)
+    result_types, result_rows = run_query(
+        entries,
+        options,
+        f"SELECT filename, lineno, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}",
+    )
+    books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
+
+    matches = _reconcile(statement_trans, books_trans)
+
+    print(f'{"STATEMENT":<40}            {"BOOKS":<40}   NOTES')
+    for _, output in sorted(matches):
+        print(output)
+
+
+if __name__ == '__main__':
+    main()
+
+# Local Variables:
+# python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"
+# End: