reconcile: Prototype writing statement metadata back to books.

This commit is contained in:
Ben Sturmfels 2022-02-08 22:06:01 +11:00
parent dd35ed25c2
commit 3792d46bcc
Signed by: bsturmfels
GPG key ID: 023C05E2C9C068F0

View file

@ -1,13 +1,24 @@
"""A prototype AMEX statement reconciler. """Reconcile an AMEX CSV statement against the books and print differences.
Run like this: Run like this:
$ python3 -m pip install thefuzz $ python3 -m pip install thefuzz
$ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py --beancount-file=$HOME/conservancy/beancount/books/2021.beancount --amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv $ python3 conservancy_beancount/reconcile/prototype_amex_reconciler.py \
""" --beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
--amex-csv=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv
Conservancy currently enters data by hand rather than using Beancount importers.
This tool is still somewhat like an importer in that it needs to extract
transaction details from a third-party statement. Instead of creating
directives, it just checks to see that similar directives are already present.
TODO/ISSUES:
- AMEX statement doesn't provide bank balance or running total
"""
import argparse import argparse
import csv import csv
import collections
import datetime import datetime
import decimal import decimal
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
@ -19,6 +30,7 @@ from thefuzz import fuzz # type: ignore
# NOTE: Statement doesn't seem to give us a running balance or a final total. # NOTE: Statement doesn't seem to give us a running balance or a final total.
def standardize_amex_record(row: Dict) -> Dict: def standardize_amex_record(row: Dict) -> Dict:
"""Turn an AMEX CSV row into a standard dict format representing a transaction."""
return { return {
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), 'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
'amount': -1 * decimal.Decimal(row['Amount']), 'amount': -1 * decimal.Decimal(row['Amount']),
@ -27,10 +39,14 @@ def standardize_amex_record(row: Dict) -> Dict:
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def] def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
"""Turn a Beancount query result row into a standard dict representing a transaction."""
return { return {
'date': row.date, 'date': row.date,
'amount': row.number_cost_position, 'amount': row.number_cost_position,
'payee': row.payee if row.payee else row.narration, 'payee': row.payee if row.payee else row.narration,
'filename': row.filename,
'line': row.posting_line,
'statement': row.posting_statement,
} }
@ -49,9 +65,9 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee']) payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee'])
payee_matches = payee_match_quality > 50 payee_matches = payee_match_quality > 50
if date_matches and amount_matches and payee_matches: if date_matches and amount_matches and payee_matches:
return True, '' return True, 'Matched'
elif date_matches and amount_matches: elif date_matches and amount_matches:
return True, f'Low payee match ({payee_match_quality}%)' return True, f'Matched with low-payee match ({payee_match_quality}%)'
else: else:
return False, '' return False, ''
@ -61,6 +77,7 @@ parser.add_argument('--beancount-file', required=True)
parser.add_argument('--amex-csv', required=True) parser.add_argument('--amex-csv', required=True)
parser.add_argument('--grep-output-filename') parser.add_argument('--grep-output-filename')
# parser.add_argument('--report-group-regex') # parser.add_argument('--report-group-regex')
parser.add_argument('--show-reconciled-matches', action='store_true')
args = parser.parse_args() args = parser.parse_args()
with open(args.amex_csv) as f: with open(args.amex_csv) as f:
@ -71,12 +88,21 @@ begin_date = statement_trans[0]['date']
end_date = statement_trans[-1]['date'] end_date = statement_trans[-1]['date']
# Do we traverse and filter the in-memory entries list and filter that, or do we # Do we traverse and filter the in-memory entries list and filter that, or do we
# use Beancount Query Language to get a list of transactions? # use Beancount Query Language (BQL) to get a list of transactions? Currently
# using BQL.
#
# beancount.query.query_compile.compile() and
# beancount.query.query_execute.filter_entries() look useful in this respect,
# but I'm not clear on how to use compile(). An example would help.
entries, _, options = loader.load_file(args.beancount_file) entries, _, options = loader.load_file(args.beancount_file)
# Building the query with an f-string looks bad, but there's no SQL injection
# risk here because BQL can't write back to the Beancount files. I hope!
query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}"
result_types, result_rows = run_query( result_types, result_rows = run_query(
entries, entries,
options, options,
f"SELECT filename, lineno, date, number(cost(position)), payee, narration where account = 'Liabilities:CreditCard:AMEX' and date >= {begin_date} and date <= {end_date}", query,
) )
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows]) books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
@ -85,21 +111,48 @@ num_books_trans = len(books_trans)
statement_index = 0 statement_index = 0
books_index = 0 books_index = 0
matches = [] matches = []
metadata_to_apply = []
# Run through all the statement transactions to find a matching transaction in
# the books. If found, the books transaction is marked off so that it can only
# be matched once. Some transactions will be matched, some will be on the
# statement but not the books and some on the books but not the statement.
for r1 in statement_trans: for r1 in statement_trans:
for r2 in books_trans: for r2 in books_trans:
match, note = records_match(r1, r2) match, note = records_match(r1, r2)
if match: if match:
matches.append([r2['date'], f'{format_record(r1)} --> {format_record(r2)} {note.upper()}']) if not r2['statement'] or args.show_reconciled_matches:
matches.append([r2['date'], f'{format_record(r1)} --> {format_record(r2)}{note}'])
if not r2['statement']:
metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{args.amex_csv}"\n'))
books_trans.remove(r2) books_trans.remove(r2)
break break
else: else:
matches.append([r1['date'], f'{format_record(r1)} --> {" ":^41} NOT IN BOOKS']) matches.append([r1['date'], f'{format_record(r1)} --> {" ":^41} ✗ Not in books'])
for r2 in books_trans: for r2 in books_trans:
matches.append([r2['date'], f'{" ":^41} --> {format_record(r2)} NOT ON STATEMENT']) matches.append([r2['date'], f'{" ":^41} --> {format_record(r2)} ✗ Not on statement'])
print(f'-----------------------------------------------------------------------------------------------------------------')
print(f'{"STATEMENT":<40} {"BOOKS":<40} NOTES') print(f'{"STATEMENT":<40} {"BOOKS":<40} NOTES')
print(f'-----------------------------------------------------------------------------------------------------------------')
for _, output in sorted(matches): for _, output in sorted(matches):
print(output) print(output)
print(f'-----------------------------------------------------------------------------------------------------------------')
# Offer to record the reconciliation in the books. Each entry in
# metadata_to_apply is a (filename, line, metadata) tuple, where `line` is the
# posting's `lineno` as returned by the BQL query and `metadata` is the complete
# text line (including trailing newline) to insert.
if metadata_to_apply:
    print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
    # Anything other than an explicit "y" leaves the books untouched.
    if input().strip().lower() == 'y':
        # Group the pending insertions by file so that each file is read and
        # rewritten exactly once.
        edits_by_file = {}
        for filename, line, metadata in metadata_to_apply:
            edits_by_file.setdefault(filename, []).append((line, metadata))
        for filename, edits in edits_by_file.items():
            with open(filename, 'r') as f:
                lines = f.readlines()
            # Edits are collected in statement-match order, which need not be
            # ascending line order, so a running offset would misplace any
            # insertion that comes after one further down the file. Working
            # from the bottom of the file upwards means earlier insertions
            # never shift the positions of the ones still to be made.
            # NOTE(review): assumes `line` is the 1-based line number of the
            # posting, so list index `line` is the slot directly after it —
            # confirm against Beancount's `lineno` metadata.
            for line, metadata in sorted(edits, reverse=True):
                lines.insert(line, metadata)
            with open(filename, 'w') as f:
                f.writelines(lines)
            print(f'Wrote {filename}.')
# Local Variables: # Local Variables:
# python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python" # python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"