reconcile: Move code into functions, add totals.
This commit is contained in:
parent
4bb6177e45
commit
6d7df795cb
1 changed files with 182 additions and 115 deletions
|
@ -1,5 +1,12 @@
|
||||||
"""Reconcile an AMEX CSV statement against the books and print differences.
|
"""Reconcile an AMEX CSV statement against the books and print differences.
|
||||||
|
|
||||||
|
Beancount users often write importers to create bookkeeping entries direct from
|
||||||
|
a bank statement or similar. That approach automates data entry and
|
||||||
|
reconciliation in one step. In some cases though, it's useful to manually enter
|
||||||
|
transactions and reconcile them later on. This workflow is helpful in cases like
|
||||||
|
writing a paper check when there's a time lag between committing to making a
|
||||||
|
payment and the funds being debited. That's the workflow we're using here.
|
||||||
|
|
||||||
Run like this:
|
Run like this:
|
||||||
|
|
||||||
$ python3 -m pip install thefuzz
|
$ python3 -m pip install thefuzz
|
||||||
|
@ -36,18 +43,31 @@ TODO/ISSUES:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import argparse
|
import argparse
|
||||||
|
import collections
|
||||||
import csv
|
import csv
|
||||||
import datetime
|
import datetime
|
||||||
import decimal
|
import decimal
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
from typing import Dict, List, Tuple
|
import sys
|
||||||
|
from typing import Callable, Dict, List, Tuple, TextIO
|
||||||
|
|
||||||
from beancount import loader
|
from beancount import loader
|
||||||
from beancount.query.query import run_query
|
from beancount.query.query import run_query
|
||||||
|
|
||||||
|
if not sys.warnoptions:
|
||||||
|
import warnings
|
||||||
|
# Disable annoying warning from thefuzz prompting for a C extension. The
|
||||||
|
# current pure-Python implementation isn't a bottleneck for us.
|
||||||
|
warnings.filterwarnings('ignore', category=UserWarning, module='thefuzz.fuzz')
|
||||||
from thefuzz import fuzz # type: ignore
|
from thefuzz import fuzz # type: ignore
|
||||||
|
|
||||||
# NOTE: Statement doesn't seem to give us a running balance or a final total.
|
# NOTE: Statement doesn't seem to give us a running balance or a final total.
|
||||||
|
|
||||||
|
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
    """Parse a CSV statement into sorted, standardized transaction records.

    Each row is read with csv.DictReader and handed to
    standardize_statement_record together with the reader's current line
    number. The collected records are returned through sort_records().
    """
    rows = csv.DictReader(f)
    transactions = []
    for raw_row in rows:
        # line_num is sampled right after the row has been fetched, so it
        # reflects how far the reader has consumed the file for this row.
        transactions.append(standardize_statement_record(raw_row, rows.line_num))
    return sort_records(transactions)
|
||||||
|
|
||||||
|
|
||||||
def standardize_amex_record(row: Dict, line: int) -> Dict:
|
def standardize_amex_record(row: Dict, line: int) -> Dict:
|
||||||
"""Turn an AMEX CSV row into a standard dict format representing a transaction."""
|
"""Turn an AMEX CSV row into a standard dict format representing a transaction."""
|
||||||
|
@ -59,6 +79,15 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def standardize_fr_record(row: Dict, line: int) -> Dict:
    """Turn a non-AMEX ("fr") CSV row into a standard dict representing a transaction.

    Reads the 'Date' (MM/DD/YYYY), 'Amount', 'Detail' and 'Description'
    columns. The payee is taken from 'Detail', falling back to 'Description'
    when 'Detail' is empty. ``line`` is stored unchanged so the record can be
    traced back to its position in the statement.
    """
    parsed_date = datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date()
    amount = decimal.Decimal(row['Amount'])
    detail = row['Detail']
    payee = detail or row['Description']
    return dict(date=parsed_date, amount=amount, payee=payee, line=line)
|
||||||
|
|
||||||
|
|
||||||
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
|
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
|
||||||
"""Turn a Beancount query result row into a standard dict representing a transaction."""
|
"""Turn a Beancount query result row into a standard dict representing a transaction."""
|
||||||
return {
|
return {
|
||||||
|
@ -70,14 +99,6 @@ def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
|
||||||
'statement': row.posting_statement,
|
'statement': row.posting_statement,
|
||||||
}
|
}
|
||||||
|
|
||||||
def standardize_fr_record(row: Dict, line: int) -> Dict:
|
|
||||||
return {
|
|
||||||
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
|
|
||||||
'amount': decimal.Decimal(row['Amount']),
|
|
||||||
'payee': row['Detail'] or row['Description'],
|
|
||||||
'line': line,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def format_record(record: Dict) -> str:
    """Render a standardized transaction record as one fixed-width text column.

    The output is the ISO date, the amount right-aligned in 12 characters with
    thousands separators and two decimals, and the payee truncated and padded
    to exactly 20 characters.
    """
    date_text = record['date'].isoformat()
    amount_text = format(record['amount'], '12,.2f')
    payee_text = format(record['payee'][:20], '<20')
    return f"{date_text}: {amount_text} {payee_text}"
|
||||||
|
@ -87,6 +108,49 @@ def sort_records(records: List) -> List:
|
||||||
return sorted(records, key=lambda x: (x['date'], x['amount']))
|
return sorted(records, key=lambda x: (x['date'], x['amount']))
|
||||||
|
|
||||||
|
|
||||||
|
def match_statement_and_books(
        statement_trans: list,
        books_trans: list,
        show_reconciled_matches: bool,
        csv_statement: str,
) -> tuple[list, list, decimal.Decimal, decimal.Decimal, decimal.Decimal]:
    """Pair statement transactions with books transactions and total the results.

    Runs through all the statement transactions to find a matching
    transaction in the books. If found, the books transaction is marked off
    so that it can only be matched once. Some transactions will be matched,
    some will be on the statement but not the books and some on the books but
    not the statement.

    Doesn't currently find exact matches when there are a bunch of
    transactions for the same amount on the same day. Probably ought to do a
    first pass through the books transactions to find a closely matching
    payee, then do another pass disregarding payee.

    NOTE: ``books_trans`` is modified in place; every books record that gets
    matched is removed from the caller's list.

    Args:
        statement_trans: Standardized records read from the statement.
        books_trans: Standardized records queried from the books.
        show_reconciled_matches: When true, also report matches whose books
            record already carries a ``statement`` value.
        csv_statement: Path of the statement file; only its basename is used,
            in report lines and in the generated metadata.

    Returns:
        A 5-tuple ``(matches, metadata_to_apply, total_matched,
        total_missing_from_books, total_missing_from_statement)``. ``matches``
        holds ``[date, report_line]`` pairs, ``metadata_to_apply`` holds
        ``(filename, line, metadata_text)`` tuples for books records that
        matched but have no ``statement`` value yet, and the three totals are
        Decimal sums of the corresponding amounts.
    """
    matches = []
    metadata_to_apply = []
    total_matched = decimal.Decimal(0)
    total_missing_from_books = decimal.Decimal(0)
    total_missing_from_statement = decimal.Decimal(0)
    # Loop-invariant, so computed once up front.
    statement_name = os.path.basename(csv_statement)

    # TODO: (the original note here was cut off mid-sentence: "What if th")
    for r1 in statement_trans:
        for r2 in books_trans:
            match, note = records_match(r1, r2)
            if match:
                if not r2['statement'] or show_reconciled_matches:
                    matches.append([r2['date'], f'{format_record(r1)} → {format_record(r2)} ✓ {note}'])
                    total_matched += r2['amount']
                if not r2['statement']:
                    # NOTE(review): the line number after the statement name
                    # is the books line (r2), not the statement line (r1) --
                    # confirm that this is intended.
                    metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{statement_name}:{r2["line"]}"\n'))
                # Removing during iteration is safe only because we stop
                # iterating over books_trans immediately afterwards.
                books_trans.remove(r2)
                break
        else:
            # The inner loop finished without a break: nothing in the books
            # matched this statement transaction.
            matches.append([r1['date'], f'{format_record(r1)} → {" ":^45} ✗ Not in books ({statement_name}:{r1["line"]})'])
            total_missing_from_books += r1['amount']
    # Whatever is still left in books_trans was never matched.
    for r2 in books_trans:
        matches.append([r2['date'], f'{" ":^45} → {format_record(r2)} ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})'])
        total_missing_from_statement += r2['amount']
    return matches, metadata_to_apply, total_matched, total_missing_from_books, total_missing_from_statement
|
||||||
|
|
||||||
|
# TODO: Time for some test cases I think.
|
||||||
|
|
||||||
|
# TODO: Could potentially return a score so that we can find the best match from
|
||||||
|
# a pool of candidates. How would we then remove that candidate from the global
|
||||||
|
# pool?
|
||||||
|
|
||||||
def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
||||||
"""Do these records represent the same transaction?"""
|
"""Do these records represent the same transaction?"""
|
||||||
date_matches_exactly = r1['date'] == r2['date']
|
date_matches_exactly = r1['date'] == r2['date']
|
||||||
|
@ -106,123 +170,126 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
|
||||||
return False, ''
|
return False, ''
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='Reconciliation helper')
|
# TODO: Is there a way to pull the side-effecting code out of this function?
|
||||||
parser.add_argument('--beancount-file', required=True)
|
|
||||||
parser.add_argument('--csv-statement', required=True)
|
|
||||||
parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
|
|
||||||
parser.add_argument('--grep-output-filename')
|
|
||||||
# parser.add_argument('--report-group-regex')
|
|
||||||
parser.add_argument('--show-reconciled-matches', action='store_true')
|
|
||||||
parser.add_argument('--statement-total', required=True)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
statement_total = decimal.Decimal(args.statement_total)
|
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
|
||||||
|
"""Insert reconciliation metadata in the books files.
|
||||||
|
|
||||||
# TODO: Should put in a sanity check to make sure the statement you're feeding
|
Takes a list of edits to make as tuples of form (filename, lineno, metadata):
|
||||||
# in matches the account you've provided.
|
|
||||||
if 'AMEX' in args.account:
|
|
||||||
standardize_statement_record = standardize_amex_record
|
|
||||||
else:
|
|
||||||
standardize_statement_record = standardize_fr_record
|
|
||||||
|
|
||||||
with open(args.csv_statement) as f:
|
[
|
||||||
reader = csv.DictReader(f)
|
('2021/main.beancount', 4245, ' bank-statement: statement.pdf'),
|
||||||
statement_trans = sort_records([standardize_statement_record(row, reader.line_num) for row in reader])
|
('2021/main.beancount', 1057, ' bank-statement: statement.pdf'),
|
||||||
|
('2021/payroll.beancount', 257, ' bank-statement: statement.pdf'),
|
||||||
|
...,
|
||||||
|
]
|
||||||
|
|
||||||
begin_date = statement_trans[0]['date']
|
"""
|
||||||
end_date = statement_trans[-1]['date']
|
file_contents: dict[str, list] = {}
|
||||||
|
file_offsets: dict[str, int] = collections.defaultdict(int)
|
||||||
|
# Load each books file into memory and insert the relevant metadata lines.
|
||||||
|
# Line numbers change as we do this, so we keep track of the offset for each
|
||||||
|
# file. Changes must be sorted by line number first or else the offsets will
|
||||||
|
# break because we're jumping around making edits.
|
||||||
|
for filename, line, metadata in sorted(metadata_to_apply):
|
||||||
|
if filename not in file_contents:
|
||||||
|
with open(filename, 'r') as f:
|
||||||
|
file_contents[filename] = f.readlines()
|
||||||
|
# Insert is inefficient, but fast enough for now in practice.
|
||||||
|
file_contents[filename].insert(line + file_offsets[filename], metadata)
|
||||||
|
file_offsets[filename] += 1
|
||||||
|
# Writes each updated file back to disk.
|
||||||
|
for filename, contents in file_contents.items():
|
||||||
|
with open(filename, 'w') as f:
|
||||||
|
f.writelines(contents)
|
||||||
|
print(f'Wrote {filename}.')
|
||||||
|
|
||||||
# Do we traverse and filter the in-memory entries list and filter that, or do we
|
|
||||||
# use Beancount Query Language (BQL) to get a list of transactions? Currently
|
|
||||||
# using BQL.
|
|
||||||
#
|
|
||||||
# beancount.query.query_compile.compile() and
|
|
||||||
# beancount.query.query_execute.filter_entries() look useful in this respect,
|
|
||||||
# but I'm not clear on how to use compile(). An example would help.
|
|
||||||
entries, _, options = loader.load_file(args.beancount_file)
|
|
||||||
|
|
||||||
cleared_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
|
def parse_args(argv):
|
||||||
AND date <= {end_date.isoformat()} AND META('bank-statement') != NULL"""
|
parser = argparse.ArgumentParser(description='Reconciliation helper')
|
||||||
result_types, result_rows = run_query(
|
parser.add_argument('--beancount-file', required=True)
|
||||||
entries,
|
parser.add_argument('--csv-statement', required=True)
|
||||||
options,
|
parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
|
||||||
cleared_query,
|
parser.add_argument('--grep-output-filename')
|
||||||
numberify=True,
|
# parser.add_argument('--report-group-regex')
|
||||||
)
|
parser.add_argument('--show-reconciled-matches', action='store_true')
|
||||||
|
parser.add_argument('--statement-balance', type=decimal.Decimal, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
|
||||||
|
parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")
|
||||||
|
return parser.parse_args(args=argv[1:])
|
||||||
|
|
||||||
cleared_total = result_rows[0][0]
|
|
||||||
|
|
||||||
# String concatenation looks bad, but there's no SQL injection possible here
|
def main(args):
|
||||||
# because BQL can't write back to the Beancount files. I hope!
|
# TODO: Should put in a sanity check to make sure the statement you're feeding
|
||||||
query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = '{args.account}' and date >= {begin_date} and date <= {end_date}"
|
# in matches the account you've provided.
|
||||||
result_types, result_rows = run_query(
|
if 'AMEX' in args.account:
|
||||||
entries,
|
standardize_statement_record = standardize_amex_record
|
||||||
options,
|
|
||||||
query,
|
|
||||||
)
|
|
||||||
|
|
||||||
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
|
|
||||||
|
|
||||||
num_statement_records = len(statement_trans)
|
|
||||||
num_books_trans = len(books_trans)
|
|
||||||
statement_index = 0
|
|
||||||
books_index = 0
|
|
||||||
matches = []
|
|
||||||
metadata_to_apply = []
|
|
||||||
|
|
||||||
# Run through all the statement transactions to find a matching transaction in
|
|
||||||
# the books. If found, the books transaction is marked off so that it can only
|
|
||||||
# be matched once. Some transactions will be matched, some will be on the
|
|
||||||
# statement but not the books and some on the books but not the statement.
|
|
||||||
#
|
|
||||||
# Doesn't currently find exact matches when there are a bunch of transactions
|
|
||||||
# for the same amount on the same day. Probably ought to do a first pass through
|
|
||||||
# the books transactions to find a closely matching payee, then do another pass
|
|
||||||
# disregarding payee.
|
|
||||||
for r1 in statement_trans:
|
|
||||||
for r2 in books_trans:
|
|
||||||
match, note = records_match(r1, r2)
|
|
||||||
if match:
|
|
||||||
if not r2['statement'] or args.show_reconciled_matches:
|
|
||||||
matches.append([r2['date'], f'{format_record(r1)} → {format_record(r2)} ✓ {note}'])
|
|
||||||
if not r2['statement']:
|
|
||||||
metadata_to_apply.append((r2['filename'], r2['line'], f' bank-statement: "{os.path.basename(args.csv_statement)}:{r2["line"]}"\n'))
|
|
||||||
books_trans.remove(r2)
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
matches.append([r1['date'], f'{format_record(r1)} → {" ":^45} ✗ Not in books ({os.path.basename(args.csv_statement)}:{r1["line"]})'])
|
standardize_statement_record = standardize_fr_record
|
||||||
for r2 in books_trans:
|
|
||||||
matches.append([r2['date'], f'{" ":^45} → {format_record(r2)} ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})'])
|
|
||||||
|
|
||||||
print('-' * 155)
|
with open(args.csv_statement) as f:
|
||||||
print(f'{"Statement transaction":<38} {"Books transaction":<44} Notes')
|
statement_trans = read_transactions_from_csv(f, standardize_statement_record)
|
||||||
print('-' * 155)
|
|
||||||
for _, output in sorted(matches):
|
|
||||||
print(output)
|
|
||||||
print('-' * 155)
|
|
||||||
print(f'STATEMENT TOTAL: {statement_total}')
|
|
||||||
print(f'CLEARED TOTAL: {cleared_total:12,.2f}')
|
|
||||||
print('-' * 155)
|
|
||||||
|
|
||||||
# Write statement metadata back to books
|
begin_date = statement_trans[0]['date']
|
||||||
if metadata_to_apply:
|
end_date = statement_trans[-1]['date']
|
||||||
print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
|
|
||||||
if input().lower() == 'y':
|
# Do we traverse and filter the in-memory entries list and filter that, or do we
|
||||||
files = {}
|
# use Beancount Query Language (BQL) to get a list of transactions? Currently
|
||||||
# Query results aren't necessarily sequential in a file, so need to sort
|
# using BQL.
|
||||||
# so that our line number offsets work.
|
#
|
||||||
for filename, line, metadata in sorted(metadata_to_apply):
|
# beancount.query.query_compile.compile() and
|
||||||
if filename not in files:
|
# beancount.query.query_execute.filter_entries() look useful in this respect,
|
||||||
with open(filename, 'r') as f:
|
# but I'm not clear on how to use compile(). An example would help.
|
||||||
# print(f'Opening {filename}.')
|
entries, _, options = loader.load_file(args.beancount_file)
|
||||||
files[filename] = [0, f.readlines()] # Offset and contents
|
|
||||||
files[filename][1].insert(line + files[filename][0], metadata)
|
books_balance_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
|
||||||
files[filename][0] += 1
|
AND date <= {end_date.isoformat()}"""
|
||||||
# print(f'File {filename} offset {files[filename][0]}')
|
result_types, result_rows = run_query(entries, options, books_balance_query, numberify=True)
|
||||||
for filename in files:
|
books_balance = result_rows[0][0] if result_rows else 0
|
||||||
with open(filename, 'w') as f:
|
|
||||||
f.writelines(files[filename][1])
|
books_balance_reconciled_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
|
||||||
print(f'Wrote {filename}.')
|
AND date <= {end_date.isoformat()} AND META('bank-statement') != NULL"""
|
||||||
|
result_types, result_rows = run_query(entries, options, books_balance_reconciled_query, numberify=True)
|
||||||
|
books_balance_reconciled = result_rows[0][0] if result_rows else 0
|
||||||
|
|
||||||
|
# String concatenation looks bad, but there's no SQL injection possible here
|
||||||
|
# because BQL can't write back to the Beancount files. I hope!
|
||||||
|
query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = '{args.account}' and date >= {begin_date} and date <= {end_date}"
|
||||||
|
result_types, result_rows = run_query(entries, options, query)
|
||||||
|
|
||||||
|
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
|
||||||
|
|
||||||
|
matches, metadata_to_apply, total_matched, total_missing_from_books, total_missing_from_statement = match_statement_and_books(
|
||||||
|
statement_trans, books_trans, args.show_reconciled_matches, args.csv_statement)
|
||||||
|
|
||||||
|
out = io.StringIO()
|
||||||
|
print('-' * 155)
|
||||||
|
print(f'{"Statement transaction":<38} {"Books transaction":<44} Notes')
|
||||||
|
print('-' * 155)
|
||||||
|
for _, output in sorted(matches):
|
||||||
|
print(output)
|
||||||
|
print('-' * 155)
|
||||||
|
print(f'Period: {begin_date} to {end_date}')
|
||||||
|
print(f'Statement/cleared balance: {args.statement_balance:12,.2f} (as provided by you)')
|
||||||
|
print(f'Books balance (all): {books_balance:12,.2f} (all transactions, includes unreconciled)')
|
||||||
|
print(f'Books balance (reconciled): {books_balance_reconciled:12,.2f} (transactions with "bank-statement" tag only)')
|
||||||
|
print(f'Matched above: {total_matched:12,.2f} ("bank-statement" tag yet to be applied)')
|
||||||
|
print(f'On statement only: {total_missing_from_books:12,.2f} (no match in books)')
|
||||||
|
print(f'On books only: {total_missing_from_statement:12,.2f} (no match on statement)')
|
||||||
|
print('-' * 155)
|
||||||
|
# print(f'Remaining to reconcile: {books_balance - books_balance_reconciled - total_matched:12,.2f}')
|
||||||
|
# print(f'Total reconciled inc. above: {books_balance_reconciled + total_matched:12,.2f}')
|
||||||
|
# print('-' * 155)
|
||||||
|
|
||||||
|
# Write statement metadata back to books
|
||||||
|
if metadata_to_apply and not args.non_interactive:
|
||||||
|
print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
|
||||||
|
if input().lower() == 'y':
|
||||||
|
write_metadata_to_books(metadata_to_apply)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
args = parse_args(sys.argv)
|
||||||
|
main(args)
|
||||||
|
|
||||||
# Local Variables:
|
# Local Variables:
|
||||||
# python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"
|
# python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"
|
||||||
|
|
Loading…
Reference in a new issue