reconcile: Move code into functions, add totals.

parent 4bb6177e45
commit 6d7df795cb

1 changed file with 182 additions and 115 deletions
@@ -1,5 +1,12 @@
 """Reconcile an AMEX CSV statement against the books and print differences.
 
+Beancount users often write importers to create bookkeeping entries directly from
+a bank statement or similar. That approach automates data entry and
+reconciliation in one step. In some cases, though, it's useful to manually enter
+transactions and reconcile them later on. This workflow is helpful in cases like
+writing a paper check, where there's a time lag between committing to making a
+payment and the funds being debited. That's the workflow we're using here.
+
 Run like this:
 
 $ python3 -m pip install thefuzz
@@ -36,18 +43,31 @@ TODO/ISSUES:
 
 """
 import argparse
+import collections
 import csv
 import datetime
 import decimal
+import io
 import os
-from typing import Dict, List, Tuple
+import sys
+from typing import Callable, Dict, List, Tuple, TextIO
 
 from beancount import loader
 from beancount.query.query import run_query
+
+if not sys.warnoptions:
+    import warnings
+    # Disable annoying warning from thefuzz prompting for a C extension. The
+    # current pure-Python implementation isn't a bottleneck for us.
+    warnings.filterwarnings('ignore', category=UserWarning, module='thefuzz.fuzz')
 from thefuzz import fuzz  # type: ignore
 
 # NOTE: Statement doesn't seem to give us a running balance or a final total.
 
+def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
+    reader = csv.DictReader(f)
+    return sort_records([standardize_statement_record(row, reader.line_num) for row in reader])
+
 
 def standardize_amex_record(row: Dict, line: int) -> Dict:
     """Turn an AMEX CSV row into a standard dict format representing a transaction."""
@@ -59,6 +79,15 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
     }
 
 
+def standardize_fr_record(row: Dict, line: int) -> Dict:
+    return {
+        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
+        'amount': decimal.Decimal(row['Amount']),
+        'payee': row['Detail'] or row['Description'],
+        'line': line,
+    }
+
+
 def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
     """Turn a Beancount query result row into a standard dict representing a transaction."""
     return {
@@ -70,14 +99,6 @@ def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
         'statement': row.posting_statement,
     }
 
-def standardize_fr_record(row: Dict, line: int) -> Dict:
-    return {
-        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
-        'amount': decimal.Decimal(row['Amount']),
-        'payee': row['Detail'] or row['Description'],
-        'line': line,
-    }
-
 
 def format_record(record: Dict) -> str:
     return f"{record['date'].isoformat()}: {record['amount']:12,.2f} {record['payee'][:20]:<20}"
@@ -87,6 +108,49 @@ def sort_records(records: List) -> List:
     return sorted(records, key=lambda x: (x['date'], x['amount']))
 
 
+def match_statement_and_books(statement_trans: list, books_trans: list, show_reconciled_matches: bool, csv_statement: str) -> tuple[list, list, decimal.Decimal, decimal.Decimal, decimal.Decimal]:
+    matches = []
+    metadata_to_apply = []
+    total_matched = decimal.Decimal(0)
+    total_missing_from_books = decimal.Decimal(0)
+    total_missing_from_statement = decimal.Decimal(0)
+
+    # Run through all the statement transactions to find a matching transaction in
+    # the books. If found, the books transaction is marked off so that it can only
+    # be matched once. Some transactions will be matched, some will be on the
+    # statement but not the books and some on the books but not the statement.
+    #
+    # Doesn't currently find exact matches when there are a bunch of transactions
+    # for the same amount on the same day. Probably ought to do a first pass through
+    # the books transactions to find a closely matching payee, then do another pass
+    # disregarding payee.
+
+    # TODO: What if th
+    for r1 in statement_trans:
+        for r2 in books_trans:
+            match, note = records_match(r1, r2)
+            if match:
+                if not r2['statement'] or show_reconciled_matches:
+                    matches.append([r2['date'], f'{format_record(r1)}  →  {format_record(r2)}  ✓ {note}'])
+                    total_matched += r2['amount']
+                if not r2['statement']:
+                    metadata_to_apply.append((r2['filename'], r2['line'], f'    bank-statement: "{os.path.basename(csv_statement)}:{r2["line"]}"\n'))
+                books_trans.remove(r2)
+                break
+        else:
+            matches.append([r1['date'], f'{format_record(r1)}  →  {" ":^45}  ✗ Not in books ({os.path.basename(csv_statement)}:{r1["line"]})'])
+            total_missing_from_books += r1['amount']
+    for r2 in books_trans:
+        matches.append([r2['date'], f'{" ":^45}  →  {format_record(r2)}  ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})'])
+        total_missing_from_statement += r2['amount']
+    return matches, metadata_to_apply, total_matched, total_missing_from_books, total_missing_from_statement
+
+# TODO: Time for some test cases I think.
+
+# TODO: Could potentially return a score so that we can find the best match from
+# a pool of candidates. How would we then remove that candidate from the global
+# pool?
+
 def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
     """Do these records represent the same transaction?"""
     date_matches_exactly = r1['date'] == r2['date']
@@ -106,123 +170,126 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
         return False, ''
 
 
-parser = argparse.ArgumentParser(description='Reconciliation helper')
-parser.add_argument('--beancount-file', required=True)
-parser.add_argument('--csv-statement', required=True)
-parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
-parser.add_argument('--grep-output-filename')
-# parser.add_argument('--report-group-regex')
-parser.add_argument('--show-reconciled-matches', action='store_true')
-parser.add_argument('--statement-total', required=True)
-args = parser.parse_args()
-
-statement_total = decimal.Decimal(args.statement_total)
-
-# TODO: Should put in a sanity check to make sure the statement you're feeding
-# in matches the account you've provided.
-if 'AMEX' in args.account:
-    standardize_statement_record = standardize_amex_record
-else:
-    standardize_statement_record = standardize_fr_record
-
-with open(args.csv_statement) as f:
-    reader = csv.DictReader(f)
-    statement_trans = sort_records([standardize_statement_record(row, reader.line_num) for row in reader])
-
-begin_date = statement_trans[0]['date']
-end_date = statement_trans[-1]['date']
-
-# Do we traverse and filter the in-memory entries list and filter that, or do we
-# use Beancount Query Language (BQL) to get a list of transactions? Currently
-# using BQL.
-#
-# beancount.query.query_compile.compile() and
-# beancount.query.query_execute.filter_entries() look useful in this respect,
-# but I'm not clear on how to use compile(). An example would help.
-entries, _, options = loader.load_file(args.beancount_file)
-
-cleared_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
-    AND date <= {end_date.isoformat()} AND META('bank-statement') != NULL"""
-result_types, result_rows = run_query(
-    entries,
-    options,
-    cleared_query,
-    numberify=True,
-)
-
-cleared_total = result_rows[0][0]
-
-# String concatenation looks bad, but there's no SQL injection possible here
-# because BQL can't write back to the Beancount files. I hope!
-query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = '{args.account}' and date >= {begin_date} and date <= {end_date}"
-result_types, result_rows = run_query(
-    entries,
-    options,
-    query,
-)
-
-books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
-
-num_statement_records = len(statement_trans)
-num_books_trans = len(books_trans)
-statement_index = 0
-books_index = 0
-matches = []
-metadata_to_apply = []
-
-# Run through all the statement transactions to find a matching transaction in
-# the books. If found, the books transaction is marked off so that it can only
-# be matched once. Some transactions will be matched, some will be on the
-# statement but not the books and some on the books but not the statement.
-#
-# Doesn't currently find exact matches when there are a bunch of transactions
-# for the same amount on the same day. Probably ought to do a first pass through
-# the books transactions to find an closely matching payee, then do another pass
-# disregarding payee.
-for r1 in statement_trans:
-    for r2 in books_trans:
-        match, note = records_match(r1, r2)
-        if match:
-            if not r2['statement'] or args.show_reconciled_matches:
-                    matches.append([r2['date'], f'{format_record(r1)}  →  {format_record(r2)}  ✓ {note}'])
-            if not r2['statement']:
-                metadata_to_apply.append((r2['filename'], r2['line'], f'    bank-statement: "{os.path.basename(args.csv_statement)}:{r2["line"]}"\n'))
-            books_trans.remove(r2)
-            break
-    else:
-        matches.append([r1['date'], f'{format_record(r1)}  →  {" ":^45}  ✗ Not in books ({os.path.basename(args.csv_statement)}:{r1["line"]})'])
-for r2 in books_trans:
-    matches.append([r2['date'], f'{" ":^45}  →  {format_record(r2)}  ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})'])
-
-print('-' * 155)
-print(f'{"Statement transaction":<38}            {"Books transaction":<44}   Notes')
-print('-' * 155)
-for _, output in sorted(matches):
-    print(output)
-print('-' * 155)
-print(f'STATEMENT TOTAL: {statement_total}')
-print(f'CLEARED TOTAL:   {cleared_total:12,.2f}')
-print('-' * 155)
-
-# Write statement metadata back to books
-if metadata_to_apply:
-    print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
-    if input().lower() == 'y':
-        files = {}
-        # Query results aren't necessarily sequential in a file, so need to sort
-        # so that our line number offsets work.
-        for filename, line, metadata in sorted(metadata_to_apply):
-            if filename not in files:
-                with open(filename, 'r') as f:
-                    # print(f'Opening {filename}.')
-                    files[filename] = [0, f.readlines()]  # Offset and contents
-            files[filename][1].insert(line + files[filename][0], metadata)
-            files[filename][0] += 1
-            # print(f'File {filename} offset {files[filename][0]}')
-        for filename in files:
-            with open(filename, 'w') as f:
-                f.writelines(files[filename][1])
-                print(f'Wrote {filename}.')
+# TODO: Is there a way to pull the side-effecting code out of this function?
+
+def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
+    """Insert reconciliation metadata in the books files.
+
+    Takes a list of edits to make as tuples of the form (filename, lineno, metadata):
+
+    [
+        ('2021/main.beancount', 4245, '    bank-statement: statement.pdf'),
+        ('2021/main.beancount', 1057, '    bank-statement: statement.pdf'),
+        ('2021/payroll.beancount', 257, '    bank-statement: statement.pdf'),
+        ...,
+    ]
+
+    """
+    file_contents: dict[str, list] = {}
+    file_offsets: dict[str, int] = collections.defaultdict(int)
+    # Load each books file into memory and insert the relevant metadata lines.
+    # Line numbers change as we do this, so we keep track of the offset for each
+    # file. Changes must be sorted by line number first or else the offsets will
+    # break because we're jumping around making edits.
+    for filename, line, metadata in sorted(metadata_to_apply):
+        if filename not in file_contents:
+            with open(filename, 'r') as f:
+                file_contents[filename] = f.readlines()
+        # Insert is inefficient, but fast enough for now in practice.
+        file_contents[filename].insert(line + file_offsets[filename], metadata)
+        file_offsets[filename] += 1
+    # Write each updated file back to disk.
+    for filename, contents in file_contents.items():
+        with open(filename, 'w') as f:
+            f.writelines(contents)
+            print(f'Wrote {filename}.')
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(description='Reconciliation helper')
+    parser.add_argument('--beancount-file', required=True)
+    parser.add_argument('--csv-statement', required=True)
+    parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
+    parser.add_argument('--grep-output-filename')
+    # parser.add_argument('--report-group-regex')
+    parser.add_argument('--show-reconciled-matches', action='store_true')
+    parser.add_argument('--statement-balance', type=decimal.Decimal, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
+    parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")
+    return parser.parse_args(args=argv[1:])
+
+
+def main(args):
+    # TODO: Should put in a sanity check to make sure the statement you're feeding
+    # in matches the account you've provided.
+    if 'AMEX' in args.account:
+        standardize_statement_record = standardize_amex_record
+    else:
+        standardize_statement_record = standardize_fr_record
+
+    with open(args.csv_statement) as f:
+        statement_trans = read_transactions_from_csv(f, standardize_statement_record)
+
+    begin_date = statement_trans[0]['date']
+    end_date = statement_trans[-1]['date']
+
+    # Do we traverse and filter the in-memory entries list and filter that, or do we
+    # use Beancount Query Language (BQL) to get a list of transactions? Currently
+    # using BQL.
+    #
+    # beancount.query.query_compile.compile() and
+    # beancount.query.query_execute.filter_entries() look useful in this respect,
+    # but I'm not clear on how to use compile(). An example would help.
+    entries, _, options = loader.load_file(args.beancount_file)
+
+    books_balance_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
+        AND date <= {end_date.isoformat()}"""
+    result_types, result_rows = run_query(entries, options, books_balance_query, numberify=True)
+    books_balance = result_rows[0][0] if result_rows else 0
+
+    books_balance_reconciled_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
+        AND date <= {end_date.isoformat()} AND META('bank-statement') != NULL"""
+    result_types, result_rows = run_query(entries, options, books_balance_reconciled_query, numberify=True)
+    books_balance_reconciled = result_rows[0][0] if result_rows else 0
+
+    # String concatenation looks bad, but there's no SQL injection possible here
+    # because BQL can't write back to the Beancount files. I hope!
+    query = f"SELECT filename, META('lineno') AS posting_line, META('bank-statement') AS posting_statement, date, number(cost(position)), payee, narration where account = '{args.account}' and date >= {begin_date} and date <= {end_date}"
+    result_types, result_rows = run_query(entries, options, query)
+
+    books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
+
+    matches, metadata_to_apply, total_matched, total_missing_from_books, total_missing_from_statement = match_statement_and_books(
+        statement_trans, books_trans, args.show_reconciled_matches, args.csv_statement)
+
+    out = io.StringIO()
+    print('-' * 155)
+    print(f'{"Statement transaction":<38}            {"Books transaction":<44}   Notes')
+    print('-' * 155)
+    for _, output in sorted(matches):
+        print(output)
+    print('-' * 155)
+    print(f'Period: {begin_date} to {end_date}')
+    print(f'Statement/cleared balance:  {args.statement_balance:12,.2f}    (as provided by you)')
+    print(f'Books balance (all):        {books_balance:12,.2f}    (all transactions, includes unreconciled)')
+    print(f'Books balance (reconciled): {books_balance_reconciled:12,.2f}    (transactions with "bank-statement" tag only)')
+    print(f'Matched above:              {total_matched:12,.2f}    ("bank-statement" tag yet to be applied)')
+    print(f'On statement only:          {total_missing_from_books:12,.2f}    (no match in books)')
+    print(f'On books only:              {total_missing_from_statement:12,.2f}    (no match on statement)')
+    print('-' * 155)
+    # print(f'Remaining to reconcile:          {books_balance - books_balance_reconciled - total_matched:12,.2f}')
+    # print(f'Total reconciled inc. above:    {books_balance_reconciled + total_matched:12,.2f}')
+    # print('-' * 155)
+
+    # Write statement metadata back to books
+    if metadata_to_apply and not args.non_interactive:
+        print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
+        if input().lower() == 'y':
+            write_metadata_to_books(metadata_to_apply)
+
+
+if __name__ == '__main__':
+    args = parse_args(sys.argv)
+    main(args)
 
 # Local Variables:
 # python-shell-interpreter: "/home/ben/\.virtualenvs/conservancy-beancount-py39/bin/python"
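
Now that the write-back logic lives in write_metadata_to_books(), the "Time for some test cases" TODO becomes practical. As a rough sketch only, not part of this commit: the test below exercises the line-offset behaviour of write_metadata_to_books(). It assumes the script is importable as a module named reconcile (a guess at the file name), and the file contents and metadata strings are made up for illustration.

import os
import tempfile
import unittest

import reconcile  # hypothetical module name for the script above


class WriteMetadataToBooksTest(unittest.TestCase):
    def test_inserts_metadata_with_offsets(self):
        # Line numbers in metadata_to_apply refer to the original file, so the
        # second edit must land correctly after the first insert has shifted
        # everything below it down by one line.
        with tempfile.TemporaryDirectory() as tmp:
            path = os.path.join(tmp, 'main.beancount')
            with open(path, 'w') as f:
                f.write('line 1\nline 2\nline 3\n')
            reconcile.write_metadata_to_books([
                (path, 1, '    bank-statement: "statement.csv:10"\n'),
                (path, 3, '    bank-statement: "statement.csv:20"\n'),
            ])
            with open(path) as f:
                self.assertEqual(f.readlines(), [
                    'line 1\n',
                    '    bank-statement: "statement.csv:10"\n',
                    'line 2\n',
                    'line 3\n',
                    '    bank-statement: "statement.csv:20"\n',
                ])


if __name__ == '__main__':
    unittest.main()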