From 3f735bec954038aabf5da27f5253c593bdd5d43c Mon Sep 17 00:00:00 2001
From: Ben Sturmfels <ben@sturm.com.au>
Date: Wed, 9 Feb 2022 14:03:05 +1100
Subject: [PATCH] reconcile: Show file and line number of mismatches.

---
 .../reconcile/prototype_amex_reconciler.py    | 56 +++++++++++--------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/conservancy_beancount/reconcile/prototype_amex_reconciler.py b/conservancy_beancount/reconcile/prototype_amex_reconciler.py
index 84cd638..5bba689 100644
--- a/conservancy_beancount/reconcile/prototype_amex_reconciler.py
+++ b/conservancy_beancount/reconcile/prototype_amex_reconciler.py
@@ -13,14 +13,14 @@ This tool is still somewhat like an importer in that it needs to extract
 transaction details from a third-party statement. Instead of creating
 directives, it just checks to see that similar directives are already present.
 
-Problem this attempts to address:
- - errors in the books take hours to find during reconciliation ("you're entering a world of pain"
- - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
- - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
+Problems this attempts to address:
+ - errors in the books take hours to find during reconciliation ("you're entering a world of pain")
  - adding statement/reconciliation metadata to books is manual and prone to mistakes
+ - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
+ - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
  - creating reconciliation reports
- - normally transactions are entered manually, but potentially could create transaction directives (a.k.a. importing)
  - jumping to an individual transaction in the books isn't trivial - Emacs grep mode is helpful
+ - normally transactions are entered manually, but potentially could create transaction directives (a.k.a. importing)
 
 Q. How are reconciliation reports created currently? How do you read them?
 
@@ -32,6 +32,7 @@ import argparse
 import csv
 import datetime
 import decimal
+import os
 from typing import Dict, List, Tuple
 
 from beancount import loader
@@ -41,11 +42,12 @@ from thefuzz import fuzz  # type: ignore
 # NOTE: Statement doesn't seem to give us a running balance or a final total.
 
 
-def standardize_amex_record(row: Dict) -> Dict:
+def standardize_amex_record(row: Dict, line: int) -> Dict:
     """Turn an AMEX CSV row into a standard dict format representing a transaction."""
     return {
         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
         'amount': -1 * decimal.Decimal(row['Amount']),
+        'line': line,
         'payee': row['Description'] or '',
     }
 
@@ -61,16 +63,17 @@ def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
         'statement': row.posting_statement,
     }
 
-def standardize_fr_record(row: Dict) -> Dict:
+def standardize_fr_record(row: Dict, line: int) -> Dict:
     return {
         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
         'amount': decimal.Decimal(row['Amount']),
-        'payee': row['Detail'] or '',
+        'payee': row['Detail'] or row['Description'],
+        'line': line,
     }
 
 
 def format_record(record: Dict) -> str:
-    return f"{record['date'].isoformat()}: {record['amount']:>11} {record['payee'][:20]:<20}"
+    return f"{record['date'].isoformat()}: {record['amount']:12,.2f} {record['payee'][:20]:<20}"
 
 
 def sort_records(records: List) -> List:
@@ -79,14 +82,19 @@ def sort_records(records: List) -> List:
 
 def records_match(r1: Dict, r2: Dict) -> Tuple[bool, str]:
     """Do these records represent the same transaction?"""
-    date_matches = r1['date'] >= r2['date'] - datetime.timedelta(days=1) and r1['date'] <= r2['date'] + datetime.timedelta(days=1)
+    date_matches_exactly = r1['date'] == r2['date']
+    date_matches_loosly = r1['date'] >= r2['date'] - datetime.timedelta(days=3) and r1['date'] <= r2['date'] + datetime.timedelta(days=3)
     amount_matches = r1['amount'] == r2['amount']
     payee_match_quality = fuzz.token_set_ratio(r1['payee'], r2['payee'])
     payee_matches = payee_match_quality > 50
-    if date_matches and amount_matches and payee_matches:
+    if date_matches_exactly and amount_matches and payee_matches:
         return True, 'Matched'
-    elif date_matches and amount_matches:
+    elif date_matches_loosly and amount_matches and payee_matches:
+        return True, 'Matched +/- 3 days'
+    elif date_matches_exactly and amount_matches:
         return True, f'Matched with low-payee match ({payee_match_quality}%)'
+    elif date_matches_loosly and amount_matches:
+        return True, f'Matched +/- 3 days, low-payee match ({payee_match_quality}%)'
     else:
         return False, ''
 
@@ -109,7 +117,7 @@ else:
 
 with open(args.csv_statement) as f:
     reader = csv.DictReader(f)
-    statement_trans = sort_records([standardize_statement_record(row) for row in reader])
+    statement_trans = sort_records([standardize_statement_record(row, reader.line_num) for row in reader])
 
 begin_date = statement_trans[0]['date']
 end_date = statement_trans[-1]['date']
@@ -132,7 +140,6 @@ result_types, result_rows = run_query(
     query,
 )
 
-
 books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
 
 num_statement_records = len(statement_trans)
@@ -146,27 +153,32 @@ metadata_to_apply = []
 # the books. If found, the books transaction is marked off so that it can only
 # be matched once. Some transactions will be matched, some will be on the
 # statement but not the books and some on the books but not the statement.
+#
+# Doesn't currently find the best match when there are several transactions
+# for the same amount on the same day. Probably ought to do a first pass through
+# the books transactions to find a closely matching payee, then do another pass
+# disregarding payee.
 for r1 in statement_trans:
     for r2 in books_trans:
         match, note = records_match(r1, r2)
         if match:
             if not r2['statement'] or args.show_reconciled_matches:
-                    matches.append([r2['date'], f'{format_record(r1)}    -->    {format_record(r2)}  ✓ {note}'])
+                    matches.append([r2['date'], f'{format_record(r1)}  →  {format_record(r2)}  ✓ {note}'])
             if not r2['statement']:
-                metadata_to_apply.append((r2['filename'], r2['line'], f'    bank-statement: "{args.csv_statement}"\n'))
+                metadata_to_apply.append((r2['filename'], r2['line'], f'    bank-statement: "{os.path.basename(args.csv_statement)}:{r1["line"]}"\n'))
             books_trans.remove(r2)
             break
     else:
-        matches.append([r1['date'], f'{format_record(r1)}    -->    {" ":^44}  ✗ Not in books'])
+        matches.append([r1['date'], f'{format_record(r1)}  →  {" ":^45}  ✗ Not in books ({os.path.basename(args.csv_statement)}:{r1["line"]})'])
 for r2 in books_trans:
-    matches.append([r2['date'], f'{" ":^44}    -->    {format_record(r2)}  ✗ Not on statement'])
+    matches.append([r2['date'], f'{" ":^45}  →  {format_record(r2)}  ✗ Not on statement ({os.path.basename(r2["filename"])}:{r2["line"]})'])
 
-print(f'-----------------------------------------------------------------------------------------------------------------')
-print(f'{"STATEMENT":<40}            {"BOOKS":<40}   NOTES')
-print(f'-----------------------------------------------------------------------------------------------------------------')
+print('-' * 155)
+print(f'{"STATEMENT":<38}            {"BOOKS":<44}   NOTES')
+print('-' * 155)
 for _, output in sorted(matches):
     print(output)
-print(f'-----------------------------------------------------------------------------------------------------------------')
+print('-' * 155)
 
 # Write statement metadata back to books
 if metadata_to_apply: