reconcile: CLI entrypoint, improve docs.
This commit is contained in:
		
							parent
							
								
									59dfbb78d1
								
							
						
					
					
						commit
						fb5d0a57f3
					
				
					 2 changed files with 102 additions and 42 deletions
				
			
		| 
						 | 
					@ -228,6 +228,7 @@ for desc, query in QUERIES.items():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows]
 | 
					uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows]
 | 
				
			||||||
report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod))
 | 
					report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod))
 | 
				
			||||||
 | 
					# TODO: Make the directory if it doesn't exist.
 | 
				
			||||||
with open(report_path, 'w') as f:
 | 
					with open(report_path, 'w') as f:
 | 
				
			||||||
    f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, []))
 | 
					    f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, []))
 | 
				
			||||||
print(f'Wrote reconciliation report: {report_path}.')
 | 
					print(f'Wrote reconciliation report: {report_path}.')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,44 +1,97 @@
 | 
				
			||||||
"""Reconcile an AMEX/FR CSV statement against the books and print differences.
 | 
					"""Compare a bank CSV statement with the books.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Beancount users often write importers to create bookkeeping entries direct from
 | 
					This tool takes an AMEX or First Republic CSV statement file and
 | 
				
			||||||
a bank statement or similar. That approach automates data entry and
 | 
					compares it line-by-line with the Beancount books to make sure that
 | 
				
			||||||
reconciliation in one step. In some cases though, it's useful to manually enter
 | 
					everything matches. This is designed for situations where transactions
 | 
				
			||||||
transactions and reconcile them later on. This workflow is helpful in cases like
 | 
					are entered into the books directly, rather than being imported from a
 | 
				
			||||||
writing a paper check when there's a time lag between committing to making a
 | 
					statement after the fact.
 | 
				
			||||||
payment and the funds being debited. That's the workflow we're using here.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Run like this:
 | 
					The reconciler will attempt to match transactions based on date,
 | 
				
			||||||
 | 
					amount, check number and payee, but is forgiving to differences in
 | 
				
			||||||
 | 
					dates, the absence of check number and inexact matches on
 | 
				
			||||||
 | 
					payee. Matches are ranked, so where there is only one decent match for
 | 
				
			||||||
 | 
					an amount/date this is accepted, but if there are multiple similar
 | 
				
			||||||
 | 
					candidates it will refuse to guess.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
$ python3 -m pip install thefuzz
 | 
					The reconciler will also attempt to identify where a single statement
 | 
				
			||||||
$ python3 conservancy_beancount/reconcile/statement_reconciler.py \
 | 
					entry has been split out into multiple Beancount postings, such as a
 | 
				
			||||||
  --beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
 | 
					single bank transfer representing health insurance for multiple
 | 
				
			||||||
  --csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \
 | 
					employees.
 | 
				
			||||||
  --account=Liabilities:CreditCard:AMEX
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Conservancy currently enter data by hand rather than using Beancount importers.
 | 
					Run it like this:
 | 
				
			||||||
This tool is still somewhat like an importer in that it needs to extract
 | 
					
 | 
				
			||||||
transaction details from a third-party statement. Instead of creating
 | 
					$ statement_reconciler \
 | 
				
			||||||
directives, it just checks to see that similar directives are already present.
 | 
					  --beancount-file=2021.beancount \
 | 
				
			||||||
 | 
					  --account=Liabilities:CreditCard:AMEX \
 | 
				
			||||||
 | 
					  --csv-statement=2021-09-10_AMEX_activity.csv \
 | 
				
			||||||
 | 
					  --bank-statement=2021-09-10_AMEX_activity.csv \
 | 
				
			||||||
 | 
					  --statement-balance=1000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Background:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Beancount users often write importers to create bookkeeping entries
 | 
				
			||||||
 | 
					direct from a bank statement or similar. That approach automates data
 | 
				
			||||||
 | 
					entry and reconciliation in one step. In some cases though, it's
 | 
				
			||||||
 | 
					useful to manually enter transactions and reconcile them later
 | 
				
			||||||
 | 
					on. This workflow is helpful in cases like writing a paper check when
 | 
				
			||||||
 | 
					there's a time lag between committing to making a payment and the
 | 
				
			||||||
 | 
					funds being debited. That's the workflow we're using here.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Conservancy currently enter data by hand rather than using Beancount
 | 
				
			||||||
 | 
					importers.  This tool is still somewhat like an importer in that it
 | 
				
			||||||
 | 
					needs to extract transaction details from a third-party
 | 
				
			||||||
 | 
					statement. Instead of creating directives, it just checks to see that
 | 
				
			||||||
 | 
					similar directives are already present. This is a bit like diff-ing a
 | 
				
			||||||
 | 
					statement with the books (though we're only interested in the presence
 | 
				
			||||||
 | 
					of lines, not so much their order).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Problems in scope:
 | 
					Problems in scope:
 | 
				
			||||||
 - errors in the books take hours to find during reconciliation ("you're entering a world of pain")
 | 
					
 | 
				
			||||||
 - adding statement/reconciliation metadata to books is manual and prone to mistakes
 | 
					 - errors in the books take hours to find during reconciliation,
 | 
				
			||||||
 - Beancount doesn't provide any infrastructure for programmatically updating the books, only appending
 | 
					   requiring manually comparing statements and the books and are
 | 
				
			||||||
 - after updates to the books files, beancount must be restarted to reflect updates
 | 
					   susceptible to mistakes, such as not noticing when there are two
 | 
				
			||||||
 - updates also invalidate the cache meaning restart takes several minutes
 | 
					   payments for the same amount on the statement, but not in the books
 | 
				
			||||||
 - paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
 | 
					   ("you're entering a world of pain")
 | 
				
			||||||
 - balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
 | 
					
 | 
				
			||||||
 - jumping to an individual transaction in a large ledger isn't trivial - Emacs grep mode is the current best option
 | 
					 - adding statement/reconciliation metadata to books is/was manual and
 | 
				
			||||||
 | 
					   prone to mistakes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - Beancount doesn't provide any infrastructure for programmatically
 | 
				
			||||||
 | 
					   updating the books, only appending in the case of importers
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - paper checks are entered in the books when written, but may not be
 | 
				
			||||||
 | 
					   cashed until months later (reconcile errors)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - jumping to an individual transaction in a large ledger isn't
 | 
				
			||||||
 | 
					   trivial - Emacs grep mode is the current best option
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 - Pam and other staff don't use Emacs
 | 
					 - Pam and other staff don't use Emacs
 | 
				
			||||||
 - auditors would prefer Bradley didn't perform reconciliation, ideally not Rosanne either
 | 
					 | 
				
			||||||
 - transactions are entered manually and reconciled after the fact, but importing from statements may be useful in some cases
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Q. How are reconciliation reports created currently? How do you read them?
 | 
					 - auditors would prefer Bradley didn't perform reconciliation,
 | 
				
			||||||
 - by hand from copying and pasting from the helper tool output
 | 
					   ideally not Rosanne either
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Problem is potentially similar to diff-ing, but in the books, transaction order isn't super significant.
 | 
					 - reconciliation reports are created by hand when there are mismatches
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Other related problems we're not dealing with here:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - after updates to the books files, beancount must be restarted to
 | 
				
			||||||
 | 
					   reflect updates
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - updates also invalidate the cache meaning restart takes several
 | 
				
			||||||
 | 
					   minutes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - balance checks are manually updated in
 | 
				
			||||||
 | 
					   svn/Financial/Ledger/sanity-check-balances.yaml
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - transactions are entered manually and reconciled after the fact,
 | 
				
			||||||
 | 
					   but importing from statements may be useful in some cases
 | 
				
			||||||
 | 
					
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO:
 | 
				
			||||||
 | 
					#  - extract the magic numbers
 | 
				
			||||||
 | 
					#  - consider merging in helper.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import argparse
 | 
					import argparse
 | 
				
			||||||
import collections
 | 
					import collections
 | 
				
			||||||
import copy
 | 
					import copy
 | 
				
			||||||
| 
						 | 
					@ -65,7 +118,7 @@ if not sys.warnoptions:
 | 
				
			||||||
from thefuzz import fuzz  # type: ignore
 | 
					from thefuzz import fuzz  # type: ignore
 | 
				
			||||||
 | 
					
 | 
				
			||||||
logger = logging.getLogger()
 | 
					logger = logging.getLogger()
 | 
				
			||||||
logger.setLevel(logging.DEBUG)
 | 
					logger.setLevel(logging.INFO)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Console logging.
 | 
					# Console logging.
 | 
				
			||||||
logger.addHandler(logging.StreamHandler())
 | 
					logger.addHandler(logging.StreamHandler())
 | 
				
			||||||
| 
						 | 
					@ -105,6 +158,14 @@ def remove_duplicate_words(text: str) -> str:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def remove_payee_junk(payee: str) -> str:
 | 
					def remove_payee_junk(payee: str) -> str:
 | 
				
			||||||
 | 
					    """Clean up payee field to improve quality of fuzzy matching.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    It turns out that bank statement "description" fields are
 | 
				
			||||||
 | 
					    difficult to fuzzy match on because they're long and
 | 
				
			||||||
 | 
					    noisy. Truncating them (see standardize_XXX_record fns) and
 | 
				
			||||||
 | 
					    removing the common junk helps significantly.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
    for r in JUNK_WORDS_RES:
 | 
					    for r in JUNK_WORDS_RES:
 | 
				
			||||||
        payee = r.sub('', payee)
 | 
					        payee = r.sub('', payee)
 | 
				
			||||||
    payee = ZERO_RE.sub('', payee)
 | 
					    payee = ZERO_RE.sub('', payee)
 | 
				
			||||||
| 
						 | 
					@ -129,8 +190,11 @@ def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable
 | 
				
			||||||
    return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
 | 
					    return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# CSV reconciliation report.
 | 
					def validate_amex_csv(sample: str, account: str) -> None:
 | 
				
			||||||
# Merge helper script?
 | 
					    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
 | 
				
			||||||
 | 
					    reader = csv.DictReader(io.StringIO(sample))
 | 
				
			||||||
 | 
					    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 | 
				
			||||||
 | 
					        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def standardize_amex_record(row: Dict, line: int) -> Dict:
 | 
					def standardize_amex_record(row: Dict, line: int) -> Dict:
 | 
				
			||||||
| 
						 | 
					@ -147,13 +211,6 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def validate_amex_csv(sample: str, account: str) -> None:
 | 
					 | 
				
			||||||
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
 | 
					 | 
				
			||||||
    reader = csv.DictReader(io.StringIO(sample))
 | 
					 | 
				
			||||||
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
 | 
					 | 
				
			||||||
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def validate_fr_csv(sample: str, account: str) -> None:
 | 
					def validate_fr_csv(sample: str, account: str) -> None:
 | 
				
			||||||
    required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
 | 
					    required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
 | 
				
			||||||
    reader = csv.DictReader(io.StringIO(sample))
 | 
					    reader = csv.DictReader(io.StringIO(sample))
 | 
				
			||||||
| 
						 | 
					@ -346,8 +403,6 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str,
 | 
				
			||||||
    return metadata
 | 
					    return metadata
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# TODO: Is there a way to pull the side-effecting code out of this function?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
 | 
					def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
 | 
				
			||||||
    """Insert reconciliation metadata in the books files.
 | 
					    """Insert reconciliation metadata in the books files.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -555,3 +610,7 @@ def main(args: argparse.Namespace) -> None:
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    args = parse_args(sys.argv)
 | 
					    args = parse_args(sys.argv)
 | 
				
			||||||
    main(args)
 | 
					    main(args)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def entry_point():
 | 
				
			||||||
 | 
					    args = parse_args(sys.argv)
 | 
				
			||||||
 | 
					    main(args)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue