reconcile: CLI entrypoint, improve docs.

This commit is contained in:
Ben Sturmfels 2022-03-02 10:05:07 +11:00
parent 59dfbb78d1
commit fb5d0a57f3
Signed by: bsturmfels
GPG key ID: 023C05E2C9C068F0
2 changed files with 102 additions and 42 deletions

View file

@ -228,6 +228,7 @@ for desc, query in QUERIES.items():
uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows]
report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod))
# TODO: Make the directory if it doesn't exist.
with open(report_path, 'w') as f:
f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, []))
print(f'Wrote reconciliation report: {report_path}.')

View file

@ -1,44 +1,97 @@
"""Reconcile an AMEX/FR CSV statement against the books and print differences.
"""Compare a bank CSV statement with the books.
Beancount users often write importers to create bookkeeping entries direct from
a bank statement or similar. That approach automates data entry and
reconciliation in one step. In some cases though, it's useful to manually enter
transactions and reconcile them later on. This workflow is helpful in cases like
writing a paper check when there's a time lag between committing to making a
payment and the funds being debited. That's the workflow we're using here.
This tool takes an AMEX or First Republic CSV statement file and
compares it line-by-line with the Beancount books to make sure that
everything matches. This is designed for situations where transactions
are entered into the books directly, rather than being imported from a
statement after the fact.
Run like this:
The reconciler will attempt to match transactions based on date,
amount, check number and payee, but is forgiving to differences in
dates, the absence of check number and inexact matches on
payee. Matches are ranked, so where there is only one decent match for
an amount/date this is accepted, but if there are multiple similar
candidates it will refuse to guess.
$ python3 -m pip install thefuzz
$ python3 conservancy_beancount/reconcile/statement_reconciler.py \
--beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
--csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \
--account=Liabilities:CreditCard:AMEX
The reconciler will also attempt to identify where a single statement
entry has been split out into multiple Beancount postings, such as a
single bank transfer representing health insurance for multiple
employees.
Conservancy currently enter data by hand rather than using Beancount importers.
This tool is still somewhat like an importer in that it needs to extract
transaction details from a third-party statement. Instead of creating
directives, it just checks to see that similar directives are already present.
Run it like this:
$ statement_reconciler \
--beancount-file=2021.beancount \
--account=Liabilities:CreditCard:AMEX \
--csv-statement=2021-09-10_AMEX_activity.csv \
--bank-statement=2021-09-10_AMEX_activity.csv \
--statement-balance=1000
Background:
Beancount users often write importers to create bookkeeping entries
direct from a bank statement or similar. That approach automates data
entry and reconciliation in one step. In some cases though, it's
useful to manually enter transactions and reconcile them later
on. This workflow is helpful in cases like writing a paper check when
there's a time lag between committing to making a payment and the
funds being debited. That's the workflow we're using here.
Conservancy currently enter data by hand rather than using Beancount
importers. This tool is still somewhat like an importer in that it
needs to extract transaction details from a third-party
statement. Instead of creating directives, it just checks to see that
similar directives are already present. This is a bit like diff-ing a
statement with the books (though we're only interested in the presence
of lines, not so much their order).
Problems in scope:
- errors in the books take hours to find during reconciliation ("you're entering a world of pain")
- adding statement/reconciliation metadata to books is manual and prone to mistakes
- Beancount doesn't provide any infrastructure for programmatically updating the books, only appending
- after updates to the books files, beancount must be restarted to reflect updates
- updates also invalidate the cache meaning restart takes several minutes
- paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
- balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
- jumping to an individual transaction in a large ledger isn't trivial - Emacs grep mode is the current best option
- errors in the books take hours to find during reconciliation,
requiring manually comparing statements and the books and are
susceptible to mistakes, such as not noticing when there are two
payments for the same amount on the statement, but not in the books
("you're entering a world of pain")
- adding statement/reconciliation metadata to books is/was manual and
prone to mistakes
- Beancount doesn't provide any infrastructure for programmatically
updating the books, only appending in the case of importers
- paper checks are entered in the books when written, but may not be
cashed until months later (reconcile errors)
- jumping to an individual transaction in a large ledger isn't
trivial - Emacs grep mode is the current best option
- Pam and other staff don't use Emacs
- auditors would prefer Bradley didn't perform reconciliation, ideally not Rosanne either
- transactions are entered manually and reconciled after the fact, but importing from statements may be useful in some cases
Q. How are reconciliation reports created currently? How do you read them?
- by hand from copying and pasting from the helper tool output
- auditors would prefer Bradley didn't perform reconciliation,
ideally not Rosanne either
Problem is potentially similar to diff-ing, but in the books, transaction order isn't super significant.
- reconciliation reports are created by hand when there are mismatches
Other related problems we're not dealing with here:
- after updates to the books files, beancount must be restarted to
reflect updates
- updates also invalidate the cache meaning restart takes several
minutes
- balance checks are manually updated in
svn/Financial/Ledger/sanity-check-balances.yaml
- transactions are entered manually and reconciled after the fact,
but importing from statements may be useful in some cases
"""
# TODO:
# - extract the magic numbers
# - consider merging in helper.py
import argparse
import collections
import copy
@ -65,7 +118,7 @@ if not sys.warnoptions:
from thefuzz import fuzz # type: ignore
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.setLevel(logging.INFO)
# Console logging.
logger.addHandler(logging.StreamHandler())
@ -105,6 +158,14 @@ def remove_duplicate_words(text: str) -> str:
def remove_payee_junk(payee: str) -> str:
"""Clean up payee field to improve quality of fuzzy matching.
It turns out that bank statement "description" fields are
difficult to fuzzy match on because they're long and
noisey. Truncating them (see standardize_XXX_record fns) and
removing the common junk helps significantly.
"""
for r in JUNK_WORDS_RES:
payee = r.sub('', payee)
payee = ZERO_RE.sub('', payee)
@ -129,8 +190,11 @@ def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable
return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
# CSV reconciliation report.
# Merge helper script?
def validate_amex_csv(sample: str, account: str) -> None:
    """Sanity-check that *sample* looks like an AMEX activity CSV.

    Parses the header row of the sample text and exits the program with an
    explanatory message when any of the expected columns is missing.  The
    *account* argument is accepted for signature parity with the other
    validators but is not consulted here.
    """
    expected = {'Date', 'Amount', 'Description', 'Card Member'}
    header = csv.DictReader(io.StringIO(sample)).fieldnames
    # Only complain when a header row was actually parsed but lacks columns.
    if header and not expected <= set(header):
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(expected)}")
def standardize_amex_record(row: Dict, line: int) -> Dict:
@ -147,13 +211,6 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
}
# NOTE(review): this is the pre-move copy of validate_amex_csv shown as the
# removed side of the diff hunk; an identical definition appears earlier at
# the new location. It checks the CSV sample's header for the expected AMEX
# columns and exits with a message when any are missing.
def validate_amex_csv(sample: str, account: str) -> None:
required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
reader = csv.DictReader(io.StringIO(sample))
# Validate only when a header row was parsed; a missing column aborts the run.
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
def validate_fr_csv(sample: str, account: str) -> None:
required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
reader = csv.DictReader(io.StringIO(sample))
@ -346,8 +403,6 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str,
return metadata
# TODO: Is there a way to pull the side-effecting code out of this function?
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
"""Insert reconciliation metadata in the books files.
@ -555,3 +610,7 @@ def main(args: argparse.Namespace) -> None:
if __name__ == '__main__':
    # Direct script execution: build the argument namespace, then run.
    main(parse_args(sys.argv))
def entry_point():
    """Console-script entry point: parse CLI arguments and run main()."""
    main(parse_args(sys.argv))