reconcile: CLI entrypoint, improve docs.
This commit is contained in:
parent
59dfbb78d1
commit
fb5d0a57f3
2 changed files with 102 additions and 42 deletions
|
@ -228,6 +228,7 @@ for desc, query in QUERIES.items():
|
|||
|
||||
# Reorder each row's columns for the report; the third field uses r[4],
# falling back to r[3] when r[4] is falsy.
uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows]
report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod))
# Create the report's parent directory so open() doesn't fail when it is
# missing. Skip this when the path has no directory component, because
# os.makedirs('') raises FileNotFoundError.
report_dir = os.path.dirname(report_path)
if report_dir:
    os.makedirs(report_dir, exist_ok=True)
with open(report_path, 'w') as f:
    f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, []))
print(f'Wrote reconciliation report: {report_path}.')
|
||||
|
|
|
@ -1,44 +1,97 @@
|
|||
"""Reconcile an AMEX/FR CSV statement against the books and print differences.
|
||||
"""Compare a bank CSV statement with the books.
|
||||
|
||||
Beancount users often write importers to create bookkeeping entries direct from
|
||||
a bank statement or similar. That approach automates data entry and
|
||||
reconciliation in one step. In some cases though, it's useful to manually enter
|
||||
transactions and reconcile them later on. This workflow is helpful in cases like
|
||||
writing a paper check when there's a time lag between committing to making a
|
||||
payment and the funds being debited. That's the workflow we're using here.
|
||||
This tool takes an AMEX or First Republic CSV statement file and
|
||||
compares it line-by-line with the Beancount books to make sure that
|
||||
everything matches. This is designed for situations where transactions
|
||||
are entered into the books directly, rather than being imported from a
|
||||
statement after the fact.
|
||||
|
||||
Run like this:
|
||||
The reconciler will attempt to match transactions based on date,
|
||||
amount, check number and payee, but is forgiving to differences in
|
||||
dates, the absence of a check number and inexact matches on
|
||||
payee. Matches are ranked, so where there is only one decent match for
|
||||
an amount/date this is accepted, but if there are multiple similar
|
||||
candidates it will refuse to guess.
|
||||
|
||||
$ python3 -m pip install thefuzz
|
||||
$ python3 conservancy_beancount/reconcile/statement_reconciler.py \
|
||||
--beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
|
||||
--csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \
|
||||
--account=Liabilities:CreditCard:AMEX
|
||||
The reconciler will also attempt to identify where a single statement
|
||||
entry has been split out into multiple Beancount postings, such as a
|
||||
single bank transfer representing health insurance for multiple
|
||||
employees.
|
||||
|
||||
Conservancy currently enter data by hand rather than using Beancount importers.
|
||||
This tool is still somewhat like an importer in that it needs to extract
|
||||
transaction details from a third-party statement. Instead of creating
|
||||
directives, it just checks to see that similar directives are already present.
|
||||
Run it like this:
|
||||
|
||||
$ statement_reconciler \
|
||||
--beancount-file=2021.beancount \
|
||||
--account=Liabilities:CreditCard:AMEX \
|
||||
--csv-statement=2021-09-10_AMEX_activity.csv \
|
||||
--bank-statement=2021-09-10_AMEX_activity.csv \
|
||||
--statement-balance=1000
|
||||
|
||||
Background:
|
||||
|
||||
Beancount users often write importers to create bookkeeping entries
|
||||
direct from a bank statement or similar. That approach automates data
|
||||
entry and reconciliation in one step. In some cases though, it's
|
||||
useful to manually enter transactions and reconcile them later
|
||||
on. This workflow is helpful in cases like writing a paper check when
|
||||
there's a time lag between committing to making a payment and the
|
||||
funds being debited. That's the workflow we're using here.
|
||||
|
||||
Conservancy currently enter data by hand rather than using Beancount
|
||||
importers. This tool is still somewhat like an importer in that it
|
||||
needs to extract transaction details from a third-party
|
||||
statement. Instead of creating directives, it just checks to see that
|
||||
similar directives are already present. This is a bit like diff-ing a
|
||||
statement with the books (though we're only interested in the presence
|
||||
of lines, not so much their order).
|
||||
|
||||
Problems in scope:
|
||||
- errors in the books take hours to find during reconciliation ("you're entering a world of pain")
|
||||
- adding statement/reconciliation metadata to books is manual and prone to mistakes
|
||||
- Beancount doesn't provide any infrastructure for programmatically updating the books, only appending
|
||||
- after updates to the books files, beancount must be restarted to reflect updates
|
||||
- updates also invalidate the cache meaning restart takes several minutes
|
||||
- paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
|
||||
- balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
|
||||
- jumping to an individual transaction in a large ledger isn't trivial - Emacs grep mode is the current best option
|
||||
|
||||
- errors in the books take hours to find during reconciliation,
|
||||
requiring manually comparing statements and the books and are
|
||||
susceptible to mistakes, such as not noticing when there are two
|
||||
payments for the same amount on the statement, but not in the books
|
||||
("you're entering a world of pain")
|
||||
|
||||
- adding statement/reconciliation metadata to books is/was manual and
|
||||
prone to mistakes
|
||||
|
||||
- Beancount doesn't provide any infrastructure for programmatically
|
||||
updating the books, only appending in the case of importers
|
||||
|
||||
- paper checks are entered in the books when written, but may not be
|
||||
cashed until months later (reconcile errors)
|
||||
|
||||
- jumping to an individual transaction in a large ledger isn't
|
||||
trivial - Emacs grep mode is the current best option
|
||||
|
||||
- Pam and other staff don't use Emacs
|
||||
- auditors would prefer Bradley didn't perform reconciliation, ideally not Rosanne either
|
||||
- transactions are entered manually and reconciled after the fact, but importing from statements may be useful in some cases
|
||||
|
||||
Q. How are reconciliation reports created currently? How do you read them?
|
||||
- by hand from copying and pasting from the helper tool output
|
||||
- auditors would prefer Bradley didn't perform reconciliation,
|
||||
ideally not Rosanne either
|
||||
|
||||
Problem is potentially similar to diff-ing, but in the books, transaction order isn't super significant.
|
||||
- reconciliation reports are created by hand when there are mismatches
|
||||
|
||||
Other related problems we're not dealing with here:
|
||||
|
||||
- after updates to the books files, beancount must be restarted to
|
||||
reflect updates
|
||||
|
||||
- updates also invalidate the cache meaning restart takes several
|
||||
minutes
|
||||
|
||||
- balance checks are manually updated in
|
||||
svn/Financial/Ledger/sanity-check-balances.yaml
|
||||
|
||||
- transactions are entered manually and reconciled after the fact,
|
||||
but importing from statements may be useful in some cases
|
||||
|
||||
"""
|
||||
|
||||
# TODO:
|
||||
# - extract the magic numbers
|
||||
# - consider merging in helper.py
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import copy
|
||||
|
@ -65,7 +118,7 @@ if not sys.warnoptions:
|
|||
from thefuzz import fuzz # type: ignore
|
||||
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.DEBUG)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# Console logging.
|
||||
logger.addHandler(logging.StreamHandler())
|
||||
|
@ -105,6 +158,14 @@ def remove_duplicate_words(text: str) -> str:
|
|||
|
||||
|
||||
def remove_payee_junk(payee: str) -> str:
|
||||
"""Clean up payee field to improve quality of fuzzy matching.
|
||||
|
||||
It turns out that bank statement "description" fields are
|
||||
difficult to fuzzy match on because they're long and
|
||||
noisy. Truncating them (see standardize_XXX_record fns) and
|
||||
removing the common junk helps significantly.
|
||||
|
||||
"""
|
||||
for r in JUNK_WORDS_RES:
|
||||
payee = r.sub('', payee)
|
||||
payee = ZERO_RE.sub('', payee)
|
||||
|
@ -129,8 +190,11 @@ def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable
|
|||
return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
|
||||
|
||||
|
||||
# CSV reconciliation report.
|
||||
# Merge helper script?
|
||||
def validate_amex_csv(sample: str, account: str) -> None:
    """Exit with an error if an AMEX CSV sample lacks the expected columns.

    Args:
        sample: Text from the start of the CSV statement, including its
            header row.
        account: Not used by this check; accepted so the signature matches
            validate_fr_csv.

    Raises:
        SystemExit: If a header row is present but does not contain every
            required column. A sample with no header row at all (e.g. an
            empty string) is let through without error.
    """
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
    fieldnames = csv.DictReader(io.StringIO(sample)).fieldnames
    if fieldnames and not required_cols.issubset(fieldnames):
        # Sort the names so the message is stable; set iteration order for
        # strings can differ from one interpreter run to the next.
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(sorted(required_cols))}")
|
||||
|
||||
|
||||
def standardize_amex_record(row: Dict, line: int) -> Dict:
|
||||
|
@ -147,13 +211,6 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
|
|||
}
|
||||
|
||||
|
||||
def validate_amex_csv(sample: str, account: str) -> None:
    """Exit with an error if an AMEX CSV sample lacks the expected columns.

    Args:
        sample: Text from the start of the CSV statement, including its
            header row.
        account: Not used by this check; accepted so the signature matches
            validate_fr_csv.

    Raises:
        SystemExit: If a header row is present but does not contain every
            required column. A sample with no header row at all (e.g. an
            empty string) is let through without error.
    """
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
    fieldnames = csv.DictReader(io.StringIO(sample)).fieldnames
    if fieldnames and not required_cols.issubset(fieldnames):
        # Sort the names so the message is stable; set iteration order for
        # strings can differ from one interpreter run to the next.
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(sorted(required_cols))}")
|
||||
|
||||
|
||||
def validate_fr_csv(sample: str, account: str) -> None:
|
||||
required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
|
||||
reader = csv.DictReader(io.StringIO(sample))
|
||||
|
@ -346,8 +403,6 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str,
|
|||
return metadata
|
||||
|
||||
|
||||
# TODO: Is there a way to pull the side-effecting code out of this function?
|
||||
|
||||
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
|
||||
"""Insert reconciliation metadata in the books files.
|
||||
|
||||
|
@ -555,3 +610,7 @@ def main(args: argparse.Namespace) -> None:
|
|||
def entry_point() -> None:
    """Parse the command-line arguments from sys.argv and run main() with them.

    Takes no parameters so it can be invoked with a bare call.
    NOTE(review): presumably the target of a packaging console-script entry
    point -- confirm against the project's setup configuration.
    """
    args = parse_args(sys.argv)
    main(args)


# Kept last so every definition above exists before the script runs; reuses
# entry_point() rather than repeating its body.
if __name__ == '__main__':
    entry_point()
|
||||
|
|
Loading…
Reference in a new issue