reconcile: CLI entrypoint, improve docs.
This commit is contained in:
parent
59dfbb78d1
commit
fb5d0a57f3
2 changed files with 102 additions and 42 deletions
|
@ -228,6 +228,7 @@ for desc, query in QUERIES.items():
|
||||||
|
|
||||||
uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows]
|
uncleared = [(r[0], r[2], r[4] or r[3], r[1]) for r in uncleared_rows]
|
||||||
report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod))
|
report_path = os.path.join(os.getenv('CONSERVANCY_REPOSITORY', ''), reconciliation_report_path(account, lastDateInPeriod))
|
||||||
|
# TODO: Make the directory if it doesn't exist.
|
||||||
with open(report_path, 'w') as f:
|
with open(report_path, 'w') as f:
|
||||||
f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, []))
|
f.write(reconciliation_report(account, lastDateInPeriod, cleared_balance, uncleared, '1900-01-01', all_trans_balance, []))
|
||||||
print(f'Wrote reconciliation report: {report_path}.')
|
print(f'Wrote reconciliation report: {report_path}.')
|
||||||
|
|
|
@ -1,44 +1,97 @@
|
||||||
"""Reconcile an AMEX/FR CSV statement against the books and print differences.
|
"""Compare a bank CSV statement with the books.
|
||||||
|
|
||||||
Beancount users often write importers to create bookkeeping entries direct from
|
This tool takes an AMEX or First Republic CSV statement file and
|
||||||
a bank statement or similar. That approach automates data entry and
|
compares it line-by-line with the Beancount books to make sure that
|
||||||
reconciliation in one step. In some cases though, it's useful to manually enter
|
everything matches. This is designed for situations where transactions
|
||||||
transactions and reconcile them later on. This workflow helpful in cases like
|
are entered into the books directly, rather than being imported from a
|
||||||
writing a paper check when there's a time lag between committing to making a
|
statement after the fact.
|
||||||
payment and the funds being debited. That's the workflow we're using here.
|
|
||||||
|
|
||||||
Run like this:
|
The reconciler will attempt to match transactions based on date,
|
||||||
|
amount, check number and payee, but is forgiving of differences in
|
||||||
|
dates, the absence of a check number and inexact matches on
|
||||||
|
payee. Matches are ranked, so where there is only one decent match for
|
||||||
|
an amount/date this is accepted, but if there are multiple similar
|
||||||
|
candidates it will refuse to guess.
|
||||||
|
|
||||||
$ python3 -m pip install thefuzz
|
The reconciler will also attempt to identify where a single statement
|
||||||
$ python3 conservancy_beancount/reconcile/statement_reconciler.py \
|
entry has been split out into multiple Beancount postings, such as a
|
||||||
--beancount-file=$HOME/conservancy/beancount/books/2021.beancount \
|
single bank transfer representing health insurance for multiple
|
||||||
--csv-statement=$HOME/conservancy/confidential/2021-09-10_AMEX_activity.csv \
|
employees.
|
||||||
--account=Liabilities:CreditCard:AMEX
|
|
||||||
|
|
||||||
Conservancy currently enter data by hand rather than using Beancount importers.
|
Run it like this:
|
||||||
This tool is still somewhat like an importer in that it needs to extract
|
|
||||||
transaction details from a third-party statement. Instead of creating
|
$ statement_reconciler \
|
||||||
directives, it just checks to see that similar directives are already present.
|
--beancount-file=2021.beancount \
|
||||||
|
--account=Liabilities:CreditCard:AMEX \
|
||||||
|
--csv-statement=2021-09-10_AMEX_activity.csv \
|
||||||
|
--bank-statement=2021-09-10_AMEX_activity.csv \
|
||||||
|
--statement-balance=1000
|
||||||
|
|
||||||
|
Background:
|
||||||
|
|
||||||
|
Beancount users often write importers to create bookkeeping entries
|
||||||
|
direct from a bank statement or similar. That approach automates data
|
||||||
|
entry and reconciliation in one step. In some cases though, it's
|
||||||
|
useful to manually enter transactions and reconcile them later
|
||||||
|
on. This workflow is helpful in cases like writing a paper check when
|
||||||
|
there's a time lag between committing to making a payment and the
|
||||||
|
funds being debited. That's the workflow we're using here.
|
||||||
|
|
||||||
|
Conservancy currently enter data by hand rather than using Beancount
|
||||||
|
importers. This tool is still somewhat like an importer in that it
|
||||||
|
needs to extract transaction details from a third-party
|
||||||
|
statement. Instead of creating directives, it just checks to see that
|
||||||
|
similar directives are already present. This is a bit like diff-ing a
|
||||||
|
statement with the books (though we're only interested in the presence
|
||||||
|
of lines, not so much their order).
|
||||||
|
|
||||||
Problems in scope:
|
Problems in scope:
|
||||||
- errors in the books take hours to find during reconciliation ("you're entering a world of pain")
|
|
||||||
- adding statement/reconciliation metadata to books is manual and prone to mistakes
|
- errors in the books take hours to find during reconciliation,
|
||||||
- Beancount doesn't provide any infrastructure for programmatically updating the books, only appending
|
requiring manually comparing statements and the books and are
|
||||||
- after updates to the books files, beancount must be restarted to reflect updates
|
susceptible to mistakes, such as not noticing when there are two
|
||||||
- updates also invalidate the cache meaning restart takes several minutes
|
payments for the same amount on the statement, but not in the books
|
||||||
- paper checks are entered in the books when written, but may not be cashed until months later (reconcile errors)
|
("you're entering a world of pain")
|
||||||
- balance checks are manually updated in svn/Financial/Ledger/sanity-check-balances.yaml
|
|
||||||
- jumping to an individual transaction in a large ledger isn't trivial - Emacs grep mode is the current best option
|
- adding statement/reconciliation metadata to books is/was manual and
|
||||||
|
prone to mistakes
|
||||||
|
|
||||||
|
- Beancount doesn't provide any infrastructure for programmatically
|
||||||
|
updating the books, only appending in the case of importers
|
||||||
|
|
||||||
|
- paper checks are entered in the books when written, but may not be
|
||||||
|
cashed until months later (reconcile errors)
|
||||||
|
|
||||||
|
- jumping to an individual transaction in a large ledger isn't
|
||||||
|
trivial - Emacs grep mode is the current best option
|
||||||
|
|
||||||
- Pam and other staff don't use Emacs
|
- Pam and other staff don't use Emacs
|
||||||
- auditors would prefer Bradley didn't perform reconciliation, ideally not Rosanne either
|
|
||||||
- transactions are entered manually and reconciled after the fact, but importing from statements may be useful in some cases
|
|
||||||
|
|
||||||
Q. How are reconciliation reports created currently? How do you read them?
|
- auditors would prefer Bradley didn't perform reconciliation,
|
||||||
- by hand from copying and pasting from the helper tool output
|
ideally not Rosanne either
|
||||||
|
|
||||||
Problem is potentially similar to diff-ing, but in the books, transaction order isn't super significant.
|
- reconciliation reports are created by hand when there are mismatches
|
||||||
|
|
||||||
|
Other related problems we're not dealing with here:
|
||||||
|
|
||||||
|
- after updates to the books files, beancount must be restarted to
|
||||||
|
reflect updates
|
||||||
|
|
||||||
|
- updates also invalidate the cache meaning restart takes several
|
||||||
|
minutes
|
||||||
|
|
||||||
|
- balance checks are manually updated in
|
||||||
|
svn/Financial/Ledger/sanity-check-balances.yaml
|
||||||
|
|
||||||
|
- transactions are entered manually and reconciled after the fact,
|
||||||
|
but importing from statements may be useful in some cases
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# TODO:
|
||||||
|
# - extract the magic numbers
|
||||||
|
# - consider merging in helper.py
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import collections
|
import collections
|
||||||
import copy
|
import copy
|
||||||
|
@ -65,7 +118,7 @@ if not sys.warnoptions:
|
||||||
from thefuzz import fuzz # type: ignore
|
from thefuzz import fuzz # type: ignore
|
||||||
|
|
||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
logger.setLevel(logging.DEBUG)
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
# Console logging.
|
# Console logging.
|
||||||
logger.addHandler(logging.StreamHandler())
|
logger.addHandler(logging.StreamHandler())
|
||||||
|
@ -105,6 +158,14 @@ def remove_duplicate_words(text: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
def remove_payee_junk(payee: str) -> str:
|
def remove_payee_junk(payee: str) -> str:
|
||||||
|
"""Clean up payee field to improve quality of fuzzy matching.
|
||||||
|
|
||||||
|
It turns out that bank statement "description" fields are
|
||||||
|
difficult to fuzzy match on because they're long and
|
||||||
|
noisy. Truncating them (see standardize_XXX_record fns) and
|
||||||
|
removing the common junk helps significantly.
|
||||||
|
|
||||||
|
"""
|
||||||
for r in JUNK_WORDS_RES:
|
for r in JUNK_WORDS_RES:
|
||||||
payee = r.sub('', payee)
|
payee = r.sub('', payee)
|
||||||
payee = ZERO_RE.sub('', payee)
|
payee = ZERO_RE.sub('', payee)
|
||||||
|
@ -129,8 +190,11 @@ def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable
|
||||||
return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
|
return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
|
||||||
|
|
||||||
|
|
||||||
# CSV reconciliation report.
|
def validate_amex_csv(sample: str, account: str) -> None:
|
||||||
# Merge helper script?
|
required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
|
||||||
|
reader = csv.DictReader(io.StringIO(sample))
|
||||||
|
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
|
||||||
|
sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
|
||||||
|
|
||||||
|
|
||||||
def standardize_amex_record(row: Dict, line: int) -> Dict:
|
def standardize_amex_record(row: Dict, line: int) -> Dict:
|
||||||
|
@ -147,13 +211,6 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def validate_amex_csv(sample: str, account: str) -> None:
|
|
||||||
required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
|
|
||||||
reader = csv.DictReader(io.StringIO(sample))
|
|
||||||
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
|
|
||||||
sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
|
|
||||||
|
|
||||||
|
|
||||||
def validate_fr_csv(sample: str, account: str) -> None:
|
def validate_fr_csv(sample: str, account: str) -> None:
|
||||||
required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
|
required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
|
||||||
reader = csv.DictReader(io.StringIO(sample))
|
reader = csv.DictReader(io.StringIO(sample))
|
||||||
|
@ -346,8 +403,6 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str,
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
# TODO: Is there a way to pull the side-effecting code out of this function?
|
|
||||||
|
|
||||||
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
|
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
|
||||||
"""Insert reconciliation metadata in the books files.
|
"""Insert reconciliation metadata in the books files.
|
||||||
|
|
||||||
|
@ -555,3 +610,7 @@ def main(args: argparse.Namespace) -> None:
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
args = parse_args(sys.argv)
|
args = parse_args(sys.argv)
|
||||||
main(args)
|
main(args)
|
||||||
|
|
||||||
|
def entry_point():
|
||||||
|
args = parse_args(sys.argv)
|
||||||
|
main(args)
|
||||||
|
|
Loading…
Reference in a new issue