reconcile: Make bank-statement path relative to $CONSERVANCY_BEANCOUNT.

Also add CSV metadata.
Ben Sturmfels 2022-02-21 18:31:07 +11:00
parent a3e60c639f
commit 3377918279
Signed by: bsturmfels
GPG key ID: 023C05E2C9C068F0
2 changed files with 79 additions and 15 deletions
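In rough terms, this change stops recording absolute bank-statement paths in the books and instead records paths relative to the repository root, plus a second key pointing at the matching CSV row. A minimal sketch of the path handling, using the get_repo_relative_path() helper added in the diff below (the CONSERVANCY_REPOSITORY value and the absolute path are made-up examples):

    import os

    # Assumed example value; in practice this points at the books checkout.
    os.environ['CONSERVANCY_REPOSITORY'] = '/home/user/conservancy/books'

    def get_repo_relative_path(path):
        # Same helper as in the diff below: strip the repository prefix.
        return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY'))

    print(get_repo_relative_path(
        '/home/user/conservancy/books/Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf'))
    # -> Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf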

View file

@@ -273,14 +273,8 @@ def match_statement_and_books(statement_trans: list, books_trans: list):
     return matches


-def format_matches(statement_trans, books_trans, show_reconciled_matches: bool, csv_statement: str):
+def format_matches(matches, csv_statement: str):
     match_output = []
-    metadata_to_apply = []
-    total_matched = decimal.Decimal(0)
-    total_missing_from_books = decimal.Decimal(0)
-    total_missing_from_statement = decimal.Decimal(0)
-    matches = match_statement_and_books(statement_trans, books_trans)
     for r1, r2, note in matches:
         note = ', '.join(note)
         note = ': ' + note if note else note
@@ -290,7 +284,7 @@ def format_matches(statement_trans, books_trans, show_reconciled_matches: bool,
             match_output.append([r1[0]['date'], f'{format_record(r1)}{" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1[0]["line"]})'])
         else:
             match_output.append([r2[0]['date'], f'{" ":^59}{format_record(r2)} ✗ NOT ON STATEMENT ({os.path.basename(r2[0]["filename"])}:{r2[0]["line"]})'])
-    return match_output, metadata_to_apply, total_matched, total_missing_from_books, total_missing_from_statement
+    return match_output


 # TODO: Could potentially return a score so that we can find the best match from
@@ -304,6 +298,18 @@ def date_proximity(d1, d2):
     else:
         return 1.0 - (diff / 60.0)


+def metadata_for_match(match, statement_filename, csv_filename):
+    # Can we really ever have multiple statement entries? Probably not.
+    statement_filename = get_repo_relative_path(statement_filename)
+    csv_filename = get_repo_relative_path(csv_filename)
+    metadata = []
+    statement_entries, books_entries, _ = match
+    for books_entry in books_entries:
+        for statement_entry in statement_entries:
+            if not books_entry['bank_statement']:
+                metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement: {statement_filename}'))
+                metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement-csv: {csv_filename}:{statement_entry["line"]}'))
+    return metadata
+
+
 # TODO: Is there a way to pull the side-effecting code out of this function?
@@ -332,7 +338,7 @@ def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> No
             with open(filename, 'r') as f:
                 file_contents[filename] = f.readlines()
         # Insert is inefficient, but fast enough for now in practise.
-        file_contents[filename].insert(line + file_offsets[filename], metadata)
+        file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n')
         file_offsets[filename] += 1
     # Writes each updated file back to disk.
     for filename, contents in file_contents.items():
@@ -340,11 +346,29 @@ def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> No
             f.writelines(contents)
         print(f'Wrote {filename}.')


+def get_repo_relative_path(path):
+    return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY'))
+
+
+def parse_path(path):
+    if not os.path.exists(path):
+        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
+    return path
+
+
+def parse_repo_relative_path(path):
+    if not os.path.exists(path):
+        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
+    repo = os.getenv('CONSERVANCY_REPOSITORY')
+    if not repo:
+        raise argparse.ArgumentTypeError(f'$CONSERVANCY_REPOSITORY is not set.')
+    if not path.startswith(repo):
+        raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.')
+    return path
+
+
 def parse_args(argv):
     parser = argparse.ArgumentParser(description='Reconciliation helper')
-    parser.add_argument('--beancount-file', required=True)
-    parser.add_argument('--csv-statement', required=True)
+    parser.add_argument('--beancount-file', required=True, type=parse_path)
+    parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
+    parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
     parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
     parser.add_argument('--grep-output-filename')
     # parser.add_argument('--report-group-regex')
@@ -397,16 +421,20 @@ def main(args):
     books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])

-    matches, metadata_to_apply, total_matched, total_missing_from_books, total_missing_from_statement = format_matches(
-        statement_trans, books_trans, args.show_reconciled_matches, args.csv_statement)
+    matches = match_statement_and_books(statement_trans, books_trans)
+    match_output = format_matches(matches, args.csv_statement)
     # assert books_balance == books_balance_reconciled + total_matched + total_missing_from_statement

+    total_matched = 0
+    total_missing_from_statement = 0
+    total_missing_from_books = 0
+
     out = io.StringIO()
     print('-' * 155)
     print(f'{"Statement transaction":<52} {"Books transaction":<58} Notes')
     print('-' * 155)
-    for _, output in sorted(matches):
+    for _, output in sorted(match_output):
         print(output)
     print('-' * 155)
     print(f'Statement period: {begin_date} to {end_date}')
@@ -419,6 +447,10 @@ def main(args):
     print('-' * 155)

     # Write statement metadata back to books
+    metadata_to_apply = []
+    for match in matches:
+        # TODO: Shouldn't write if no match.
+        metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement))
     if metadata_to_apply and not args.non_interactive:
         print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
         if input().lower() == 'y':
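One detail worth calling out from the write_metadata_to_books() hunk above: readlines() keeps the trailing '\n' on every existing line, so each inserted metadata line now gets a newline appended (and any stray trailing whitespace stripped) before the splice. A small self-contained sketch, with made-up file content:

    # Lines as readlines() would return them; the transaction is a made-up example.
    lines = ['2021-08-16 txn "Gandi" "transfer seleniumconf.us"\n',
             '    Liabilities:CreditCard:AMEX -15.50 USD\n',
             '    Expenses:Hosting 15.50 USD\n']
    metadata = '    bank-statement: statement.pdf'

    # Without the added '\n', the metadata would run into the following line
    # when the list is joined and written back to disk.
    lines.insert(2, metadata.rstrip() + '\n')
    print(''.join(lines), end='')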

View file

@@ -1,5 +1,8 @@
 import datetime
 import decimal
+import os
+import tempfile
+import textwrap

 from conservancy_beancount.reconcile.prototype_amex_reconciler import (
     match_statement_and_books,
@@ -7,6 +10,8 @@ from conservancy_beancount.reconcile.prototype_amex_reconciler import (
     date_proximity,
     remove_duplicate_words,
     payee_match,
+    metadata_for_match,
+    write_metadata_to_books
 )

 S1 = {
@@ -38,7 +43,7 @@ B1 = {
     'check_id': '',
     'filename': '2022/imports.beancount',
     'line': 777,
-    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
+    'bank_statement': '',
 }
 B2 = {
     'date': datetime.date(2022, 1, 2),
@@ -186,3 +191,30 @@ def test_remove_duplicate_words():
 def test_payee_matches_when_first_word_matches():
     assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0
     assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8
+
+
+def test_metadata_for_match(monkeypatch):
+    monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.')
+    assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [
+        ('2022/imports.beancount', 777, '    bank-statement: statement.pdf'),
+        ('2022/imports.beancount', 777, '    bank-statement-csv: statement.csv:222'),
+    ]
+
+
+def test_write_to_books():
+    books = textwrap.dedent("""\
+        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
+            Liabilities:CreditCard:AMEX -15.50 USD
+            Expenses:Hosting 15.50 USD""")
+    f = tempfile.NamedTemporaryFile('w', delete=False)
+    f.write(books)
+    f.close()
+    metadata = [(f.name, 2, '    bank-statement: statement.pdf')]
+    write_metadata_to_books(metadata)
+    with open(f.name) as f:
+        output = f.read()
+    assert output == textwrap.dedent("""\
+        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
+            Liabilities:CreditCard:AMEX -15.50 USD
+            bank-statement: statement.pdf
+            Expenses:Hosting 15.50 USD""")
+    os.remove(f.name)
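Taken together, a matched transaction ends up carrying both metadata keys with repository-relative paths, roughly like the sketch below (illustrative only: the PDF path is the one removed from the B1 fixture above, while the CSV path and the ':222' line reference are assumed examples):

    # What a reconciled books entry might look like after the metadata is written back.
    reconciled = '''\
    2021-08-16 txn "Gandi" "transfer seleniumconf.us"
        Liabilities:CreditCard:AMEX -15.50 USD
        bank-statement: Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf
        bank-statement-csv: Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.csv:222
        Expenses:Hosting 15.50 USD
    '''
    print(reconciled, end='')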