"""Compare a bank CSV statement with the books.

This tool takes an AMEX or First Republic CSV statement file and
compares it line-by-line with the Beancount books to make sure that
everything matches. This is designed for situations where transactions
are entered into the books directly, rather than being imported from a
statement after the fact.

The reconciler will attempt to match transactions based on date,
amount, check number and payee, but is forgiving of differences in
dates, the absence of a check number and inexact matches on the
payee. Matches are ranked, so where there is only one decent match for
an amount/date this is accepted, but if there are multiple similar
candidates it will refuse to guess.

The reconciler will also attempt to identify where a single statement
entry has been split out into multiple Beancount postings, such as a
single bank transfer representing health insurance for multiple
employees.

Run it like this:

$ statement_reconciler \
    --beancount-file 2021.beancount \
    --account Liabilities:CreditCard:AMEX \
    --csv-statement ~/svn/2021-09-10_AMEX_activity.csv \
    --bank-statement ~/svn/2021-09-10_AMEX_activity.pdf

Background:

Beancount users often write importers to create bookkeeping entries
directly from a bank statement or similar. That approach automates data
entry and reconciliation in one step. In some cases though, it's
useful to manually enter transactions and reconcile them later
on. This workflow is helpful in cases like writing a paper check, when
there's a time lag between committing to making a payment and the
funds being debited. That's the workflow we're using here.

Conservancy currently enters data by hand rather than using Beancount
importers. This tool is still somewhat like an importer in that it
needs to extract transaction details from a third-party
statement. Instead of creating directives, it just checks to see that
similar directives are already present. This is a bit like diff-ing a
statement with the books (though we're only interested in the presence
of lines, not so much their order).

Problems in scope:

- errors in the books take hours to find during reconciliation,
  requiring manually comparing statements and the books, and are
  susceptible to mistakes, such as not noticing when there are two
  payments for the same amount on the statement, but not in the books
  ("you're entering a world of pain")

- adding statement/reconciliation metadata to the books is/was manual and
  prone to mistakes

- Beancount doesn't provide any infrastructure for programmatically
  updating the books, only appending in the case of importers

- paper checks are entered in the books when written, but may not be
  cashed until months later (causing reconciliation errors)

- jumping to an individual transaction in a large ledger isn't
  trivial - Emacs grep mode is the current best option

- Pam and other staff don't use Emacs

- auditors would prefer Bradley didn't perform reconciliation,
  ideally not Rosanne either

- reconciliation reports are created by hand when there are mismatches

Other related problems we're not dealing with here:

- after updates to the books files, Beancount must be restarted to
  reflect the updates

- updates also invalidate the cache, meaning a restart takes several
  minutes

- balance checks are manually updated in
  svn/Financial/Ledger/sanity-check-balances.yaml

- transactions are entered manually and reconciled after the fact,
  but importing from statements may be useful in some cases

"""
# TODO:
# - extract the magic numbers
# - consider merging in helper.py

import argparse
import collections
import copy
import csv
import datetime
import decimal
import io
import itertools
import logging
import os
import re
import sys
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO

from beancount import loader
from beancount.query.query import run_query
from colorama import Fore, Style  # type: ignore

from .. import cliutil
from .. import config as configmod

if not sys.warnoptions:
    import warnings
    # Disable annoying warning from thefuzz prompting for a C extension. The
    # current pure-Python implementation isn't a bottleneck for us.
    warnings.filterwarnings('ignore', category=UserWarning, module='thefuzz.fuzz')
from thefuzz import fuzz  # type: ignore

PROGNAME = 'reconcile-statement'
logger = logging.getLogger(__name__)

# Get some interesting feedback on call to RT with this:
# logger.setLevel(logging.DEBUG)
# logger.addHandler(logging.StreamHandler())

JUNK_WORDS = [
    'software',
    'freedom',
    'conservancy',
    'conse',
    'payment',
    'echeck',
    'bill',
    'debit',
    'wire',
    'credit',
    "int'l",
    "in.l",
    'llc',
    'online',
    'donation',
    'usd',
    'inc',
]
JUNK_WORDS_RES = [re.compile(word, re.IGNORECASE) for word in JUNK_WORDS]
ZERO_RE = re.compile('^0+')


def remove_duplicate_words(text: str) -> str:
    unique_words = []
    known_words = set()
    for word in text.split():
        if word.lower() not in known_words:
            unique_words.append(word)
            known_words.add(word.lower())
    return ' '.join(unique_words)


def remove_payee_junk(payee: str) -> str:
    """Clean up payee field to improve quality of fuzzy matching.

    It turns out that bank statement "description" fields are
    difficult to fuzzy match on because they're long and
    noisy. Truncating them (see standardize_XXX_record fns) and
    removing the common junk helps significantly.

    """
    for r in JUNK_WORDS_RES:
        payee = r.sub('', payee)
    payee = ZERO_RE.sub('', payee)
    payee = payee.replace(' - ', ' ')
    payee = re.sub(r'\.0\.\d+', ' ', payee)
    payee = payee.replace('.0', ' ')
    payee = payee.replace('/', ' ')
    payee = re.sub(re.escape('.com'), ' ', payee, flags=re.IGNORECASE)
    payee = re.sub(re.escape('.net'), ' ', payee, flags=re.IGNORECASE)
    payee = payee.replace('*', ' ')
    payee = ' '.join([i for i in payee.split(' ') if len(i) > 2])
    payee = payee.replace('-', ' ')
    payee = remove_duplicate_words(payee)
    payee = payee.strip()
    return payee

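# A quick illustration of the cleanup above (hypothetical input, traced by
# hand rather than taken from a real statement):
#
#     >>> remove_payee_junk('SOFTWARE FREEDOM CONSERVANCY PAYPAL TRANSFER')
#     'PAYPAL TRANSFER'
#
# The junk words and any token of two characters or fewer are dropped before
# the result is handed to the fuzzy matcher.
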
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
    reader = csv.DictReader(f)
    # The reader.line_num is the source line number, not the spreadsheet row
    # number due to multi-line records.
    return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])


def validate_amex_csv(sample: str, account: str) -> None:
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
    reader = csv.DictReader(io.StringIO(sample))
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")


def standardize_amex_record(row: Dict, line: int) -> Dict:
    """Turn an AMEX CSV row into a standard dict format representing a transaction."""
    # NOTE: Statement doesn't seem to give us a running balance or a final total.
    return {
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
        'amount': -1 * decimal.Decimal(row['Amount']),
        # Descriptions have too much noise, so taking just the start
        # significantly assists the fuzzy matching.
        'payee': remove_payee_junk(row['Description'] or '')[:20],
        'check_id': '',
        'line': line,
    }


def validate_fr_csv(sample: str, account: str) -> None:
    required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
    reader = csv.DictReader(io.StringIO(sample))
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
        sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")


def standardize_fr_record(row: Dict, line: int) -> Dict:
    return {
        'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
        'amount': decimal.Decimal(row['Amount']),
        'payee': remove_payee_junk(row['Detail'] or '')[:20],
        'check_id': row['Serial Num'].lstrip('0'),
        'line': line,
    }


def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def]
    """Turn a Beancount query result row into a standard dict representing a transaction."""
    return {
        'date': row.date,
        'amount': row.number_cost_position,
        'payee': remove_payee_junk(f'{row.payee or ""} {row.entity or ""} {row.narration or ""}'),
        'check_id': str(row.check_id or ''),
        'filename': row.filename,
        'line': row.line,
        'bank_statement': row.bank_statement,
    }


def format_record(record: dict) -> str:
    if record['payee'] and record['check_id']:
        output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
    elif record['payee']:
        output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:35]}".ljust(59)
    else:
        output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} #{record['check_id']}".ljust(59)
    return output


def format_multirecord(r1s: list[dict], r2s: list[dict], note: str) -> list[list]:
    assert len(r1s) == 1
    assert len(r2s) > 1
    match_output = []
    match_output.append([r1s[0]['date'], f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}'])
    for r2 in r2s[1:]:
        match_output.append([r1s[0]['date'], f'{r1s[0]["date"].isoformat()}: ↳ → {format_record(r2)} ✓ Matched{note}'])
    return match_output


def sort_records(records: List) -> List:
    return sorted(records, key=lambda x: (x['date'], x['amount']))

def first_word_exact_match(a: str, b: str) -> float:
    if len(a) == 0 or len(b) == 0:
        return 0.0
    first_a = a.split()[0].strip()
    first_b = b.split()[0].strip()
    if first_a.casefold() == first_b.casefold():
        return min(1.0, 0.2 * len(first_a))
    else:
        return 0.0


def payee_match(a: str, b: str) -> float:
    fuzzy_match = float(fuzz.token_set_ratio(a, b) / 100.00)
    first_word_match = first_word_exact_match(a, b)
    return max(fuzzy_match, first_word_match)

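# Rough illustration of the payee scoring (hypothetical strings): the fuzzy
# score from thefuzz's token_set_ratio depends on its implementation, but the
# first-word bonus is deterministic, e.g.
#
#     >>> first_word_exact_match('Microsoft Azure', 'MICROSOFT CORP')
#     1.0
#
# because the case-folded first words agree and 0.2 * len('Microsoft') is
# capped at 1.0; payee_match() returns whichever of the two scores is larger.
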
def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]:
    """Do these records represent the same transaction?"""

    date_score = date_proximity(r1['date'], r2['date'])
    if r1['date'] == r2['date']:
        date_message = ''
    elif date_score > 0.0:
        diff = abs((r1['date'] - r2['date']).days)
        date_message = f'+/- {diff} days'
    else:
        date_message = 'date mismatch'

    if r1['amount'] == r2['amount']:
        amount_score, amount_message = 2.0, ''
    else:
        amount_score, amount_message = 0.0, 'amount mismatch'

    # We never consider payee if there's a check_id in the books.
    check_message = ''
    payee_message = ''
    # Sometimes we get unrelated numbers in the statement column with check-ids,
    # so we can't match based on the existence of a statement check-id.
    if r2['check_id']:
        payee_score = 0.0
        if r1['check_id'] and r2['check_id'] and r1['check_id'] == r2['check_id']:
            check_score = 1.0
        else:
            check_message = 'check-id mismatch'
            check_score = 0.0
    else:
        check_score = 0.0
        payee_score = payee_match(r1['payee'], r2['payee'])
        if payee_score > 0.8:
            payee_message = ''
        elif payee_score > 0.4:
            payee_message = 'partial payee match'
        else:
            payee_message = 'payee mismatch'

    overall_score = (date_score + amount_score + check_score + payee_score) / 4
    overall_message = [m for m in [date_message, amount_message, check_message, payee_message] if m]
    return overall_score, overall_message

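# Worked example of the scoring above (hypothetical records): a books entry
# with the same date (1.0), the same amount (2.0), no check-id (0.0) and a
# strongly similar payee (say 0.9) scores (1.0 + 2.0 + 0.0 + 0.9) / 4 ≈ 0.97,
# comfortably above the 0.8 auto-accept threshold used by the matchers below.
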
def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
    """
    Runs through all the statement transactions to find a matching transaction
    in the books. If found, the books transaction is marked off so that it can
    only be matched once. Some transactions will be matched, some will be on the
    statement but not the books and some on the books but not the statement.
    """
    matches = []
    remaining_books_trans = []
    remaining_statement_trans = []

    for r1 in statement_trans:
        best_match_score = 0.0
        best_match_index = None
        best_match_note = []
        matches_found = 0
        for i, r2 in enumerate(books_trans):
            score, note = records_match(r1, r2)
            if score >= 0.5 and score >= best_match_score:
                matches_found += 1
                best_match_score = score
                best_match_index = i
                best_match_note = note
        if (best_match_score > 0.5 and matches_found == 1 and 'check-id mismatch' not in best_match_note) or best_match_score > 0.8:
            matches.append(([r1], [books_trans[best_match_index]], best_match_note))
            # Don't try to make a second match against this books entry.
            if best_match_index is not None:
                del books_trans[best_match_index]
        else:
            remaining_statement_trans.append(r1)
    for r2 in books_trans:
        remaining_books_trans.append(r2)
    return matches, remaining_statement_trans, remaining_books_trans


# TODO: Return list of tuples (instead of list of lists).

def format_matches(matches: List, csv_statement: str, show_reconciled_matches: bool) -> List[List]:
    match_output = []
    for r1s, r2s, note in matches:
        note = ', '.join(note)
        note = ': ' + note if note else note
        if r1s and r2s:
            if show_reconciled_matches or not all(x['bank_statement'] for x in r2s):
                if len(r2s) == 1:
                    entry = [r1s[0]['date'], f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}']
                    if 'payee mismatch' in note:
                        entry[1] = Fore.YELLOW + Style.BRIGHT + entry[1] + Style.RESET_ALL
                    match_output.append(entry)
                else:
                    match_output.extend(format_multirecord(r1s, r2s, note))
        elif r1s:
            match_output.append([r1s[0]['date'], Fore.RED + Style.BRIGHT + f'{format_record(r1s[0])} → {" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + Style.RESET_ALL])
        else:
            match_output.append([r2s[0]['date'], Fore.RED + Style.BRIGHT + f'{" ":^59} → {format_record(r2s[0])} ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + Style.RESET_ALL])
    return match_output

def date_proximity(d1: datetime.date, d2: datetime.date) -> float:
    diff = abs(int((d1 - d2).days))
    if diff > 60:
        return 0.0
    else:
        return 1.0 - (diff / 60.0)

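# date_proximity() decays linearly from 1.0 (same day) to 0.0 at 60 days
# apart: for example, dates 15 days apart score 1.0 - 15/60 = 0.75, and any
# gap of more than 60 days scores 0.0.
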
def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, csv_filename: str) -> List[Tuple[str, int, str]]:
    # Can we really ever have multiple statement entries? Probably not.
    statement_filename = get_repo_relative_path(statement_filename)
    csv_filename = get_repo_relative_path(csv_filename)
    metadata = []
    statement_entries, books_entries, _ = match
    for books_entry in books_entries:
        for statement_entry in statement_entries:
            if not books_entry['bank_statement']:
                metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement: "{statement_filename}"'))
                metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"'))
    return metadata


def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
    """Insert reconciliation metadata in the books files.

    Takes a list of edits to make as tuples of form (filename, lineno, metadata):

    [
        ('2021/main.beancount', 4245, ' bank-statement: statement.pdf'),
        ('2021/main.beancount', 1057, ' bank-statement: statement.pdf'),
        ('2021/payroll.beancount', 257, ' bank-statement: statement.pdf'),
        ...,
    ]

    """
    file_contents: dict[str, list] = {}
    file_offsets: dict[str, int] = collections.defaultdict(int)
    # Load each books file into memory and insert the relevant metadata lines.
    # Line numbers change as we do this, so we keep track of the offset for each
    # file. Changes must be sorted by line number first or else the offsets will
    # break because we're jumping around making edits.
    for filename, line, metadata in sorted(metadata_to_apply):
        if filename not in file_contents:
            with open(filename, 'r') as f:
                file_contents[filename] = f.readlines()
        # Insert is inefficient, but fast enough for now in practice.
        file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n')
        file_offsets[filename] += 1
    # Write each updated file back to disk.
    for filename, contents in file_contents.items():
        with open(filename, 'w') as f:
            f.writelines(contents)
        print(f'Wrote {filename}.')

def get_repo_relative_path(path: str) -> str:
    return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY'))


def parse_path(path: str) -> str:
    if not os.path.exists(path):
        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
    return path


def parse_repo_relative_path(path: str) -> str:
    if not os.path.exists(path):
        raise argparse.ArgumentTypeError(f'File {path} does not exist.')
    repo = os.getenv('CONSERVANCY_REPOSITORY')
    if not repo:
        raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.')
    if not path.startswith(repo):
        raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.')
    return path


def parse_decimal_with_separator(number_text: str) -> decimal.Decimal:
    """decimal.Decimal can't parse numbers with thousands separator."""
    number_text = number_text.replace(',', '')
    return decimal.Decimal(number_text)

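# For example, parse_decimal_with_separator('1,234.56') returns
# Decimal('1234.56'), whereas decimal.Decimal('1,234.56') would raise
# decimal.InvalidOperation.
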
def parse_arguments(argv: List[str]) -> argparse.Namespace:
    parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper')
    cliutil.add_version_argument(parser)
    cliutil.add_loglevel_argument(parser)
    parser.add_argument('--beancount-file', required=True, type=parse_path)
    parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
    parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
    parser.add_argument('--account', required=True, help='e.g. Liabilities:CreditCard:AMEX')
    # parser.add_argument('--report-group-regex')
    parser.add_argument('--show-reconciled-matches', action='store_true')
    parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books")
    # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
    args = parser.parse_args(args=argv)
    return args

def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]:
    total_matched = decimal.Decimal(0)
    total_missing_from_books = decimal.Decimal(0)
    total_missing_from_statement = decimal.Decimal(0)
    for statement_entries, books_entries, _ in matches:
        if statement_entries and books_entries:
            total_matched += sum(c['amount'] for c in statement_entries)
        elif statement_entries:
            total_missing_from_books += sum(c['amount'] for c in statement_entries)
        else:
            total_missing_from_statement += sum(c['amount'] for c in books_entries)
    return total_matched, total_missing_from_books, total_missing_from_statement

def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
    matches = []
    remaining_books_trans = []
    remaining_statement_trans = []

    groups = itertools.groupby(books_trans, key=lambda x: (x['date'], x['payee']))
    for _, group in groups:
        best_match_score = 0.0
        best_match_index = None
        best_match_note = []
        matches_found = 0

        group_items = list(group)
        total = sum(x['amount'] for x in group_items)
        r2 = copy.copy(group_items[0])
        r2['amount'] = total
        for i, r1 in enumerate(statement_trans):
            score, note = records_match(r1, r2)
            if score >= 0.5 and score >= best_match_score:
                matches_found += 1
                best_match_score = score
                best_match_index = i
                best_match_note = note
        if (best_match_score > 0.5 and matches_found == 1 and 'check-id mismatch' not in best_match_note) or best_match_score > 0.8:
            matches.append(([statement_trans[best_match_index]], group_items, best_match_note))
            if best_match_index is not None:
                del statement_trans[best_match_index]
        else:
            remaining_books_trans.append(r2)
    for r1 in statement_trans:
        remaining_statement_trans.append(r1)
    return matches, remaining_statement_trans, remaining_books_trans


def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> List[Tuple[List, List, List]]:
    matches: List[Tuple[List, List, List]] = []
    for r1 in statement_trans:
        matches.append(([r1], [], ['no match']))
    for r2 in books_trans:
        matches.append(([], [r2], ['no match']))
    return matches

def main(arglist: Optional[Sequence[str]] = None,
         stdout: TextIO = sys.stdout,
         stderr: TextIO = sys.stderr,
         config: Optional[configmod.Config] = None,
         ) -> int:
    args = parse_arguments(arglist)
    cliutil.set_loglevel(logger, args.loglevel)
    if config is None:
        config = configmod.Config()
        config.load_file()

    # TODO: Should put in a sanity check to make sure the statement you're feeding
    # in matches the account you've provided.

    # TODO: Can we open the files first, then pass the streams on to the rest of the program?

    if 'AMEX' in args.account:
        validate_csv = validate_amex_csv
        standardize_statement_record = standardize_amex_record
    else:
        validate_csv = validate_fr_csv
        standardize_statement_record = standardize_fr_record

    with open(args.csv_statement) as f:
        sample = f.read(200)
        validate_csv(sample, args.account)
        f.seek(0)
        statement_trans = read_transactions_from_csv(f, standardize_statement_record)

    begin_date = statement_trans[0]['date']
    end_date = statement_trans[-1]['date']

    # Do we traverse and filter the in-memory entries list and filter that, or do we
    # use Beancount Query Language (BQL) to get a list of transactions? Currently
    # using BQL.
    #
    # beancount.query.query_compile.compile() and
    # beancount.query.query_execute.filter_entries() look useful in this respect,
    # but I'm not clear on how to use compile(). An example would help.
    entries, _, options = loader.load_file(args.beancount_file)

    # books_balance_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
    #     AND date <= {end_date.isoformat()}"""
    # _, result_rows = run_query(entries, options, books_balance_query, numberify=True)
    # books_balance = result_rows[0][0] if result_rows else 0

    # String concatenation looks bad, but there's no SQL injection possible here
    # because BQL can't write back to the Beancount files. I hope!
    query = f'SELECT filename, META("lineno") AS line, META("bank-statement") AS bank_statement, date, number(cost(position)), payee, ENTRY_META("entity") as entity, ANY_META("check-id") as check_id, narration where account = "{args.account}" and date >= {begin_date} and date <= {end_date}'
    _, result_rows = run_query(entries, options, query)

    books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])

    matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans)
    subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(remaining_statement_trans, remaining_books_trans)
    matches.extend(subset_matches)
    unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans)
    matches.extend(unmatched)

    match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)

    _, total_missing_from_books, total_missing_from_statement = totals(matches)

    print('-' * 155)
    statement_heading = f'Statement transactions {begin_date} to {end_date}'
    print(f'{statement_heading:<52} {"Books transactions":<58} Notes')
    print('-' * 155)
    for _, output in sorted(match_output, key=lambda x: x[0]):
        print(output)
    print('-' * 155)
    print(f'Sub-total not on statement: {total_missing_from_statement:12,.2f}')
    print(f'Sub-total not in books:     {total_missing_from_books:12,.2f}')
    print(f'Total:                      {total_missing_from_statement + total_missing_from_books:12,.2f}')
    print('-' * 155)

    # Write statement metadata back to the books.
    metadata_to_apply = []
    for match in matches:
        metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement))
    if metadata_to_apply and not args.non_interactive:
        print('Mark matched transactions as reconciled in the books? (y/N) ', end='')
        if input().lower() == 'y':
            write_metadata_to_books(metadata_to_apply)

    return 0


entry_point = cliutil.make_entry_point(__name__, PROGNAME)

if __name__ == '__main__':
    exit(entry_point())