reconcile: Update docs, move a few things around.
This commit is contained in:
parent
97a05003f3
commit
581ef0fa23
1 changed files with 222 additions and 165 deletions
|
@ -44,23 +44,25 @@ similar directives are already present. This is a bit like diff-ing a
|
||||||
statement with the books (though we're only interested in the presence
|
statement with the books (though we're only interested in the presence
|
||||||
of lines, not so much their order).
|
of lines, not so much their order).
|
||||||
|
|
||||||
|
Paper checks are entered in the books when written (a.k.a. "posted"),
|
||||||
|
but may not be cashed until months later, sometimes causing
|
||||||
|
reconciliation differences that live beyond a month. It's worth noting
|
||||||
|
that there are really two dates here - the posting date and the
|
||||||
|
cleared date. Beancount only allows us to model one, which is why
|
||||||
|
carrying these reconciliation differences between months feels a bit
|
||||||
|
awkward.
|
||||||
|
|
||||||
Problems in scope:
|
Problems in scope:
|
||||||
|
|
||||||
- errors in the books take hours to find during reconciliation,
|
- errors in the books take hours to find during reconciliation,
|
||||||
requiring manually comparing statemnts and the books and are
|
requiring manually comparing statements and the books and are
|
||||||
susceptible to mistakes, such as not noticing when there are two
|
susceptible to mistakes, such as not noticing when there are two
|
||||||
payments for the same amount on the statement, but not in the books
|
payments for the same amount on the statement, but not in the books
|
||||||
("you're entering a world of pain")
|
(as Bradley likes to quote, "you're entering a world of pain")
|
||||||
|
|
||||||
- adding statement/reconciliation metadata to books is/was manual and
|
- adding statement/reconciliation metadata to books is/was manual and
|
||||||
prone to mistakes
|
prone to mistakes
|
||||||
|
|
||||||
- Beancount doesn't provide any infrastructure for programmatically
|
|
||||||
updating the books, only appending in the case of importers
|
|
||||||
|
|
||||||
- paper checks are entered in the books when written, but may not be
|
|
||||||
cashed until months later (reconcile errors)
|
|
||||||
|
|
||||||
- jumping to an individual transaction in a large ledger isn't
|
- jumping to an individual transaction in a large ledger isn't
|
||||||
trivial - Emacs grep mode is the current best option
|
trivial - Emacs grep mode is the current best option
|
||||||
|
|
||||||
|
@ -244,6 +246,7 @@ def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
|
||||||
|
|
||||||
|
|
||||||
def format_record(record: dict) -> str:
|
def format_record(record: dict) -> str:
|
||||||
|
"""Generate output lines for a standard 1:1 match."""
|
||||||
if record['payee'] and record['check_id']:
|
if record['payee'] and record['check_id']:
|
||||||
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
|
output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59)
|
||||||
elif record['payee']:
|
elif record['payee']:
|
||||||
|
@ -254,6 +257,7 @@ def format_record(record: dict) -> str:
|
||||||
|
|
||||||
|
|
||||||
def format_multirecord(r1s: list[dict], r2s: list[dict], note: str) -> list[list]:
|
def format_multirecord(r1s: list[dict], r2s: list[dict], note: str) -> list[list]:
|
||||||
|
"""Generates output lines for one statement:multiple books transaction match."""
|
||||||
assert len(r1s) == 1
|
assert len(r1s) == 1
|
||||||
assert len(r2s) > 1
|
assert len(r2s) > 1
|
||||||
match_output = []
|
match_output = []
|
||||||
|
@ -268,6 +272,13 @@ def sort_records(records: List) -> List:
|
||||||
|
|
||||||
|
|
||||||
def first_word_exact_match(a: str, b: str) -> float:
|
def first_word_exact_match(a: str, b: str) -> float:
|
||||||
|
"""Score a payee match based on the first word.
|
||||||
|
|
||||||
|
We get a whole lot of good matches this way. Helps in the
|
||||||
|
situation where the first word or two of a transaction description
|
||||||
|
is useful and the rest is garbage.
|
||||||
|
|
||||||
|
"""
|
||||||
if len(a) == 0 or len(b) == 0:
|
if len(a) == 0 or len(b) == 0:
|
||||||
return 0.0
|
return 0.0
|
||||||
first_a = a.split()[0].strip()
|
first_a = a.split()[0].strip()
|
||||||
|
@ -279,6 +290,7 @@ def first_word_exact_match(a: str, b: str) -> float:
|
||||||
|
|
||||||
|
|
||||||
def payee_match(a: str, b: str) -> float:
|
def payee_match(a: str, b: str) -> float:
|
||||||
|
"""Score a match between two payees."""
|
||||||
fuzzy_match = float(fuzz.token_set_ratio(a, b) / 100.00)
|
fuzzy_match = float(fuzz.token_set_ratio(a, b) / 100.00)
|
||||||
first_word_match = first_word_exact_match(a, b)
|
first_word_match = first_word_exact_match(a, b)
|
||||||
return max(fuzzy_match, first_word_match)
|
return max(fuzzy_match, first_word_match)
|
||||||
|
@ -286,7 +298,6 @@ def payee_match(a: str, b: str) -> float:
|
||||||
|
|
||||||
def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]:
|
def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]:
|
||||||
"""Do these records represent the same transaction?"""
|
"""Do these records represent the same transaction?"""
|
||||||
|
|
||||||
date_score = date_proximity(r1['date'], r2['date'])
|
date_score = date_proximity(r1['date'], r2['date'])
|
||||||
if r1['date'] == r2['date']:
|
if r1['date'] == r2['date']:
|
||||||
date_message = ''
|
date_message = ''
|
||||||
|
@ -329,11 +340,20 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]:
|
||||||
|
|
||||||
|
|
||||||
def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
|
def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
|
||||||
"""
|
"""Match transactions between the statement and books.
|
||||||
Runs through all the statement transactions to find a matching transaction
|
|
||||||
in the books. If found, the books transaction is marked off so that it can
|
If matched, the books transaction is marked off so that it can
|
||||||
only be matched once. Some transactions will be matched, some will be on the
|
only be matched once. Some transactions will be matched, some will
|
||||||
statement but not the books and some on the books but not the statement.
|
be on the statement but not the books and some on the books but
|
||||||
|
not the statement.
|
||||||
|
|
||||||
|
Passes through any unmatched transactions.
|
||||||
|
|
||||||
|
Currently we use the same matching logic for all types of
|
||||||
|
statements. It's conceivable that you could have special cases to
|
||||||
|
accurately match some types of statements, but that would be more
|
||||||
|
work to maintain and test.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
matches = []
|
matches = []
|
||||||
remaining_books_trans = []
|
remaining_books_trans = []
|
||||||
|
@ -363,141 +383,14 @@ def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dic
|
||||||
return matches, remaining_statement_trans, remaining_books_trans
|
return matches, remaining_statement_trans, remaining_books_trans
|
||||||
|
|
||||||
|
|
||||||
# TODO: Return list of tuples (instead of list of lists).
|
def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
|
||||||
|
"""Match single statement transactions with multiple books transactions.
|
||||||
|
|
||||||
def format_matches(matches: List, csv_statement: str, show_reconciled_matches: bool) -> List[List]:
|
Works similarly to match_statement_and_books in that it returns a
|
||||||
match_output = []
|
list of matches and lists of remaining statement and books
|
||||||
for r1s, r2s, note in matches:
|
transactions.
|
||||||
note = ', '.join(note)
|
|
||||||
note = ': ' + note if note else note
|
|
||||||
if r1s and r2s:
|
|
||||||
if show_reconciled_matches or not all(x['bank_statement'] for x in r2s):
|
|
||||||
if len(r2s) == 1:
|
|
||||||
entry = [r1s[0]['date'], f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}']
|
|
||||||
if 'payee mismatch' in note:
|
|
||||||
entry[1] = Fore.YELLOW + Style.BRIGHT + entry[1] + Style.RESET_ALL
|
|
||||||
match_output.append(entry)
|
|
||||||
else:
|
|
||||||
match_output.extend(format_multirecord(r1s, r2s, note))
|
|
||||||
elif r1s:
|
|
||||||
match_output.append([r1s[0]['date'], Fore.RED + Style.BRIGHT + f'{format_record(r1s[0])} → {" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + Style.RESET_ALL])
|
|
||||||
else:
|
|
||||||
match_output.append([r2s[0]['date'], Fore.RED + Style.BRIGHT + f'{" ":^59} → {format_record(r2s[0])} ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + Style.RESET_ALL])
|
|
||||||
return match_output
|
|
||||||
|
|
||||||
|
|
||||||
def date_proximity(d1: datetime.date, d2: datetime.date) -> float:
|
|
||||||
diff = abs(int((d1 - d2).days))
|
|
||||||
if diff > 60:
|
|
||||||
return 0.0
|
|
||||||
else:
|
|
||||||
return 1.0 - (diff / 60.0)
|
|
||||||
|
|
||||||
|
|
||||||
def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, csv_filename: str) -> List[Tuple[str, int, str]]:
|
|
||||||
# Can we really ever have multiple statement entries? Probably not.
|
|
||||||
statement_filename = get_repo_relative_path(statement_filename)
|
|
||||||
csv_filename = get_repo_relative_path(csv_filename)
|
|
||||||
metadata = []
|
|
||||||
statement_entries, books_entries, _ = match
|
|
||||||
for books_entry in books_entries:
|
|
||||||
for statement_entry in statement_entries:
|
|
||||||
if not books_entry['bank_statement']:
|
|
||||||
metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement: "{statement_filename}"'))
|
|
||||||
metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"'))
|
|
||||||
return metadata
|
|
||||||
|
|
||||||
|
|
||||||
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
|
|
||||||
"""Insert reconciliation metadata in the books files.
|
|
||||||
|
|
||||||
Takes a list of edits to make as tuples of form (filename, lineno, metadata):
|
|
||||||
|
|
||||||
[
|
|
||||||
('2021/main.beancount', 4245, ' bank-statement: statement.pdf'),
|
|
||||||
('2021/main.beancount', 1057, ' bank-statement: statement.pdf'),
|
|
||||||
('2021/payroll.beancount', 257, ' bank-statement: statement.pdf'),
|
|
||||||
...,
|
|
||||||
]
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
file_contents: dict[str, list] = {}
|
|
||||||
file_offsets: dict[str, int] = collections.defaultdict(int)
|
|
||||||
# Load each books file into memory and insert the relevant metadata lines.
|
|
||||||
# Line numbers change as we do this, so we keep track of the offset for each
|
|
||||||
# file. Changes must be sorted by line number first or else the offsets will
|
|
||||||
# break because we're jumping around making edits.
|
|
||||||
for filename, line, metadata in sorted(metadata_to_apply):
|
|
||||||
if filename not in file_contents:
|
|
||||||
with open(filename, 'r') as f:
|
|
||||||
file_contents[filename] = f.readlines()
|
|
||||||
# Insert is inefficient, but fast enough for now in practise.
|
|
||||||
file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n')
|
|
||||||
file_offsets[filename] += 1
|
|
||||||
# Writes each updated file back to disk.
|
|
||||||
for filename, contents in file_contents.items():
|
|
||||||
with open(filename, 'w') as f:
|
|
||||||
f.writelines(contents)
|
|
||||||
print(f'Wrote {filename}.')
|
|
||||||
|
|
||||||
|
|
||||||
def get_repo_relative_path(path: str) -> str:
|
|
||||||
return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY'))
|
|
||||||
|
|
||||||
|
|
||||||
def parse_path(path: str) -> str:
|
|
||||||
if not os.path.exists(path):
|
|
||||||
raise argparse.ArgumentTypeError(f'File {path} does not exist.')
|
|
||||||
return path
|
|
||||||
|
|
||||||
|
|
||||||
def parse_repo_relative_path(path: str) -> str:
|
|
||||||
if not os.path.exists(path):
|
|
||||||
raise argparse.ArgumentTypeError(f'File {path} does not exist.')
|
|
||||||
repo = os.getenv('CONSERVANCY_REPOSITORY')
|
|
||||||
if not repo:
|
|
||||||
raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.')
|
|
||||||
if not path.startswith(repo):
|
|
||||||
raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.')
|
|
||||||
return path
|
|
||||||
|
|
||||||
|
|
||||||
def parse_decimal_with_separator(number_text: str) -> decimal.Decimal:
|
|
||||||
"""decimal.Decimal can't parse numbers with thousands separator."""
|
|
||||||
number_text = number_text.replace(',', '')
|
|
||||||
return decimal.Decimal(number_text)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_arguments(argv: List[str]) -> argparse.Namespace:
|
|
||||||
parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper')
|
|
||||||
cliutil.add_version_argument(parser)
|
|
||||||
cliutil.add_loglevel_argument(parser)
|
|
||||||
parser.add_argument('--beancount-file', required=True, type=parse_path)
|
|
||||||
parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
|
|
||||||
parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
|
|
||||||
parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
|
|
||||||
# parser.add_argument('--report-group-regex')
|
|
||||||
parser.add_argument('--show-reconciled-matches', action='store_true')
|
|
||||||
parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books") # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
|
|
||||||
args = parser.parse_args(args=argv)
|
|
||||||
return args
|
|
||||||
|
|
||||||
|
|
||||||
def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]:
|
|
||||||
total_matched = decimal.Decimal(0)
|
|
||||||
total_missing_from_books = decimal.Decimal(0)
|
|
||||||
total_missing_from_statement = decimal.Decimal(0)
|
|
||||||
for statement_entries, books_entries, _ in matches:
|
|
||||||
if statement_entries and books_entries:
|
|
||||||
total_matched += sum(c['amount'] for c in statement_entries)
|
|
||||||
elif statement_entries:
|
|
||||||
total_missing_from_books += sum(c['amount'] for c in statement_entries)
|
|
||||||
else:
|
|
||||||
total_missing_from_statement += sum(c['amount'] for c in books_entries)
|
|
||||||
return total_matched, total_missing_from_books, total_missing_from_statement
|
|
||||||
|
|
||||||
|
|
||||||
def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]:
|
|
||||||
matches = []
|
matches = []
|
||||||
remaining_books_trans = []
|
remaining_books_trans = []
|
||||||
remaining_statement_trans = []
|
remaining_statement_trans = []
|
||||||
|
@ -531,7 +424,171 @@ def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[
|
||||||
return matches, remaining_statement_trans, remaining_books_trans
|
return matches, remaining_statement_trans, remaining_books_trans
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Return list of tuples (instead of list of lists).
|
||||||
|
|
||||||
|
def format_matches(matches: List, csv_statement: str, show_reconciled_matches: bool) -> List[List]:
|
||||||
|
|
||||||
|
"""Produce a list of body output lines from the given matches.
|
||||||
|
|
||||||
|
The first column is a date so we can re-sort the list to put the
|
||||||
|
missing entries in the right place. The second column is the text
|
||||||
|
output.
|
||||||
|
|
||||||
|
"""
|
||||||
|
match_output = []
|
||||||
|
for r1s, r2s, note in matches:
|
||||||
|
note = ', '.join(note)
|
||||||
|
note = ': ' + note if note else note
|
||||||
|
if r1s and r2s:
|
||||||
|
if show_reconciled_matches or not all(x['bank_statement'] for x in r2s):
|
||||||
|
if len(r2s) == 1:
|
||||||
|
entry = [r1s[0]['date'], f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}']
|
||||||
|
if 'payee mismatch' in note:
|
||||||
|
entry[1] = Fore.YELLOW + Style.BRIGHT + entry[1] + Style.RESET_ALL
|
||||||
|
match_output.append(entry)
|
||||||
|
else:
|
||||||
|
match_output.extend(format_multirecord(r1s, r2s, note))
|
||||||
|
elif r1s:
|
||||||
|
match_output.append([r1s[0]['date'], Fore.RED + Style.BRIGHT + f'{format_record(r1s[0])} → {" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + Style.RESET_ALL])
|
||||||
|
else:
|
||||||
|
match_output.append([r2s[0]['date'], Fore.RED + Style.BRIGHT + f'{" ":^59} → {format_record(r2s[0])} ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + Style.RESET_ALL])
|
||||||
|
return match_output
|
||||||
|
|
||||||
|
|
||||||
|
def date_proximity(d1: datetime.date, d2: datetime.date) -> float:
|
||||||
|
"""Scores two dates based on how close together they are."""
|
||||||
|
ZERO_CUTOFF = 60 # Score will be zero for this many days apart.
|
||||||
|
diff = abs(int((d1 - d2).days))
|
||||||
|
if diff >= ZERO_CUTOFF:
|
||||||
|
return 0.0
|
||||||
|
else:
|
||||||
|
return 1.0 - (diff / ZERO_CUTOFF)
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, csv_filename: str) -> List[Tuple[str, int, str]]:
|
||||||
|
"""Returns the bank-statement metadata that should be applied for a match."""
|
||||||
|
# TODO: Our data structure would allow multiple statement entries
|
||||||
|
# for a match, but would this ever make sense? Probably not.
|
||||||
|
statement_filename = get_repo_relative_path(statement_filename)
|
||||||
|
csv_filename = get_repo_relative_path(csv_filename)
|
||||||
|
metadata = []
|
||||||
|
statement_entries, books_entries, _ = match
|
||||||
|
for books_entry in books_entries:
|
||||||
|
for statement_entry in statement_entries:
|
||||||
|
if not books_entry['bank_statement']:
|
||||||
|
metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement: "{statement_filename}"'))
|
||||||
|
metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"'))
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
|
def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None:
|
||||||
|
"""Insert reconciliation metadata in the books files.
|
||||||
|
|
||||||
|
Takes a list of edits to make as tuples of form (filename, lineno, metadata):
|
||||||
|
|
||||||
|
[
|
||||||
|
('2021/main.beancount', 4245, ' bank-statement: statement.pdf'),
|
||||||
|
('2021/main.beancount', 1057, ' bank-statement: statement.pdf'),
|
||||||
|
('2021/payroll.beancount', 257, ' bank-statement: statement.pdf'),
|
||||||
|
...,
|
||||||
|
]
|
||||||
|
|
||||||
|
Beancount doesn't provide any infrastructure for programmatically
|
||||||
|
updating the books, only appending in the case of importers. So
|
||||||
|
we're on our own here.
|
||||||
|
"""
|
||||||
|
file_contents: dict[str, list] = {}
|
||||||
|
file_offsets: dict[str, int] = collections.defaultdict(int)
|
||||||
|
# Load each books file into memory and insert the relevant metadata lines.
|
||||||
|
# Line numbers change as we do this, so we keep track of the offset for each
|
||||||
|
# file. Changes must be sorted by line number first or else the offsets will
|
||||||
|
# break because we're jumping around making edits.
|
||||||
|
for filename, line, metadata in sorted(metadata_to_apply):
|
||||||
|
if filename not in file_contents:
|
||||||
|
with open(filename, 'r') as f:
|
||||||
|
file_contents[filename] = f.readlines()
|
||||||
|
# Insert is inefficient, but fast enough for now in practice.
|
||||||
|
file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n')
|
||||||
|
file_offsets[filename] += 1
|
||||||
|
# Writes each updated file back to disk.
|
||||||
|
for filename, contents in file_contents.items():
|
||||||
|
with open(filename, 'w') as f:
|
||||||
|
f.writelines(contents)
|
||||||
|
print(f'Wrote {filename}.')
|
||||||
|
|
||||||
|
|
||||||
|
def get_repo_relative_path(path: str) -> str:
|
||||||
|
"""Chop off the unique per-person CONSERVANCY_REPOSITORY.
|
||||||
|
|
||||||
|
CSV and PDF statement metadata should be relative to
|
||||||
|
CONSERVANCY_REPOSITORY, i.e. without regard to exactly where on
|
||||||
|
your computer all the files live.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY'))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_path(path: str) -> str:
|
||||||
|
"""Validate that a file exists for use in argparse."""
|
||||||
|
if not os.path.exists(path):
|
||||||
|
raise argparse.ArgumentTypeError(f'File {path} does not exist.')
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def parse_repo_relative_path(path: str) -> str:
|
||||||
|
"""Validate that a file exists and is within $CONSERVANCY_REPOSITORY.
|
||||||
|
|
||||||
|
For use with argparse.
|
||||||
|
|
||||||
|
"""
|
||||||
|
if not os.path.exists(path):
|
||||||
|
raise argparse.ArgumentTypeError(f'File {path} does not exist.')
|
||||||
|
repo = os.getenv('CONSERVANCY_REPOSITORY')
|
||||||
|
if not repo:
|
||||||
|
raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.')
|
||||||
|
if not path.startswith(repo):
|
||||||
|
raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.')
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def parse_decimal_with_separator(number_text: str) -> decimal.Decimal:
|
||||||
|
"""decimal.Decimal can't parse numbers with thousands separator."""
|
||||||
|
number_text = number_text.replace(',', '')
|
||||||
|
return decimal.Decimal(number_text)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments(argv: List[str]) -> argparse.Namespace:
|
||||||
|
parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper')
|
||||||
|
cliutil.add_version_argument(parser)
|
||||||
|
cliutil.add_loglevel_argument(parser)
|
||||||
|
parser.add_argument('--beancount-file', required=True, type=parse_path)
|
||||||
|
parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path)
|
||||||
|
parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path)
|
||||||
|
parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX')
|
||||||
|
# parser.add_argument('--report-group-regex')
|
||||||
|
parser.add_argument('--show-reconciled-matches', action='store_true')
|
||||||
|
parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books") # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals")
|
||||||
|
args = parser.parse_args(args=argv)
|
||||||
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]:
|
||||||
|
"""Calculate the totals of transactions matched/not-matched."""
|
||||||
|
total_matched = decimal.Decimal(0)
|
||||||
|
total_missing_from_books = decimal.Decimal(0)
|
||||||
|
total_missing_from_statement = decimal.Decimal(0)
|
||||||
|
for statement_entries, books_entries, _ in matches:
|
||||||
|
if statement_entries and books_entries:
|
||||||
|
total_matched += sum(c['amount'] for c in statement_entries)
|
||||||
|
elif statement_entries:
|
||||||
|
total_missing_from_books += sum(c['amount'] for c in statement_entries)
|
||||||
|
else:
|
||||||
|
total_missing_from_statement += sum(c['amount'] for c in books_entries)
|
||||||
|
return total_matched, total_missing_from_books, total_missing_from_statement
|
||||||
|
|
||||||
|
|
||||||
def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> List[Tuple[List, List, List]]:
|
def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> List[Tuple[List, List, List]]:
|
||||||
|
"""Format the remaining unmatched transactions to be added to one single list of matches."""
|
||||||
matches: List[Tuple[List, List, List]] = []
|
matches: List[Tuple[List, List, List]] = []
|
||||||
for r1 in statement_trans:
|
for r1 in statement_trans:
|
||||||
matches.append(([r1], [], ['no match']))
|
matches.append(([r1], [], ['no match']))
|
||||||
|
@ -551,11 +608,8 @@ def main(arglist: Optional[Sequence[str]] = None,
|
||||||
config = configmod.Config()
|
config = configmod.Config()
|
||||||
config.load_file()
|
config.load_file()
|
||||||
|
|
||||||
# TODO: Should put in a sanity check to make sure the statement you're feeding
|
# Validate and normalise the statement into our standard
|
||||||
# in matches the account you've provided.
|
# transaction data structure.
|
||||||
|
|
||||||
# TODO: Can we open the files first, then pass the streams on to the rest of the program?
|
|
||||||
|
|
||||||
if 'AMEX' in args.account:
|
if 'AMEX' in args.account:
|
||||||
validate_csv = validate_amex_csv
|
validate_csv = validate_amex_csv
|
||||||
standardize_statement_record = standardize_amex_record
|
standardize_statement_record = standardize_amex_record
|
||||||
|
@ -569,40 +623,43 @@ def main(arglist: Optional[Sequence[str]] = None,
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
statement_trans = read_transactions_from_csv(f, standardize_statement_record)
|
statement_trans = read_transactions_from_csv(f, standardize_statement_record)
|
||||||
|
|
||||||
|
# Dates are taken from the beginning/end of the statement.
|
||||||
begin_date = statement_trans[0]['date']
|
begin_date = statement_trans[0]['date']
|
||||||
end_date = statement_trans[-1]['date']
|
end_date = statement_trans[-1]['date']
|
||||||
|
|
||||||
# Do we traverse and filter the in-memory entries list and filter that, or do we
|
# Query for the Beancount books data for this above period.
|
||||||
# use Beancount Query Language (BQL) to get a list of transactions? Currently
|
#
|
||||||
# using BQL.
|
# There are pros and cons for using Beancount's in-memory entries
|
||||||
|
# list directly and also for using Beancount Query Language (BQL)
|
||||||
|
# to get a list of transactions. Using BQL because it's
|
||||||
|
# convenient, but we don't have access to the full transaction
|
||||||
|
# entry objects. Feels a bit strange that these approaches are so
|
||||||
|
# disconnected.
|
||||||
#
|
#
|
||||||
# beancount.query.query_compile.compile() and
|
# beancount.query.query_compile.compile() and
|
||||||
# beancount.query.query_execute.filter_entries() look useful in this respect,
|
# beancount.query.query_execute.filter_entries() look useful in this respect,
|
||||||
# but I'm not clear on how to use compile(). An example would help.
|
# but I'm not clear on how to use compile(). An example would help.
|
||||||
entries, _, options = loader.load_file(args.beancount_file)
|
entries, _, options = loader.load_file(args.beancount_file)
|
||||||
|
|
||||||
# books_balance_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}"
|
|
||||||
# AND date <= {end_date.isoformat()}"""
|
|
||||||
# _, result_rows = run_query(entries, options, books_balance_query, numberify=True)
|
|
||||||
# books_balance = result_rows[0][0] if result_rows else 0
|
|
||||||
|
|
||||||
# String concatenation looks bad, but there's no SQL injection possible here
|
# String concatenation looks bad, but there's no SQL injection possible here
|
||||||
# because BQL can't write back to the Beancount files. I hope!
|
# because BQL can't write back to the Beancount files. I hope!
|
||||||
query = f'SELECT filename, META("lineno") AS line, META("bank-statement") AS bank_statement, date, number(cost(position)), payee, ENTRY_META("entity") as entity, ANY_META("check-id") as check_id, narration where account = "{args.account}" and date >= {begin_date} and date <= {end_date}'
|
query = f'SELECT filename, META("lineno") AS line, META("bank-statement") AS bank_statement, date, number(cost(position)), payee, ENTRY_META("entity") as entity, ANY_META("check-id") as check_id, narration where account = "{args.account}" and date >= {begin_date} and date <= {end_date}'
|
||||||
_, result_rows = run_query(entries, options, query)
|
_, result_rows = run_query(entries, options, query)
|
||||||
|
|
||||||
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
|
books_trans = sort_records([standardize_beancount_record(row) for row in result_rows])
|
||||||
|
|
||||||
|
# Apply two passes of matching, one for standard matches and one
|
||||||
|
# for subset matches.
|
||||||
matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans)
|
matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans)
|
||||||
subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(remaining_statement_trans, remaining_books_trans)
|
subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(
|
||||||
|
remaining_statement_trans, remaining_books_trans)
|
||||||
matches.extend(subset_matches)
|
matches.extend(subset_matches)
|
||||||
|
|
||||||
|
# Add the remaining unmatched to make one big list of matches, successful or not.
|
||||||
unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans)
|
unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans)
|
||||||
matches.extend(unmatched)
|
matches.extend(unmatched)
|
||||||
|
|
||||||
|
# Print out results of our matching.
|
||||||
match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
|
match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches)
|
||||||
|
|
||||||
_, total_missing_from_books, total_missing_from_statement = totals(matches)
|
_, total_missing_from_books, total_missing_from_statement = totals(matches)
|
||||||
|
|
||||||
print('-' * 155)
|
print('-' * 155)
|
||||||
statement_heading = f'Statement transactions {begin_date} to {end_date}'
|
statement_heading = f'Statement transactions {begin_date} to {end_date}'
|
||||||
print(f'{statement_heading:<52} {"Books transactions":<58} Notes')
|
print(f'{statement_heading:<52} {"Books transactions":<58} Notes')
|
||||||
|
@ -615,7 +672,7 @@ def main(arglist: Optional[Sequence[str]] = None,
|
||||||
print(f'Total: {total_missing_from_statement + total_missing_from_books:12,.2f}')
|
print(f'Total: {total_missing_from_statement + total_missing_from_books:12,.2f}')
|
||||||
print('-' * 155)
|
print('-' * 155)
|
||||||
|
|
||||||
# Write statement metadata back to books
|
# Write statement metadata back to the books.
|
||||||
metadata_to_apply = []
|
metadata_to_apply = []
|
||||||
for match in matches:
|
for match in matches:
|
||||||
metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement))
|
metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement))
|
||||||
|
|
Loading…
Reference in a new issue