diff --git a/conservancy_beancount/reconcile/statement_reconciler.py b/conservancy_beancount/reconcile/statement_reconciler.py index f42000c..c874d4f 100644 --- a/conservancy_beancount/reconcile/statement_reconciler.py +++ b/conservancy_beancount/reconcile/statement_reconciler.py @@ -118,6 +118,7 @@ from .. import config as configmod if not sys.warnoptions: import warnings + # Disable annoying warning from thefuzz prompting for a C extension. The # current pure-Python implementation isn't a bottleneck for us. warnings.filterwarnings('ignore', category=UserWarning, module='thefuzz.fuzz') @@ -200,7 +201,9 @@ def validate_amex_csv(sample: str) -> None: required_cols = {'Date', 'Amount', 'Description', 'Card Member'} reader = csv.DictReader(io.StringIO(sample)) if reader.fieldnames and not required_cols.issubset(reader.fieldnames): - sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution.") + sys.exit( + f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution." + ) def standardize_amex_record(row: Dict, line: int) -> Dict: @@ -221,7 +224,9 @@ def read_amex_csv(f: TextIO) -> list: reader = csv.DictReader(f) # The reader.line_num is the source line number, not the spreadsheet row # number due to multi-line records. - return sort_records([standardize_amex_record(row, i) for i, row in enumerate(reader, 2)]) + return sort_records( + [standardize_amex_record(row, i) for i, row in enumerate(reader, 2)] + ) def validate_fr_csv(sample: str) -> None: @@ -236,7 +241,9 @@ def validate_fr_csv(sample: str) -> None: pass amount_found = '$' in row[4] and '$' in row[5] if len(row) != 6 or not date or not amount_found: - sys.exit("This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution.") + sys.exit( + "This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution." + ) def standardize_fr_record(line, row): @@ -255,7 +262,8 @@ def read_fr_csv(f: TextIO) -> list: # The reader.line_num is the source line number, not the spreadsheet row # number due to multi-line records. return sort_records( - standardize_fr_record(i, row) for i, row in enumerate(reader, 1) + standardize_fr_record(i, row) + for i, row in enumerate(reader, 1) if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'} ) @@ -265,7 +273,9 @@ def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def] return { 'date': row.date, 'amount': row.number_cost_position, - 'payee': remove_payee_junk(f'{row.payee or ""} {row.entity or ""} {row.narration or ""}'), + 'payee': remove_payee_junk( + f'{row.payee or ""} {row.entity or ""} {row.narration or ""}' + ), 'check_id': str(row.check_id or ''), 'filename': row.filename, 'line': row.line, @@ -289,15 +299,27 @@ def format_multirecord(r1s: List[dict], r2s: List[dict], note: str) -> List[list assert len(r1s) == 1 assert len(r2s) > 1 match_output = [] - match_output.append([r1s[0]['date'], f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}']) + match_output.append( + [ + r1s[0]['date'], + f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}', + ] + ) for r2 in r2s[1:]: - match_output.append([r1s[0]['date'], f'{r1s[0]["date"].isoformat()}: ↳ → {format_record(r2)} ✓ Matched{note}']) + match_output.append( + [ + r1s[0]['date'], + f'{r1s[0]["date"].isoformat()}: ↳ → {format_record(r2)} ✓ Matched{note}', + ] + ) return match_output def _start_of_month(time, offset_months=0): if offset_months > 0: - return _start_of_month(time.replace(day=28) + datetime.timedelta(days=4), offset_months - 1) + return _start_of_month( + time.replace(day=28) + datetime.timedelta(days=4), offset_months - 1 + ) else: return time.replace(day=1) @@ -306,7 +328,8 @@ def round_to_month(begin_date, end_date): """Round a beginning and end date to beginning and end of months respectively.""" return ( _start_of_month(begin_date), - _start_of_month(end_date, offset_months=1) - datetime.timedelta(days=1)) + _start_of_month(end_date, offset_months=1) - datetime.timedelta(days=1), + ) def sort_records(records: List) -> List: @@ -377,11 +400,15 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]: payee_message = 'payee mismatch' overall_score = (date_score + amount_score + check_score + payee_score) / 4 - overall_message = [m for m in [date_message, amount_message, check_message, payee_message] if m] + overall_message = [ + m for m in [date_message, amount_message, check_message, payee_message] if m + ] return overall_score, overall_message -def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]: +def match_statement_and_books( + statement_trans: List[Dict], books_trans: List[Dict] +) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]: """Match transactions between the statement and books. If matched, the books transaction is marked off so that it can @@ -413,7 +440,12 @@ def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dic best_match_score = score best_match_index = i best_match_note = note - if best_match_score > 0.5 and matches_found == 1 and 'check-id mismatch' not in best_match_note or best_match_score > 0.8: + if ( + best_match_score > 0.5 + and matches_found == 1 + and 'check-id mismatch' not in best_match_note + or best_match_score > 0.8 + ): matches.append(([r1], [books_trans[best_match_index]], best_match_note)) # Don't try to make a second match against this books entry. if best_match_index is not None: @@ -425,7 +457,9 @@ def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dic return matches, remaining_statement_trans, remaining_books_trans -def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]: +def subset_match( + statement_trans: List[dict], books_trans: List[dict] +) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]: """Match single statement transactions with multiple books transactions. Works similarly to match_statement_and_books in that it returns a @@ -455,8 +489,15 @@ def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[ best_match_score = score best_match_index = i best_match_note = note - if best_match_score > 0.5 and matches_found == 1 and 'check-id mismatch' not in best_match_note or best_match_score > 0.8: - matches.append(([statement_trans[best_match_index]], group_items, best_match_note)) + if ( + best_match_score > 0.5 + and matches_found == 1 + and 'check-id mismatch' not in best_match_note + or best_match_score > 0.8 + ): + matches.append( + ([statement_trans[best_match_index]], group_items, best_match_note) + ) if best_match_index is not None: del statement_trans[best_match_index] else: @@ -468,7 +509,10 @@ def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[ # TODO: Return list of tuples (instead of list of lists). -def format_matches(matches: List, csv_statement: str, show_reconciled_matches: bool) -> List[List]: + +def format_matches( + matches: List, csv_statement: str, show_reconciled_matches: bool +) -> List[List]: """Produce a list of body output lines from the given matches. @@ -484,16 +528,35 @@ def format_matches(matches: List, csv_statement: str, show_reconciled_matches: b if r1s and r2s: if show_reconciled_matches or not all(x['bank_statement'] for x in r2s): if len(r2s) == 1: - entry = [r1s[0]['date'], f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}'] + entry = [ + r1s[0]['date'], + f'{format_record(r1s[0])} → {format_record(r2s[0])} ✓ Matched{note}', + ] if 'payee mismatch' in note: entry[1] = Fore.YELLOW + Style.BRIGHT + entry[1] + Style.RESET_ALL match_output.append(entry) else: match_output.extend(format_multirecord(r1s, r2s, note)) elif r1s: - match_output.append([r1s[0]['date'], Fore.RED + Style.BRIGHT + f'{format_record(r1s[0])} → {" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + Style.RESET_ALL]) + match_output.append( + [ + r1s[0]['date'], + Fore.RED + + Style.BRIGHT + + f'{format_record(r1s[0])} → {" ":^59} ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + + Style.RESET_ALL, + ] + ) else: - match_output.append([r2s[0]['date'], Fore.RED + Style.BRIGHT + f'{" ":^59} → {format_record(r2s[0])} ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + Style.RESET_ALL]) + match_output.append( + [ + r2s[0]['date'], + Fore.RED + + Style.BRIGHT + + f'{" ":^59} → {format_record(r2s[0])} ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + + Style.RESET_ALL, + ] + ) return match_output @@ -507,7 +570,9 @@ def date_proximity(d1: datetime.date, d2: datetime.date) -> float: return 1.0 - (diff / ZERO_CUTOFF) -def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, csv_filename: str) -> List[Tuple[str, int, str]]: +def metadata_for_match( + match: Tuple[List, List, List], statement_filename: str, csv_filename: str +) -> List[Tuple[str, int, str]]: """Returns the bank-statement metadata that should be applied for a match.""" # TODO: Our data structure would allow multiple statement entries # for a match, but would this ever make sense? Probably not. @@ -518,8 +583,20 @@ def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, for books_entry in books_entries: for statement_entry in statement_entries: if not books_entry['bank_statement']: - metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement: "{statement_filename}"')) - metadata.append((books_entry['filename'], books_entry['line'], f' bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"')) + metadata.append( + ( + books_entry['filename'], + books_entry['line'], + f' bank-statement: "{statement_filename}"', + ) + ) + metadata.append( + ( + books_entry['filename'], + books_entry['line'], + f' bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"', + ) + ) return metadata @@ -550,7 +627,9 @@ def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> No with open(filename, 'r') as f: file_contents[filename] = f.readlines() # Insert is inefficient, but fast enough for now in practise. - file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n') + file_contents[filename].insert( + line + file_offsets[filename], metadata.rstrip() + '\n' + ) file_offsets[filename] += 1 # Writes each updated file back to disk. for filename, contents in file_contents.items(): @@ -589,7 +668,9 @@ def parse_repo_relative_path(path: str) -> str: if not repo: raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.') if not path.startswith(repo): - raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.') + raise argparse.ArgumentTypeError( + f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.' + ) return path @@ -606,16 +687,28 @@ def parse_arguments(argv: List[str]) -> argparse.Namespace: parser.add_argument('--beancount-file', required=True, type=parse_path) parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path) parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path) - parser.add_argument('--account', required=True, help='eg. Liabilities:CreditCard:AMEX') + parser.add_argument( + '--account', required=True, help='eg. Liabilities:CreditCard:AMEX' + ) # parser.add_argument('--report-group-regex') parser.add_argument('--show-reconciled-matches', action='store_true') - parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books") # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals") - parser.add_argument('--full-months', action='store_true', help='Match payments over the full month, rather that just between the beginning and end dates of the CSV statement') + parser.add_argument( + '--non-interactive', + action='store_true', + help="Don't prompt to write to the books", + ) # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals") + parser.add_argument( + '--full-months', + action='store_true', + help='Match payments over the full month, rather that just between the beginning and end dates of the CSV statement', + ) args = parser.parse_args(args=argv) return args -def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]: +def totals( + matches: List[Tuple[List, List, List]] +) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]: """Calculate the totals of transactions matched/not-matched.""" total_matched = decimal.Decimal(0) total_missing_from_books = decimal.Decimal(0) @@ -630,7 +723,9 @@ def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, dec return total_matched, total_missing_from_books, total_missing_from_statement -def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> List[Tuple[List, List, List]]: +def process_unmatched( + statement_trans: List[dict], books_trans: List[dict] +) -> List[Tuple[List, List, List]]: """Format the remaining unmatched transactions to be added to one single list of matches.""" matches: List[Tuple[List, List, List]] = [] for r1 in statement_trans: @@ -640,29 +735,41 @@ def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> L return matches -def format_output(matches, begin_date, end_date, csv_statement, show_reconciled_matches) -> str: +def format_output( + matches, begin_date, end_date, csv_statement, show_reconciled_matches +) -> str: with io.StringIO() as out: match_output = format_matches(matches, csv_statement, show_reconciled_matches) _, total_missing_from_books, total_missing_from_statement = totals(matches) print('-' * 155, file=out) statement_heading = f'Statement transactions {begin_date} to {end_date}' - print(f'{statement_heading:<52} {"Books transactions":<58} Notes', file=out) + print( + f'{statement_heading:<52} {"Books transactions":<58} Notes', + file=out, + ) print('-' * 155, file=out) for _, output in sorted(match_output, key=lambda x: x[0]): print(output, file=out) print('-' * 155, file=out) - print(f'Sub-total not on statement: {total_missing_from_statement:12,.2f}', file=out) + print( + f'Sub-total not on statement: {total_missing_from_statement:12,.2f}', + file=out, + ) print(f'Sub-total not in books: {total_missing_from_books:12,.2f}', file=out) - print(f'Total: {total_missing_from_statement + total_missing_from_books:12,.2f}', file=out) + print( + f'Total: {total_missing_from_statement + total_missing_from_books:12,.2f}', + file=out, + ) print('-' * 155, file=out) return out.getvalue() -def main(arglist: Optional[Sequence[str]] = None, - stdout: TextIO = sys.stdout, - stderr: TextIO = sys.stderr, - config: Optional[configmod.Config] = None, - ) -> int: +def main( + arglist: Optional[Sequence[str]] = None, + stdout: TextIO = sys.stdout, + stderr: TextIO = sys.stderr, + config: Optional[configmod.Config] = None, +) -> int: args = parse_arguments(arglist) cliutil.set_loglevel(logger, args.loglevel) if config is None: @@ -727,9 +834,14 @@ def main(arglist: Optional[Sequence[str]] = None, # Apply two passes of matching, one for standard matches and one # for subset matches. - matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans) + ( + matches, + remaining_statement_trans, + remaining_books_trans, + ) = match_statement_and_books(statement_trans, books_trans) subset_matches, remaining_statement_trans, remaining_books_trans = subset_match( - remaining_statement_trans, remaining_books_trans) + remaining_statement_trans, remaining_books_trans + ) matches.extend(subset_matches) # Add the remaining unmatched to make one big list of matches, successful or not. @@ -737,12 +849,22 @@ def main(arglist: Optional[Sequence[str]] = None, matches.extend(unmatched) # Print out results of our matching. - print(format_output(matches, begin_date, end_date, args.csv_statement, args.show_reconciled_matches)) + print( + format_output( + matches, + begin_date, + end_date, + args.csv_statement, + args.show_reconciled_matches, + ) + ) # Write statement metadata back to the books. metadata_to_apply = [] for match in matches: - metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement)) + metadata_to_apply.extend( + metadata_for_match(match, args.bank_statement, args.csv_statement) + ) if metadata_to_apply and not args.non_interactive: print('Mark matched transactions as reconciled in the books? (y/N) ', end='') if input().lower() == 'y':