From 3acc097d32c79f239047628b5f65aed9c31dbf14 Mon Sep 17 00:00:00 2001 From: Ben Sturmfels Date: Mon, 30 Jan 2023 23:21:42 +1100 Subject: [PATCH] reconcile: Update to support standard FR CSV format It seems as though we were previously using a transaction search export CSV, rather than the official statement CSV. --- .../reconcile/statement_reconciler.py | 75 ++++++++++++------- tests/test_reconcile.py | 71 +++++++++--------- 2 files changed, 83 insertions(+), 63 deletions(-) diff --git a/conservancy_beancount/reconcile/statement_reconciler.py b/conservancy_beancount/reconcile/statement_reconciler.py index e68898d..0cc69c4 100644 --- a/conservancy_beancount/reconcile/statement_reconciler.py +++ b/conservancy_beancount/reconcile/statement_reconciler.py @@ -90,6 +90,7 @@ Other related problems we're not dealing with here: """ # TODO: +# - entry_point seems to swallow errors # - extract the magic numbers # - consider merging in helper.py @@ -105,7 +106,7 @@ import logging import os import re import sys -from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO +from typing import Dict, List, Optional, Sequence, Tuple, TextIO from beancount import loader from beancount.query.query import run_query @@ -187,23 +188,16 @@ def remove_payee_junk(payee: str) -> str: return payee -def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list: - reader = csv.DictReader(f) - # The reader.line_num is the source line number, not the spreadsheet row - # number due to multi-line records. - return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)]) - - def parse_amount(amount: str) -> decimal.Decimal: """Parse amounts and handle comma separators as seen in some FR statements.""" - return decimal.Decimal(amount.replace(',', '')) + return decimal.Decimal(amount.replace('$', '').replace(',', '')) -def validate_amex_csv(sample: str, account: str) -> None: +def validate_amex_csv(sample: str) -> None: required_cols = {'Date', 'Amount', 'Description', 'Card Member'} reader = csv.DictReader(io.StringIO(sample)) if reader.fieldnames and not required_cols.issubset(reader.fieldnames): - sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}") + sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution.") def standardize_amex_record(row: Dict, line: int) -> Dict: @@ -220,21 +214,47 @@ def standardize_amex_record(row: Dict, line: int) -> Dict: } -def validate_fr_csv(sample: str, account: str) -> None: - required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'} - reader = csv.DictReader(io.StringIO(sample)) - if reader.fieldnames and not required_cols.issubset(reader.fieldnames): - sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}") +def read_amex_csv(f: TextIO) -> list: + reader = csv.DictReader(f) + # The reader.line_num is the source line number, not the spreadsheet row + # number due to multi-line records. + return sort_records([standardize_amex_record(row, i) for i, row in enumerate(reader, 2)]) -def standardize_fr_record(row: Dict, line: int) -> Dict: - return { - 'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), - 'amount': parse_amount(row['Amount']), - 'payee': remove_payee_junk(row['Detail'] or '')[:20], - 'check_id': row['Serial Num'].lstrip('0'), +def validate_fr_csv(sample: str) -> None: + # No column headers in FR statements + reader = csv.reader(io.StringIO(sample)) + next(reader) # First row is previous statement ending balance + row = next(reader) + date = None + try: + date = datetime.datetime.strptime(row[1], '%m/%d/%Y') + except ValueError: + pass + amount_found = '$' in row[4] and '$' in row[5] + if len(row) != 6 or not date or not amount_found: + sys.exit("This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution.") + + +def standardize_fr_record(line, row): + record = { + 'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(), + 'amount': parse_amount(row[4]), + 'payee': remove_payee_junk(row[3] or '')[:20], + 'check_id': row[2].replace('CHECK ', '') if 'CHECK ' in row[2] else '', 'line': line, } + return record + + +def read_fr_csv(f: TextIO) -> list: + reader = csv.reader(f) + # The reader.line_num is the source line number, not the spreadsheet row + # number due to multi-line records. + return sort_records( + standardize_fr_record(i, row) for i, row in enumerate(reader, 1) + if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'} + ) def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def] @@ -617,16 +637,19 @@ def main(arglist: Optional[Sequence[str]] = None, # transaction data structure. if 'AMEX' in args.account: validate_csv = validate_amex_csv - standardize_statement_record = standardize_amex_record + read_csv = read_amex_csv else: validate_csv = validate_fr_csv - standardize_statement_record = standardize_fr_record + read_csv = read_fr_csv with open(args.csv_statement) as f: sample = f.read(200) - validate_csv(sample, args.account) + # Validate should return true/false and a message. + validate_csv(sample) f.seek(0) - statement_trans = read_transactions_from_csv(f, standardize_statement_record) + # TODO: Needs a custom read_transactions_from_csv for each of AMEX and + # FR since AMEX has a header row and FR doesn't. + statement_trans = read_csv(f) # Dates are taken from the beginning/end of the statement. begin_date = statement_trans[0]['date'] diff --git a/tests/test_reconcile.py b/tests/test_reconcile.py index aa368a8..9d58eb7 100644 --- a/tests/test_reconcile.py +++ b/tests/test_reconcile.py @@ -1,5 +1,6 @@ import datetime import decimal +import io import os import tempfile import textwrap @@ -9,10 +10,10 @@ from conservancy_beancount.reconcile.statement_reconciler import ( match_statement_and_books, metadata_for_match, payee_match, + read_amex_csv, + read_fr_csv, remove_duplicate_words, remove_payee_junk, - standardize_amex_record, - standardize_fr_record, subset_match, totals, write_metadata_to_books, @@ -345,40 +346,36 @@ def test_subset_passes_through_all_non_matches(): ) -def test_handles_fr_record_with_comma_separators(): - # CSV would look something like: - # - # "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail" - # "02/07/2022",,,,,,,"10,000.00",,,"XXXX" - input_row = { - 'Date': '02/07/2022', - 'Amount': '10,000.00', - 'Detail': 'XXXX', - 'Serial Num': '', - } - expected = { - 'date': datetime.date(2022, 2, 7), - 'amount': decimal.Decimal('10000'), - 'payee': 'XXXX', - 'check_id': '', - 'line': 1, - } - assert standardize_fr_record(input_row, line=1) == expected +def test_handles_amex_csv(): + CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n""" + expected = [ + { + 'date': datetime.date(2021, 8, 19), + 'amount': decimal.Decimal('-28.15'), + 'payee': 'Gandi San Francisco', + 'check_id': '', + 'line': 2, + }, + ] + assert read_amex_csv(io.StringIO(CSV)) == expected -def test_handles_amex_record_with_comma_separators(): - # This insn't typically a problem with AMEX, but adding for completeness. - input_row = { - 'Date': '02/07/2022', - 'Amount': '-10,000.00', # Amounts are from Bank's perspective/negated. - 'Description': 'XXXX', - 'Serial Num': '', - } - expected = { - 'date': datetime.date(2022, 2, 7), - 'amount': decimal.Decimal('10000'), - 'payee': 'XXXX', - 'check_id': '', - 'line': 1, - } - assert standardize_amex_record(input_row, line=1) == expected +def test_handles_fr_csv(): + CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK 3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n""" + expected = [ + { + 'date': datetime.date(2022, 4, 1), + 'amount': decimal.Decimal('6.50'), + 'payee': 'GONDOR S.S. A1111111', + 'check_id': '', + 'line': 2, + }, + { + 'date': datetime.date(2022, 4, 18), + 'amount': decimal.Decimal('-4.50'), + 'payee': '', + 'check_id': '3741', + 'line': 3, + }, + ] + assert read_fr_csv(io.StringIO(CSV)) == expected