reconcile: Update to support standard FR CSV format

It seems as though we were previously using a transaction search export CSV,
rather than the official statement CSV.
This commit is contained in:
Ben Sturmfels 2023-01-30 23:21:42 +11:00
parent 0968f7f051
commit 3acc097d32
2 changed files with 83 additions and 63 deletions

View file

@ -90,6 +90,7 @@ Other related problems we're not dealing with here:
""" """
# TODO: # TODO:
# - entry_point seems to swallow errors
# - extract the magic numbers # - extract the magic numbers
# - consider merging in helper.py # - consider merging in helper.py
@ -105,7 +106,7 @@ import logging
import os import os
import re import re
import sys import sys
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO from typing import Dict, List, Optional, Sequence, Tuple, TextIO
from beancount import loader from beancount import loader
from beancount.query.query import run_query from beancount.query.query import run_query
@ -187,23 +188,16 @@ def remove_payee_junk(payee: str) -> str:
return payee return payee
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
reader = csv.DictReader(f)
# The reader.line_num is the source line number, not the spreadsheet row
# number due to multi-line records.
return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
def parse_amount(amount: str) -> decimal.Decimal:
    """Convert a statement amount string into a Decimal.

    Dollar signs and comma thousands separators (as seen in some FR
    statements, e.g. ``$1,000.00``) are dropped before parsing.
    """
    cleaned = ''.join(ch for ch in amount if ch not in '$,')
    return decimal.Decimal(cleaned)
def validate_amex_csv(sample: str) -> None:
    """Exit with an explanatory message if ``sample`` lacks the AMEX columns.

    ``sample`` is the leading text of the CSV file; its first line is read
    as the header row. If a header row is present and any of the required
    column names is missing, the process exits via ``sys.exit`` with a
    message. A sample with no header row at all (e.g. an empty string) is
    not rejected here.
    """
    required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
    reader = csv.DictReader(io.StringIO(sample))
    if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
        # Sets have no stable order; sort so the message reads the same on
        # every run.
        sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(sorted(required_cols))}. Please use an unmodified statement direct from the institution.")
def standardize_amex_record(row: Dict, line: int) -> Dict: def standardize_amex_record(row: Dict, line: int) -> Dict:
@ -220,21 +214,47 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
} }
def read_amex_csv(f: TextIO) -> list:
    """Read an AMEX statement CSV (with header row) into sorted records.

    Each data row is passed to ``standardize_amex_record`` together with its
    spreadsheet row number, and the resulting list is handed to
    ``sort_records``.
    """
    records = []
    # Count rows ourselves (starting at 2, after the header row):
    # reader.line_num is the source line number, not the spreadsheet row
    # number, due to multi-line records.
    for row_number, row in enumerate(csv.DictReader(f), 2):
        records.append(standardize_amex_record(row, row_number))
    return sort_records(records)
def validate_fr_csv(sample: str) -> None:
    """Exit with an explanatory message if ``sample`` doesn't look like an FR statement.

    ``sample`` is the leading text of the CSV file. FR statements have no
    column headers, so the check inspects the second row (the first row is
    the previous statement ending balance): it must have exactly 6 columns,
    a ``MM/DD/YYYY`` date in column 2 and a ``$`` in columns 5 and 6.

    Any failure, including a sample with fewer than two rows or a row that
    is too short, exits via ``sys.exit`` rather than raising
    ``StopIteration`` or ``IndexError``.
    """
    error = "This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution."
    reader = csv.reader(io.StringIO(sample))
    next(reader, None)  # First row is previous statement ending balance
    row = next(reader, None)
    # Check the column count before indexing so short rows can't raise.
    if row is None or len(row) != 6:
        sys.exit(error)
    try:
        datetime.datetime.strptime(row[1], '%m/%d/%Y')
    except ValueError:
        sys.exit(error)
    if '$' not in row[4] or '$' not in row[5]:
        sys.exit(error)
def standardize_fr_record(line, row):
    """Convert one headerless FR statement row into the common record dict.

    ``row`` is indexed positionally: column 2 is the ``MM/DD/YYYY`` date,
    column 3 the description (e.g. ``CHECK 3741``), column 4 the payee
    detail and column 5 the amount. ``line`` is stored unchanged under the
    ``'line'`` key.
    """
    posted = datetime.datetime.strptime(row[1], '%m/%d/%Y')
    amount = parse_amount(row[4])
    # Payee text is cleaned up and then capped at 20 characters.
    payee = remove_payee_junk(row[3] or '')[:20]
    description = row[2]
    if 'CHECK ' in description:
        check_id = description.replace('CHECK ', '')
    else:
        check_id = ''
    return {
        'date': posted.date(),
        'amount': amount,
        'payee': payee,
        'check_id': check_id,
        'line': line,
    }
def read_fr_csv(f: TextIO) -> list:
    """Read a headerless FR statement CSV into sorted records.

    Only rows with exactly 6 columns are kept, and the ``LAST STATEMENT`` /
    ``THIS STATEMENT`` balance rows are skipped. Each remaining row goes
    through ``standardize_fr_record`` and the results are handed to
    ``sort_records``.
    """
    balance_markers = {'LAST STATEMENT', 'THIS STATEMENT'}
    rows = csv.reader(f)
    # Number every row from 1, including skipped ones: reader.line_num is
    # the source line number, not the spreadsheet row number, due to
    # multi-line records.
    records = (
        standardize_fr_record(row_number, fields)
        for row_number, fields in enumerate(rows, 1)
        if len(fields) == 6 and fields[2] not in balance_markers
    )
    return sort_records(records)
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def] def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
@ -617,16 +637,19 @@ def main(arglist: Optional[Sequence[str]] = None,
# transaction data structure. # transaction data structure.
if 'AMEX' in args.account: if 'AMEX' in args.account:
validate_csv = validate_amex_csv validate_csv = validate_amex_csv
standardize_statement_record = standardize_amex_record read_csv = read_amex_csv
else: else:
validate_csv = validate_fr_csv validate_csv = validate_fr_csv
standardize_statement_record = standardize_fr_record read_csv = read_fr_csv
with open(args.csv_statement) as f: with open(args.csv_statement) as f:
sample = f.read(200) sample = f.read(200)
validate_csv(sample, args.account) # Validate should return true/false and a message.
validate_csv(sample)
f.seek(0) f.seek(0)
statement_trans = read_transactions_from_csv(f, standardize_statement_record) # TODO: Needs a custom read_transactions_from_csv for each of AMEX and
# FR since AMEX has a header row and FR doesn't.
statement_trans = read_csv(f)
# Dates are taken from the beginning/end of the statement. # Dates are taken from the beginning/end of the statement.
begin_date = statement_trans[0]['date'] begin_date = statement_trans[0]['date']

View file

@ -1,5 +1,6 @@
import datetime import datetime
import decimal import decimal
import io
import os import os
import tempfile import tempfile
import textwrap import textwrap
@ -9,10 +10,10 @@ from conservancy_beancount.reconcile.statement_reconciler import (
match_statement_and_books, match_statement_and_books,
metadata_for_match, metadata_for_match,
payee_match, payee_match,
read_amex_csv,
read_fr_csv,
remove_duplicate_words, remove_duplicate_words,
remove_payee_junk, remove_payee_junk,
standardize_amex_record,
standardize_fr_record,
subset_match, subset_match,
totals, totals,
write_metadata_to_books, write_metadata_to_books,
@ -345,40 +346,36 @@ def test_subset_passes_through_all_non_matches():
) )
def test_handles_amex_csv():
    """read_amex_csv parses a one-transaction AMEX statement with multi-line fields."""
    statement = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
    records = read_amex_csv(io.StringIO(statement))
    # The statement's 28.15 is expected to come back as -28.15.
    assert records == [
        {
            'date': datetime.date(2021, 8, 19),
            'amount': decimal.Decimal('-28.15'),
            'payee': 'Gandi San Francisco',
            'check_id': '',
            'line': 2,
        },
    ]
def test_handles_fr_csv():
    """read_fr_csv skips the balance rows and parses wire and check rows."""
    statement = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK 3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
    wire_record = {
        'date': datetime.date(2022, 4, 1),
        'amount': decimal.Decimal('6.50'),
        'payee': 'GONDOR S.S. A1111111',
        'check_id': '',
        'line': 2,
    }
    check_record = {
        'date': datetime.date(2022, 4, 18),
        'amount': decimal.Decimal('-4.50'),
        'payee': '',
        'check_id': '3741',
        'line': 3,
    }
    records = read_fr_csv(io.StringIO(statement))
    assert records == [wire_record, check_record]