reconcile: Update to support standard FR CSV format

It seems as though we were previously using a transaction search export CSV,
rather than the official statement CSV.
This commit is contained in:
Ben Sturmfels 2023-01-30 23:21:42 +11:00
parent 0968f7f051
commit 3acc097d32
2 changed files with 83 additions and 63 deletions

View file

@ -90,6 +90,7 @@ Other related problems we're not dealing with here:
"""
# TODO:
# - entry_point seems to swallow errors
# - extract the magic numbers
# - consider merging in helper.py
@ -105,7 +106,7 @@ import logging
import os
import re
import sys
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO
from typing import Dict, List, Optional, Sequence, Tuple, TextIO
from beancount import loader
from beancount.query.query import run_query
@ -187,23 +188,16 @@ def remove_payee_junk(payee: str) -> str:
return payee
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
reader = csv.DictReader(f)
# The reader.line_num is the source line number, not the spreadsheet row
# number due to multi-line records.
return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
def parse_amount(amount: str) -> decimal.Decimal:
"""Parse amounts and handle comma separators as seen in some FR statements."""
return decimal.Decimal(amount.replace(',', ''))
return decimal.Decimal(amount.replace('$', '').replace(',', ''))
def validate_amex_csv(sample: str, account: str) -> None:
def validate_amex_csv(sample: str) -> None:
required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
reader = csv.DictReader(io.StringIO(sample))
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution.")
def standardize_amex_record(row: Dict, line: int) -> Dict:
@ -220,21 +214,47 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
}
def validate_fr_csv(sample: str, account: str) -> None:
required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
reader = csv.DictReader(io.StringIO(sample))
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
def read_amex_csv(f: TextIO) -> list:
reader = csv.DictReader(f)
# The reader.line_num is the source line number, not the spreadsheet row
# number due to multi-line records.
return sort_records([standardize_amex_record(row, i) for i, row in enumerate(reader, 2)])
def standardize_fr_record(row: Dict, line: int) -> Dict:
return {
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
'amount': parse_amount(row['Amount']),
'payee': remove_payee_junk(row['Detail'] or '')[:20],
'check_id': row['Serial Num'].lstrip('0'),
def validate_fr_csv(sample: str) -> None:
# No column headers in FR statements
reader = csv.reader(io.StringIO(sample))
next(reader) # First row is previous statement ending balance
row = next(reader)
date = None
try:
date = datetime.datetime.strptime(row[1], '%m/%d/%Y')
except ValueError:
pass
amount_found = '$' in row[4] and '$' in row[5]
if len(row) != 6 or not date or not amount_found:
sys.exit("This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution.")
def standardize_fr_record(line, row):
record = {
'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(),
'amount': parse_amount(row[4]),
'payee': remove_payee_junk(row[3] or '')[:20],
'check_id': row[2].replace('CHECK ', '') if 'CHECK ' in row[2] else '',
'line': line,
}
return record
def read_fr_csv(f: TextIO) -> list:
reader = csv.reader(f)
# The reader.line_num is the source line number, not the spreadsheet row
# number due to multi-line records.
return sort_records(
standardize_fr_record(i, row) for i, row in enumerate(reader, 1)
if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'}
)
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
@ -617,16 +637,19 @@ def main(arglist: Optional[Sequence[str]] = None,
# transaction data structure.
if 'AMEX' in args.account:
validate_csv = validate_amex_csv
standardize_statement_record = standardize_amex_record
read_csv = read_amex_csv
else:
validate_csv = validate_fr_csv
standardize_statement_record = standardize_fr_record
read_csv = read_fr_csv
with open(args.csv_statement) as f:
sample = f.read(200)
validate_csv(sample, args.account)
# Validate should return true/false and a message.
validate_csv(sample)
f.seek(0)
statement_trans = read_transactions_from_csv(f, standardize_statement_record)
# TODO: Needs a custom read_transactions_from_csv for each of AMEX and
# FR since AMEX has a header row and FR doesn't.
statement_trans = read_csv(f)
# Dates are taken from the beginning/end of the statement.
begin_date = statement_trans[0]['date']

View file

@ -1,5 +1,6 @@
import datetime
import decimal
import io
import os
import tempfile
import textwrap
@ -9,10 +10,10 @@ from conservancy_beancount.reconcile.statement_reconciler import (
match_statement_and_books,
metadata_for_match,
payee_match,
read_amex_csv,
read_fr_csv,
remove_duplicate_words,
remove_payee_junk,
standardize_amex_record,
standardize_fr_record,
subset_match,
totals,
write_metadata_to_books,
@ -345,40 +346,36 @@ def test_subset_passes_through_all_non_matches():
)
def test_handles_fr_record_with_comma_separators():
# CSV would look something like:
#
# "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail"
# "02/07/2022",,,,,,,"10,000.00",,,"XXXX"
input_row = {
'Date': '02/07/2022',
'Amount': '10,000.00',
'Detail': 'XXXX',
'Serial Num': '',
}
expected = {
'date': datetime.date(2022, 2, 7),
'amount': decimal.Decimal('10000'),
'payee': 'XXXX',
def test_handles_amex_csv():
CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
expected = [
{
'date': datetime.date(2021, 8, 19),
'amount': decimal.Decimal('-28.15'),
'payee': 'Gandi San Francisco',
'check_id': '',
'line': 1,
}
assert standardize_fr_record(input_row, line=1) == expected
'line': 2,
},
]
assert read_amex_csv(io.StringIO(CSV)) == expected
def test_handles_amex_record_with_comma_separators():
# This insn't typically a problem with AMEX, but adding for completeness.
input_row = {
'Date': '02/07/2022',
'Amount': '-10,000.00', # Amounts are from Bank's perspective/negated.
'Description': 'XXXX',
'Serial Num': '',
}
expected = {
'date': datetime.date(2022, 2, 7),
'amount': decimal.Decimal('10000'),
'payee': 'XXXX',
def test_handles_fr_csv():
CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK 3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
expected = [
{
'date': datetime.date(2022, 4, 1),
'amount': decimal.Decimal('6.50'),
'payee': 'GONDOR S.S. A1111111',
'check_id': '',
'line': 1,
}
assert standardize_amex_record(input_row, line=1) == expected
'line': 2,
},
{
'date': datetime.date(2022, 4, 18),
'amount': decimal.Decimal('-4.50'),
'payee': '',
'check_id': '3741',
'line': 3,
},
]
assert read_fr_csv(io.StringIO(CSV)) == expected