reconcile: Update to support standard FR CSV format
It seems as though we were previously using a transaction search export CSV, rather than the official statement CSV.
This commit is contained in:
parent
0968f7f051
commit
3acc097d32
2 changed files with 83 additions and 63 deletions
|
@ -90,6 +90,7 @@ Other related problems we're not dealing with here:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO:
|
# TODO:
|
||||||
|
# - entry_point seems to swallow errors
|
||||||
# - extract the magic numbers
|
# - extract the magic numbers
|
||||||
# - consider merging in helper.py
|
# - consider merging in helper.py
|
||||||
|
|
||||||
|
@ -105,7 +106,7 @@ import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO
|
from typing import Dict, List, Optional, Sequence, Tuple, TextIO
|
||||||
|
|
||||||
from beancount import loader
|
from beancount import loader
|
||||||
from beancount.query.query import run_query
|
from beancount.query.query import run_query
|
||||||
|
@ -187,23 +188,16 @@ def remove_payee_junk(payee: str) -> str:
|
||||||
return payee
|
return payee
|
||||||
|
|
||||||
|
|
||||||
def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list:
|
|
||||||
reader = csv.DictReader(f)
|
|
||||||
# The reader.line_num is the source line number, not the spreadsheet row
|
|
||||||
# number due to multi-line records.
|
|
||||||
return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)])
|
|
||||||
|
|
||||||
|
|
||||||
def parse_amount(amount: str) -> decimal.Decimal:
|
def parse_amount(amount: str) -> decimal.Decimal:
|
||||||
"""Parse amounts and handle comma separators as seen in some FR statements."""
|
"""Parse amounts and handle comma separators as seen in some FR statements."""
|
||||||
return decimal.Decimal(amount.replace(',', ''))
|
return decimal.Decimal(amount.replace('$', '').replace(',', ''))
|
||||||
|
|
||||||
|
|
||||||
def validate_amex_csv(sample: str, account: str) -> None:
|
def validate_amex_csv(sample: str) -> None:
|
||||||
required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
|
required_cols = {'Date', 'Amount', 'Description', 'Card Member'}
|
||||||
reader = csv.DictReader(io.StringIO(sample))
|
reader = csv.DictReader(io.StringIO(sample))
|
||||||
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
|
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
|
||||||
sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
|
sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution.")
|
||||||
|
|
||||||
|
|
||||||
def standardize_amex_record(row: Dict, line: int) -> Dict:
|
def standardize_amex_record(row: Dict, line: int) -> Dict:
|
||||||
|
@ -220,21 +214,47 @@ def standardize_amex_record(row: Dict, line: int) -> Dict:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def validate_fr_csv(sample: str, account: str) -> None:
|
def read_amex_csv(f: TextIO) -> list:
|
||||||
required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'}
|
reader = csv.DictReader(f)
|
||||||
reader = csv.DictReader(io.StringIO(sample))
|
# The reader.line_num is the source line number, not the spreadsheet row
|
||||||
if reader.fieldnames and not required_cols.issubset(reader.fieldnames):
|
# number due to multi-line records.
|
||||||
sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}")
|
return sort_records([standardize_amex_record(row, i) for i, row in enumerate(reader, 2)])
|
||||||
|
|
||||||
|
|
||||||
def standardize_fr_record(row: Dict, line: int) -> Dict:
|
def validate_fr_csv(sample: str) -> None:
|
||||||
return {
|
# No column headers in FR statements
|
||||||
'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
|
reader = csv.reader(io.StringIO(sample))
|
||||||
'amount': parse_amount(row['Amount']),
|
next(reader) # First row is previous statement ending balance
|
||||||
'payee': remove_payee_junk(row['Detail'] or '')[:20],
|
row = next(reader)
|
||||||
'check_id': row['Serial Num'].lstrip('0'),
|
date = None
|
||||||
|
try:
|
||||||
|
date = datetime.datetime.strptime(row[1], '%m/%d/%Y')
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
amount_found = '$' in row[4] and '$' in row[5]
|
||||||
|
if len(row) != 6 or not date or not amount_found:
|
||||||
|
sys.exit("This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution.")
|
||||||
|
|
||||||
|
|
||||||
|
def standardize_fr_record(line, row):
|
||||||
|
record = {
|
||||||
|
'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(),
|
||||||
|
'amount': parse_amount(row[4]),
|
||||||
|
'payee': remove_payee_junk(row[3] or '')[:20],
|
||||||
|
'check_id': row[2].replace('CHECK ', '') if 'CHECK ' in row[2] else '',
|
||||||
'line': line,
|
'line': line,
|
||||||
}
|
}
|
||||||
|
return record
|
||||||
|
|
||||||
|
|
||||||
|
def read_fr_csv(f: TextIO) -> list:
|
||||||
|
reader = csv.reader(f)
|
||||||
|
# The reader.line_num is the source line number, not the spreadsheet row
|
||||||
|
# number due to multi-line records.
|
||||||
|
return sort_records(
|
||||||
|
standardize_fr_record(i, row) for i, row in enumerate(reader, 1)
|
||||||
|
if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
|
def standardize_beancount_record(row) -> Dict: # type: ignore[no-untyped-def]
|
||||||
|
@ -617,16 +637,19 @@ def main(arglist: Optional[Sequence[str]] = None,
|
||||||
# transaction data structure.
|
# transaction data structure.
|
||||||
if 'AMEX' in args.account:
|
if 'AMEX' in args.account:
|
||||||
validate_csv = validate_amex_csv
|
validate_csv = validate_amex_csv
|
||||||
standardize_statement_record = standardize_amex_record
|
read_csv = read_amex_csv
|
||||||
else:
|
else:
|
||||||
validate_csv = validate_fr_csv
|
validate_csv = validate_fr_csv
|
||||||
standardize_statement_record = standardize_fr_record
|
read_csv = read_fr_csv
|
||||||
|
|
||||||
with open(args.csv_statement) as f:
|
with open(args.csv_statement) as f:
|
||||||
sample = f.read(200)
|
sample = f.read(200)
|
||||||
validate_csv(sample, args.account)
|
# Validate should return true/false and a message.
|
||||||
|
validate_csv(sample)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
statement_trans = read_transactions_from_csv(f, standardize_statement_record)
|
# TODO: Needs a custom read_transactions_from_csv for each of AMEX and
|
||||||
|
# FR since AMEX has a header row and FR doesn't.
|
||||||
|
statement_trans = read_csv(f)
|
||||||
|
|
||||||
# Dates are taken from the beginning/end of the statement.
|
# Dates are taken from the beginning/end of the statement.
|
||||||
begin_date = statement_trans[0]['date']
|
begin_date = statement_trans[0]['date']
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import datetime
|
import datetime
|
||||||
import decimal
|
import decimal
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import textwrap
|
import textwrap
|
||||||
|
@ -9,10 +10,10 @@ from conservancy_beancount.reconcile.statement_reconciler import (
|
||||||
match_statement_and_books,
|
match_statement_and_books,
|
||||||
metadata_for_match,
|
metadata_for_match,
|
||||||
payee_match,
|
payee_match,
|
||||||
|
read_amex_csv,
|
||||||
|
read_fr_csv,
|
||||||
remove_duplicate_words,
|
remove_duplicate_words,
|
||||||
remove_payee_junk,
|
remove_payee_junk,
|
||||||
standardize_amex_record,
|
|
||||||
standardize_fr_record,
|
|
||||||
subset_match,
|
subset_match,
|
||||||
totals,
|
totals,
|
||||||
write_metadata_to_books,
|
write_metadata_to_books,
|
||||||
|
@ -345,40 +346,36 @@ def test_subset_passes_through_all_non_matches():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_handles_fr_record_with_comma_separators():
|
def test_handles_amex_csv():
|
||||||
# CSV would look something like:
|
CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
|
||||||
#
|
expected = [
|
||||||
# "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail"
|
{
|
||||||
# "02/07/2022",,,,,,,"10,000.00",,,"XXXX"
|
'date': datetime.date(2021, 8, 19),
|
||||||
input_row = {
|
'amount': decimal.Decimal('-28.15'),
|
||||||
'Date': '02/07/2022',
|
'payee': 'Gandi San Francisco',
|
||||||
'Amount': '10,000.00',
|
'check_id': '',
|
||||||
'Detail': 'XXXX',
|
'line': 2,
|
||||||
'Serial Num': '',
|
},
|
||||||
}
|
]
|
||||||
expected = {
|
assert read_amex_csv(io.StringIO(CSV)) == expected
|
||||||
'date': datetime.date(2022, 2, 7),
|
|
||||||
'amount': decimal.Decimal('10000'),
|
|
||||||
'payee': 'XXXX',
|
|
||||||
'check_id': '',
|
|
||||||
'line': 1,
|
|
||||||
}
|
|
||||||
assert standardize_fr_record(input_row, line=1) == expected
|
|
||||||
|
|
||||||
|
|
||||||
def test_handles_amex_record_with_comma_separators():
|
def test_handles_fr_csv():
|
||||||
# This isn't typically a problem with AMEX, but adding for completeness.
|
CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK 3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
|
||||||
input_row = {
|
expected = [
|
||||||
'Date': '02/07/2022',
|
{
|
||||||
'Amount': '-10,000.00', # Amounts are from Bank's perspective/negated.
|
'date': datetime.date(2022, 4, 1),
|
||||||
'Description': 'XXXX',
|
'amount': decimal.Decimal('6.50'),
|
||||||
'Serial Num': '',
|
'payee': 'GONDOR S.S. A1111111',
|
||||||
}
|
'check_id': '',
|
||||||
expected = {
|
'line': 2,
|
||||||
'date': datetime.date(2022, 2, 7),
|
},
|
||||||
'amount': decimal.Decimal('10000'),
|
{
|
||||||
'payee': 'XXXX',
|
'date': datetime.date(2022, 4, 18),
|
||||||
'check_id': '',
|
'amount': decimal.Decimal('-4.50'),
|
||||||
'line': 1,
|
'payee': '',
|
||||||
}
|
'check_id': '3741',
|
||||||
assert standardize_amex_record(input_row, line=1) == expected
|
'line': 3,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
assert read_fr_csv(io.StringIO(CSV)) == expected
|
||||||
|
|
Loading…
Reference in a new issue