reconcile: Update to support standard FR CSV format
It seems as though we were previously using a transaction search export CSV, rather than the official statement CSV.
This commit is contained in:
		
							parent
							
								
									0968f7f051
								
							
						
					
					
						commit
						3acc097d32
					
				
					 2 changed files with 83 additions and 63 deletions
				
			
		|  | @ -90,6 +90,7 @@ Other related problems we're not dealing with here: | |||
| """ | ||||
| 
 | ||||
| # TODO: | ||||
| #  - entry_point seems to swallow errors | ||||
| #  - extract the magic numbers | ||||
| #  - consider merging in helper.py | ||||
| 
 | ||||
|  | @ -105,7 +106,7 @@ import logging | |||
| import os | ||||
| import re | ||||
| import sys | ||||
| from typing import Callable, Dict, List, Optional, Sequence, Tuple, TextIO | ||||
| from typing import Dict, List, Optional, Sequence, Tuple, TextIO | ||||
| 
 | ||||
| from beancount import loader | ||||
| from beancount.query.query import run_query | ||||
|  | @ -187,23 +188,16 @@ def remove_payee_junk(payee: str) -> str: | |||
|     return payee | ||||
| 
 | ||||
| 
 | ||||
| def read_transactions_from_csv(f: TextIO, standardize_statement_record: Callable) -> list: | ||||
|     reader = csv.DictReader(f) | ||||
|     # The reader.line_num is the source line number, not the spreadsheet row | ||||
|     # number due to multi-line records. | ||||
|     return sort_records([standardize_statement_record(row, i) for i, row in enumerate(reader, 2)]) | ||||
| 
 | ||||
| 
 | ||||
| def parse_amount(amount: str) -> decimal.Decimal: | ||||
|     """Parse amounts and handle comma separators as seen in some FR statements.""" | ||||
|     return decimal.Decimal(amount.replace(',', '')) | ||||
|     return decimal.Decimal(amount.replace('$', '').replace(',', '')) | ||||
| 
 | ||||
| 
 | ||||
| def validate_amex_csv(sample: str, account: str) -> None: | ||||
| def validate_amex_csv(sample: str) -> None: | ||||
|     required_cols = {'Date', 'Amount', 'Description', 'Card Member'} | ||||
|     reader = csv.DictReader(io.StringIO(sample)) | ||||
|     if reader.fieldnames and not required_cols.issubset(reader.fieldnames): | ||||
|         sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}") | ||||
|         sys.exit(f"This AMEX CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}. Please use an unmodified statement direct from the institution.") | ||||
| 
 | ||||
| 
 | ||||
| def standardize_amex_record(row: Dict, line: int) -> Dict: | ||||
|  | @ -220,21 +214,47 @@ def standardize_amex_record(row: Dict, line: int) -> Dict: | |||
|     } | ||||
| 
 | ||||
| 
 | ||||
| def validate_fr_csv(sample: str, account: str) -> None: | ||||
|     required_cols = {'Date', 'Amount', 'Detail', 'Serial Num'} | ||||
|     reader = csv.DictReader(io.StringIO(sample)) | ||||
|     if reader.fieldnames and not required_cols.issubset(reader.fieldnames): | ||||
|         sys.exit(f"This CSV doesn't seem to have the columns we're expecting, including: {', '.join(required_cols)}") | ||||
| def read_amex_csv(f: TextIO) -> list: | ||||
|     reader = csv.DictReader(f) | ||||
|     # The reader.line_num is the source line number, not the spreadsheet row | ||||
|     # number due to multi-line records. | ||||
|     return sort_records([standardize_amex_record(row, i) for i, row in enumerate(reader, 2)]) | ||||
| 
 | ||||
| 
 | ||||
| def standardize_fr_record(row: Dict, line: int) -> Dict: | ||||
|     return { | ||||
|         'date': datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), | ||||
|         'amount': parse_amount(row['Amount']), | ||||
|         'payee': remove_payee_junk(row['Detail'] or '')[:20], | ||||
|         'check_id': row['Serial Num'].lstrip('0'), | ||||
| def validate_fr_csv(sample: str) -> None: | ||||
|     # No column headers in FR statements | ||||
|     reader = csv.reader(io.StringIO(sample)) | ||||
|     next(reader)  # First row is previous statement ending balance | ||||
|     row = next(reader) | ||||
|     date = None | ||||
|     try: | ||||
|         date = datetime.datetime.strptime(row[1], '%m/%d/%Y') | ||||
|     except ValueError: | ||||
|         pass | ||||
|     amount_found = '$' in row[4] and '$' in row[5] | ||||
|     if len(row) != 6 or not date or not amount_found: | ||||
|         sys.exit("This First Republic CSV doesn't seem to have the 6 columns we're expecting, including a date in column 2 and an amount in columns 5 and 6. Please use an unmodified statement direct from the institution.") | ||||
| 
 | ||||
| 
 | ||||
| def standardize_fr_record(line, row): | ||||
|     record = { | ||||
|         'date': datetime.datetime.strptime(row[1], '%m/%d/%Y').date(), | ||||
|         'amount': parse_amount(row[4]), | ||||
|         'payee': remove_payee_junk(row[3] or '')[:20], | ||||
|         'check_id': row[2].replace('CHECK  ', '') if 'CHECK  ' in row[2] else '', | ||||
|         'line': line, | ||||
|     } | ||||
|     return record | ||||
| 
 | ||||
| 
 | ||||
| def read_fr_csv(f: TextIO) -> list: | ||||
|     reader = csv.reader(f) | ||||
|     # The reader.line_num is the source line number, not the spreadsheet row | ||||
|     # number due to multi-line records. | ||||
|     return sort_records( | ||||
|         standardize_fr_record(i, row) for i, row in enumerate(reader, 1) | ||||
|         if len(row) == 6 and row[2] not in {'LAST STATEMENT', 'THIS STATEMENT'} | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def] | ||||
|  | @ -617,16 +637,19 @@ def main(arglist: Optional[Sequence[str]] = None, | |||
|     # transaction data structure. | ||||
|     if 'AMEX' in args.account: | ||||
|         validate_csv = validate_amex_csv | ||||
|         standardize_statement_record = standardize_amex_record | ||||
|         read_csv = read_amex_csv | ||||
|     else: | ||||
|         validate_csv = validate_fr_csv | ||||
|         standardize_statement_record = standardize_fr_record | ||||
|         read_csv = read_fr_csv | ||||
| 
 | ||||
|     with open(args.csv_statement) as f: | ||||
|         sample = f.read(200) | ||||
|         validate_csv(sample, args.account) | ||||
|         # Validate should return true/false and a message. | ||||
|         validate_csv(sample) | ||||
|         f.seek(0) | ||||
|         statement_trans = read_transactions_from_csv(f, standardize_statement_record) | ||||
|         # TODO: Needs a custom read_transactions_from_csv for each of AMEX and | ||||
|         # FR since AMEX has a header row and FR doesn't. | ||||
|         statement_trans = read_csv(f) | ||||
| 
 | ||||
|     # Dates are taken from the beginning/end of the statement. | ||||
|     begin_date = statement_trans[0]['date'] | ||||
|  |  | |||
|  | @ -1,5 +1,6 @@ | |||
| import datetime | ||||
| import decimal | ||||
| import io | ||||
| import os | ||||
| import tempfile | ||||
| import textwrap | ||||
|  | @ -9,10 +10,10 @@ from conservancy_beancount.reconcile.statement_reconciler import ( | |||
|     match_statement_and_books, | ||||
|     metadata_for_match, | ||||
|     payee_match, | ||||
|     read_amex_csv, | ||||
|     read_fr_csv, | ||||
|     remove_duplicate_words, | ||||
|     remove_payee_junk, | ||||
|     standardize_amex_record, | ||||
|     standardize_fr_record, | ||||
|     subset_match, | ||||
|     totals, | ||||
|     write_metadata_to_books, | ||||
|  | @ -345,40 +346,36 @@ def test_subset_passes_through_all_non_matches(): | |||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def test_handles_fr_record_with_comma_separators(): | ||||
|     # CSV would look something like: | ||||
|     # | ||||
|     # "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail" | ||||
|     # "02/07/2022",,,,,,,"10,000.00",,,"XXXX" | ||||
|     input_row = { | ||||
|         'Date': '02/07/2022', | ||||
|         'Amount': '10,000.00', | ||||
|         'Detail': 'XXXX', | ||||
|         'Serial Num': '', | ||||
|     } | ||||
|     expected = { | ||||
|         'date': datetime.date(2022, 2, 7), | ||||
|         'amount': decimal.Decimal('10000'), | ||||
|         'payee': 'XXXX', | ||||
|         'check_id': '', | ||||
|         'line': 1, | ||||
|     } | ||||
|     assert standardize_fr_record(input_row, line=1) == expected | ||||
| def test_handles_amex_csv(): | ||||
|     CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net           San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net           San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n""" | ||||
|     expected = [ | ||||
|         { | ||||
|             'date': datetime.date(2021, 8, 19), | ||||
|             'amount': decimal.Decimal('-28.15'), | ||||
|             'payee': 'Gandi San Francisco', | ||||
|             'check_id': '', | ||||
|             'line': 2, | ||||
|         }, | ||||
|     ] | ||||
|     assert read_amex_csv(io.StringIO(CSV)) == expected | ||||
| 
 | ||||
| 
 | ||||
| def test_handles_amex_record_with_comma_separators(): | ||||
|     # This insn't typically a problem with AMEX, but adding for completeness. | ||||
|     input_row = { | ||||
|         'Date': '02/07/2022', | ||||
|         'Amount': '-10,000.00',  # Amounts are from Bank's perspective/negated. | ||||
|         'Description': 'XXXX', | ||||
|         'Serial Num': '', | ||||
|     } | ||||
|     expected = { | ||||
|         'date': datetime.date(2022, 2, 7), | ||||
|         'amount': decimal.Decimal('10000'), | ||||
|         'payee': 'XXXX', | ||||
|         'check_id': '', | ||||
|         'line': 1, | ||||
|     } | ||||
|     assert standardize_amex_record(input_row, line=1) == expected | ||||
| def test_handles_fr_csv(): | ||||
|     CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK  3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n""" | ||||
|     expected = [ | ||||
|         { | ||||
|             'date': datetime.date(2022, 4, 1), | ||||
|             'amount': decimal.Decimal('6.50'), | ||||
|             'payee': 'GONDOR S.S. A1111111', | ||||
|             'check_id': '', | ||||
|             'line': 2, | ||||
|         }, | ||||
|         { | ||||
|             'date': datetime.date(2022, 4, 18), | ||||
|             'amount': decimal.Decimal('-4.50'), | ||||
|             'payee': '', | ||||
|             'check_id': '3741', | ||||
|             'line': 3, | ||||
|         }, | ||||
|     ] | ||||
|     assert read_fr_csv(io.StringIO(CSV)) == expected | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue