2022-02-18 09:36:11 +00:00
import datetime
import decimal
2023-01-30 12:21:42 +00:00
import io
2022-02-21 07:31:07 +00:00
import os
import tempfile
import textwrap
2022-02-18 09:36:11 +00:00
2022-02-22 22:58:53 +00:00
from conservancy_beancount . reconcile . statement_reconciler import (
2022-02-18 13:27:48 +00:00
date_proximity ,
2023-02-11 04:23:15 +00:00
format_output ,
2023-01-13 02:58:36 +00:00
match_statement_and_books ,
2022-02-21 07:31:07 +00:00
metadata_for_match ,
2023-01-13 02:58:36 +00:00
payee_match ,
2023-01-30 12:21:42 +00:00
read_amex_csv ,
read_fr_csv ,
2023-01-13 02:58:36 +00:00
remove_duplicate_words ,
remove_payee_junk ,
2022-02-23 06:24:31 +00:00
subset_match ,
2023-01-13 02:58:36 +00:00
totals ,
write_metadata_to_books ,
2022-02-18 13:27:48 +00:00
)
2022-02-18 09:36:11 +00:00
2022-02-22 22:58:53 +00:00
# These data structures represent individual transactions as taken from the
# statement ("S") or the books ("B").  Each one is a plain dict.
#
# NOTE(review): the padding spaces just inside the quotes of the keys and
# string values look unintentional -- confirm before normalising them.

# Statement transaction examples.
S1 = {
    ' date ': datetime.date(2022, 1, 1),
    ' amount ': decimal.Decimal(' 10.00 '),
    ' payee ': ' Patreon / Patreon / 123456/ ST-A1B2C3D4G5H6 / ',
    ' check_id ': ' ',
    ' line ': 222,
}
S2 = {
    ' date ': datetime.date(2022, 1, 2),
    ' amount ': decimal.Decimal(' 20.00 '),
    ' payee ': ' BT*LINODE PHILADELPHIA P ',
    ' check_id ': ' ',
    ' line ': 333,
}
S3 = {
    ' date ': datetime.date(2022, 1, 3),
    ' amount ': decimal.Decimal(' 30.00 '),
    ' payee ': ' USPS PO 4067540039 0PORTLAND OR ',
    ' check_id ': ' ',
    ' line ': 444,
}
# A single negative statement amount; the subset tests pair it with B4A, B4B
# and B4C, whose amounts add up to the same total.
S4 = {
    ' date ': datetime.date(2022, 8, 11),
    ' amount ': decimal.Decimal(' -2260.00 '),
    ' payee ': ' Trust 0000000362 210 ',
    ' check_id ': ' ',
    ' line ': 555,
}
2022-02-18 09:36:11 +00:00
2022-02-22 22:58:53 +00:00
# Books transaction examples.
B1 = {
    ' date ': datetime.date(2022, 1, 1),
    ' amount ': decimal.Decimal(' 10.00 '),
    ' payee ': ' Patreon ',
    ' check_id ': ' ',
    ' filename ': ' 2022/imports.beancount ',
    ' line ': 777,
    ' bank_statement ': ' ',
}
B2 = {
    ' date ': datetime.date(2022, 1, 2),
    ' amount ': decimal.Decimal(' 20.00 '),
    ' payee ': ' Linode ',
    ' check_id ': ' ',
    ' filename ': ' 2022/main.beancount ',
    ' line ': 888,
    ' bank_statement ': " Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf ",
}

# Variations on a 30.00 USPS entry.  Each differs from statement entry S3 in
# exactly one respect, named by the suffix.

# Dated one day after S3.
B3_next_day = {
    ' date ': datetime.date(2022, 1, 4),
    ' amount ': decimal.Decimal(' 30.00 '),
    ' payee ': ' USPS ',
    ' check_id ': ' ',
    ' filename ': ' 2022/main.beancount ',
    ' line ': 999,
    ' bank_statement ': " Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf ",
}
# Dated seven days after S3.
B3_next_week = {
    ' date ': datetime.date(2022, 1, 10),
    ' amount ': decimal.Decimal(' 30.00 '),
    ' payee ': ' USPS ',
    ' check_id ': ' ',
    ' filename ': ' 2022/main.beancount ',
    ' line ': 999,
    ' bank_statement ': " Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf ",
}
# Same date as S3 but 31.00 instead of 30.00.
B3_mismatch_amount = {
    ' date ': datetime.date(2022, 1, 3),
    ' amount ': decimal.Decimal(' 31.00 '),
    ' payee ': ' USPS ',
    ' check_id ': ' ',
    ' filename ': ' 2022/main.beancount ',
    ' line ': 999,
    ' bank_statement ': " Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf ",
}
# Same date and amount as S3 but with an unrelated payee.
B3_payee_mismatch_1 = {
    ' date ': datetime.date(2022, 1, 3),
    ' amount ': decimal.Decimal(' 30.00 '),
    ' payee ': ' Credit X ',
    ' check_id ': ' ',
    ' filename ': ' 2022/main.beancount ',
    ' line ': 999,
    ' bank_statement ': " Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf ",
}
# A second entry with S3's date and amount and a different unrelated payee.
B3_payee_mismatch_2 = {
    ' date ': datetime.date(2022, 1, 3),
    ' amount ': decimal.Decimal(' 30.00 '),
    ' payee ': ' Credit Y ',
    ' check_id ': ' ',
    ' filename ': ' 2022/main.beancount ',
    ' line ': 999,
    ' bank_statement ': " Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf ",
}
# Same date, amount and payee as S3, but carrying a check id.
B3_unmatched_check_id = {
    ' date ': datetime.date(2022, 1, 3),
    ' amount ': decimal.Decimal(' 30.00 '),
    ' payee ': ' USPS ',
    ' check_id ': ' 1234 ',
    ' filename ': ' 2022/main.beancount ',
    ' line ': 999,
    ' bank_statement ': " Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf ",
}

# Three entries on S4's date whose amounts (-250.00, -250.00, -1760.00) add
# up to S4's -2260.00.  B4A and B4B are equal on purpose or by accident --
# NOTE(review): confirm; they compare equal as dicts.
B4A = {
    ' date ': datetime.date(2022, 8, 11),
    ' amount ': decimal.Decimal(' -250.00 '),
    ' payee ': ' TRUST 0000000362 ACH Retirement Plan ',
    ' check_id ': ' ',
    ' line ': 1000,
}
B4B = {
    ' date ': datetime.date(2022, 8, 11),
    ' amount ': decimal.Decimal(' -250.00 '),
    ' payee ': ' TRUST 0000000362 ACH Retirement Plan ',
    ' check_id ': ' ',
    ' line ': 1000,
}
B4C = {
    ' date ': datetime.date(2022, 8, 11),
    ' amount ': decimal.Decimal(' -1760.00 '),
    ' payee ': ' TRUST 0000000362 ACH Retirement Plan ',
    ' check_id ': ' ',
    ' line ': 1000,
}
2022-02-18 09:36:11 +00:00
def test_one_exact_match():
    """S1 and B1 are expected to pair up with no notes and nothing left over."""
    statement = [S1]
    books = [B1]
    expected = (
        # Each match is (statement entries, books entries, notes).
        #
        # The matches are lists so we can implement subset-sum matching,
        # where multiple books transactions may match a single statement
        # transaction.
        [([S1], [B1], [])],
        [],
        [],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_multiple_exact_matches():
    """S1/B1 and S2/B2 are expected to pair up independently, in input order."""
    statement = [S1, S2]
    books = [B1, B2]
    expected = (
        [([S1], [B1], []), ([S2], [B2], [])],
        [],
        [],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_one_mismatch():
    """With no books entries at all, S1 is expected back as an unmatched statement entry."""
    statement = [S1]
    books = []
    expected = (
        [],
        [S1],
        [],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_multiple_mismatches():
    """S1 and B2 differ in date and amount, so both are expected back unmatched."""
    statement = [S1]
    books = [B2]
    expected = (
        [],
        [S1],
        [B2],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_next_day_matches():
    """A books entry dated one day after the statement entry is expected to match, with a note."""
    statement = [S3]
    books = [B3_next_day]
    expected = (
        [([S3], [B3_next_day], [' +/- 1 days '])],
        [],
        [],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_next_week_matches():
    """A books entry dated seven days after the statement entry is expected to match, with a note."""
    statement = [S3]
    books = [B3_next_week]
    expected = (
        [([S3], [B3_next_week], [' +/- 7 days '])],
        [],
        [],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_incorrect_amount_does_not_match():
    """Entries on the same date with different amounts (30.00 vs 31.00) are expected to stay unmatched."""
    statement = [S3]
    books = [B3_mismatch_amount]
    expected = (
        [],
        [S3],
        [B3_mismatch_amount],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
    """A lone books entry with the same date and amount is expected to match despite its payee.

    The match is expected to carry a note recording the payee difference.
    """
    statement = [S3]
    books = [B3_payee_mismatch_1]
    expected = (
        [([S3], [B3_payee_mismatch_1], [' payee mismatch '])],
        [],
        [],
    )
    assert match_statement_and_books(statement, books) == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
    """Two books entries sharing the date and amount, both with other payees, are expected to stay unmatched."""
    statement = [S3]
    books = [B3_payee_mismatch_1, B3_payee_mismatch_2]
    match = match_statement_and_books(statement, books)
    expected = (
        [],
        [S3],
        [B3_payee_mismatch_1, B3_payee_mismatch_2],
    )
    assert match == expected
2022-02-18 09:36:11 +00:00
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_remove_payee_junk():
    """Check the cleaned-up payee that remove_payee_junk gives for two sample inputs."""
    cases = (
        (' WIDGETSRUS INC PAYMENT 1 ', ' WIDGETSRUS '),
        (' 0000010017 ', ' 10017 '),
    )
    for raw_payee, cleaned in cases:
        assert remove_payee_junk(raw_payee) == cleaned
2022-03-02 22:36:33 +00:00
2022-02-18 09:36:11 +00:00
def test_date_proximity():
    """Expect a score of 1.0 for equal dates, 0.5 at 30 days apart and 0.0 at 60 days apart."""
    anchor = datetime.date(2021, 8, 23)
    for days_apart, score in ((0, 1.0), (30, 0.5), (60, 0.0)):
        other = anchor - datetime.timedelta(days=days_apart)
        assert date_proximity(anchor, other) == score
2022-02-18 13:27:48 +00:00
2022-02-21 01:16:24 +00:00
def test_remove_duplicate_words():
    """A word repeated in a different case ('Foo' / 'FOO') is expected to be dropped."""
    deduplicated = remove_duplicate_words(' Hi Foo Kow FOO ')
    assert deduplicated == ' Hi Foo Kow '
2022-03-02 22:36:33 +00:00
2022-02-21 01:16:24 +00:00
def test_payee_matches_when_first_word_matches():
    """Check the payee_match scores expected for two pairs that share their first word."""
    cases = (
        (' Gandi San Francisco ', ' Gandi example.com renewal 1234567 ', 1.0),
        (' USPS 123456789 Portland ', ' USPS John Brown ', 0.8),
    )
    for left, right, score in cases:
        assert payee_match(left, right) == score
2022-02-21 07:31:07 +00:00
2022-03-02 22:36:33 +00:00
2022-02-21 07:31:07 +00:00
def test_metadata_for_match(monkeypatch):
    """A one-to-one match is expected to yield two metadata entries for the books line.

    Both entries point at B1's file and line; one names the statement file
    and the other the CSV file together with S1's line number.
    """
    monkeypatch.setenv(' CONSERVANCY_REPOSITORY ', ' . ')
    match = ([S1], [B1], [])
    expected = [
        (' 2022/imports.beancount ', 777, ' bank-statement: " statement.pdf " '),
        (' 2022/imports.beancount ', 777, ' bank-statement-csv: " statement.csv:222 " '),
    ]
    assert metadata_for_match(match, ' statement.pdf ', ' statement.csv ') == expected
2022-03-02 22:36:33 +00:00
2022-02-21 11:22:42 +00:00
def test_no_metadata_if_no_matches():
    """No metadata is expected for the three 'no match' tuples below."""
    candidates = (
        ([S1], [], [' no match ']),
        ([], [B1], [' no match ']),
        ([S1], [B2], [' no match ']),
    )
    for candidate in candidates:
        assert metadata_for_match(candidate, ' statement.pdf ', ' statement.csv ') == []
2022-02-21 07:31:07 +00:00
2022-03-02 22:36:33 +00:00
2022-02-21 07:31:07 +00:00
def test_write_to_books():
    """write_metadata_to_books is expected to insert the metadata text after line 2 of the file.

    The books text is written to a temporary file, one metadata entry
    targeting line 2 is applied, and the file is read back and compared.
    The temporary file is removed even when the assertion fails.
    """
    books = textwrap.dedent(""" \
2021 - 08 - 16 txn " Gandi " " transfer seleniumconf.us "
Liabilities : CreditCard : AMEX - 15.50 USD
Expenses : Hosting 15.50 USD """)
    expected = textwrap.dedent(""" \
2021 - 08 - 16 txn " Gandi " " transfer seleniumconf.us "
Liabilities : CreditCard : AMEX - 15.50 USD
bank - statement : statement . pdf
Expenses : Hosting 15.50 USD """)
    # delete=False so the file survives being closed and can be reopened by
    # name; the finally clause below takes over the clean-up.
    with tempfile.NamedTemporaryFile('w', delete=False) as books_file:
        books_file.write(books)
    path = books_file.name
    try:
        metadata = [(path, 2, ' bank-statement: statement.pdf ')]
        write_metadata_to_books(metadata)
        with open(path) as result_file:
            output = result_file.read()
        assert output == expected
    finally:
        os.remove(path)
2022-02-21 11:22:42 +00:00
2022-03-02 22:36:33 +00:00
2022-02-21 11:22:42 +00:00
def test_totals():
    """totals is expected to return three sums: 10, 20 and 30 for the matches below.

    The inputs are one full match (S1/B1), one statement-only entry (S2)
    and one books-only entry (B3_next_day).
    """
    matches = [
        ([S1], [B1], []),
        ([S2], [], []),
        ([], [B3_next_day], []),
    ]
    expected = (
        decimal.Decimal(' 10 '),
        decimal.Decimal(' 20 '),
        decimal.Decimal(' 30 '),
    )
    assert totals(matches) == expected
2022-02-21 11:43:22 +00:00
2022-03-02 22:36:33 +00:00
2022-02-21 11:43:22 +00:00
def test_payee_not_considered_if_check_id_present():
    """A books entry carrying a check id is expected not to match a statement entry without that id."""
    # These records match aside from check-id.
    statement = [S3]
    books = [B3_unmatched_check_id]
    expected = (
        [],
        [S3],
        [B3_unmatched_check_id],
    )
    assert match_statement_and_books(statement, books) == expected
2022-03-02 22:36:33 +00:00
2022-02-23 06:24:31 +00:00
def test_subset_sum_match():
    """S4 is expected to match the three books entries whose amounts add up to it."""
    expected = (
        [([S4], [B4A, B4B, B4C], [])],
        [],  # No remaining statement trans.
        [],  # No remaining books trans.
    )
    result = subset_match([S4], [B4A, B4B, B4C])
    assert result == expected
2022-03-02 01:30:56 +00:00
2022-03-02 22:36:33 +00:00
2022-03-02 01:30:56 +00:00
def test_subset_passes_through_all_non_matches():
    """Entries that are not part of a subset match are expected back untouched.

    This was used to locate a bug where some of the non-matches had gone
    missing due to mutation of books_trans.
    """
    statement_trans = [
        S1,  # No match
        S4,  # Match
    ]
    books_trans = [
        B2,  # No match
        B4A, B4B, B4C,  # Match
        B3_next_day, B3_next_week,  # No match
    ]
    expected = (
        [([S4], [B4A, B4B, B4C], [])],  # Matched
        [S1],  # No match: preserved intact
        [B2, B3_next_day, B3_next_week],  # No match: preserved intact
    )
    assert subset_match(statement_trans, books_trans) == expected
2023-01-13 02:58:36 +00:00
2023-01-30 12:21:42 +00:00
def test_handles_amex_csv():
    """read_amex_csv is expected to turn the single data row below into one transaction dict.

    The expected amount is the negative of the CSV amount, and the expected
    line number is 2 (the row after the header).
    """
    raw_csv = """ Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category \n 08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15, " 00000009999 00000009999999999999 \n Gandi.net \n San Francisco \n 00000009999999999999 " ,Gandi.net San Francisco, " NEPTUNUSSTRAAT 41-63 \n HOOFDDORP " ,,2132 JA,NETHERLANDS (THE), ' 999999999999999999 ' ,Merchandise & Supplies-Internet Purchase \n """
    gandi_purchase = {
        ' date ': datetime.date(2021, 8, 19),
        ' amount ': decimal.Decimal(' -28.15 '),
        ' payee ': ' Gandi San Francisco ',
        ' check_id ': ' ',
        ' line ': 2,
    }
    parsed = read_amex_csv(io.StringIO(raw_csv))
    assert parsed == [gandi_purchase]
def test_handles_fr_csv():
    """read_fr_csv is expected to return only the wire and the check from the four rows below.

    The 'LAST STATEMENT' and 'THIS STATEMENT' rows are absent from the
    expected result; the check row is expected to carry its number as the
    check id.
    """
    raw_csv = """ " DD99999999999 " , " 03/31/2022 " , " LAST STATEMENT " , " " , " " , " $1,000.00 " \n " 9999999999999 " , " 04/01/2022 " , " INCOMING WIRE " , " GONDOR S.S. A111111111BCDE0F " , " $6.50 " , " $1,006.50 " \n " DD99999999999 " , " 04/18/2022 " , " CHECK 3741 " , " " , " $-4.50 " , " $1,002.00 " \n " DD99999999999 " , " 04/30/2022 " , " THIS STATEMENT " , " " , " " , " $102.00 " \n """
    incoming_wire = {
        ' date ': datetime.date(2022, 4, 1),
        ' amount ': decimal.Decimal(' 6.50 '),
        ' payee ': ' GONDOR S.S. A1111111 ',
        ' check_id ': ' ',
        ' line ': 2,
    }
    check = {
        ' date ': datetime.date(2022, 4, 18),
        ' amount ': decimal.Decimal(' -4.50 '),
        ' payee ': ' ',
        ' check_id ': ' 3741 ',
        ' line ': 3,
    }
    parsed = read_fr_csv(io.StringIO(raw_csv))
    assert parsed == [incoming_wire, check]
2023-02-11 04:23:15 +00:00
def test_format_output():
    """The formatted report for the S1/B1 match is expected to contain the 'Matched' line below."""
    matched, _, _ = match_statement_and_books([S1], [B1])
    period_start = datetime.date(2022, 1, 1)
    period_end = datetime.date(2022, 2, 1)
    report = format_output(matched, period_start, period_end, ' test.csv ', True)
    expected_line = ' 2022-01-01: 10.00 Patreon / Patreon / 12345 → 2022-01-01: 10.00 Patreon ✓ Matched '
    assert expected_line in report