482 lines
17 KiB
Python
482 lines
17 KiB
Python
import argparse
|
|
import datetime
|
|
import decimal
|
|
import io
|
|
import os
|
|
import tempfile
|
|
import textwrap
|
|
|
|
import pytest
|
|
|
|
from conservancy_beancount.reconcile.statement_reconciler import (
|
|
date_proximity,
|
|
format_output,
|
|
match_statement_and_books,
|
|
metadata_for_match,
|
|
parse_repo_relative_path,
|
|
payee_match,
|
|
read_amex_csv,
|
|
read_fr_csv,
|
|
read_citizens_csv,
|
|
remove_duplicate_words,
|
|
remove_payee_junk,
|
|
round_to_month,
|
|
subset_match,
|
|
totals,
|
|
write_metadata_to_books,
|
|
)
|
|
|
|
# These data structures represent individual transactions as taken from the
|
|
# statement ("S") or the books ("B").
|
|
|
|
# Statement transaction examples.
|
|
S1 = {
|
|
'date': datetime.date(2022, 1, 1),
|
|
'amount': decimal.Decimal('10.00'),
|
|
'payee': 'Patreon / Patreon / 123456/ ST-A1B2C3D4G5H6 /',
|
|
'check_id': '',
|
|
'line': 222,
|
|
}
|
|
S2 = {
|
|
'date': datetime.date(2022, 1, 2),
|
|
'amount': decimal.Decimal('20.00'),
|
|
'payee': 'BT*LINODE PHILADELPHIA P',
|
|
'check_id': '',
|
|
'line': 333,
|
|
}
|
|
S3 = {
|
|
'date': datetime.date(2022, 1, 3),
|
|
'amount': decimal.Decimal('30.00'),
|
|
'payee': 'USPS PO 4067540039 0PORTLAND OR',
|
|
'check_id': '',
|
|
'line': 444,
|
|
}
|
|
S4 = {
|
|
'date': datetime.date(2022, 8, 11),
|
|
'amount': decimal.Decimal('-2260.00'),
|
|
'payee': 'Trust 0000000362 210',
|
|
'check_id': '',
|
|
'line': 555,
|
|
}
|
|
|
|
# Books transaction examples.
|
|
B1 = {
|
|
'date': datetime.date(2022, 1, 1),
|
|
'amount': decimal.Decimal('10.00'),
|
|
'payee': 'Patreon',
|
|
'check_id': '',
|
|
'filename': '2022/imports.beancount',
|
|
'line': 777,
|
|
'bank_statement': '',
|
|
}
|
|
B2 = {
|
|
'date': datetime.date(2022, 1, 2),
|
|
'amount': decimal.Decimal('20.00'),
|
|
'payee': 'Linode',
|
|
'check_id': '',
|
|
'filename': '2022/main.beancount',
|
|
'line': 888,
|
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
|
}
|
|
B3_next_day = {
|
|
'date': datetime.date(2022, 1, 4),
|
|
'amount': decimal.Decimal('30.00'),
|
|
'payee': 'USPS',
|
|
'check_id': '',
|
|
'filename': '2022/main.beancount',
|
|
'line': 999,
|
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
|
}
|
|
B3_next_week = {
|
|
'date': datetime.date(2022, 1, 10),
|
|
'amount': decimal.Decimal('30.00'),
|
|
'payee': 'USPS',
|
|
'check_id': '',
|
|
'filename': '2022/main.beancount',
|
|
'line': 999,
|
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
|
}
|
|
B3_mismatch_amount = {
|
|
'date': datetime.date(2022, 1, 3),
|
|
'amount': decimal.Decimal('31.00'),
|
|
'payee': 'USPS',
|
|
'check_id': '',
|
|
'filename': '2022/main.beancount',
|
|
'line': 999,
|
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
|
}
|
|
B3_payee_mismatch_1 = {
|
|
'date': datetime.date(2022, 1, 3),
|
|
'amount': decimal.Decimal('30.00'),
|
|
'payee': 'Credit X',
|
|
'check_id': '',
|
|
'filename': '2022/main.beancount',
|
|
'line': 999,
|
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
|
}
|
|
B3_payee_mismatch_2 = {
|
|
'date': datetime.date(2022, 1, 3),
|
|
'amount': decimal.Decimal('30.00'),
|
|
'payee': 'Credit Y',
|
|
'check_id': '',
|
|
'filename': '2022/main.beancount',
|
|
'line': 999,
|
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
|
}
|
|
B3_unmatched_check_id = {
|
|
'date': datetime.date(2022, 1, 3),
|
|
'amount': decimal.Decimal('30.00'),
|
|
'payee': 'USPS',
|
|
'check_id': '1234',
|
|
'filename': '2022/main.beancount',
|
|
'line': 999,
|
|
'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
|
|
}
|
|
B4A = {
|
|
'date': datetime.date(2022, 8, 11),
|
|
'amount': decimal.Decimal('-250.00'),
|
|
'payee': 'TRUST 0000000362 ACH Retirement Plan',
|
|
'check_id': '',
|
|
'line': 1000,
|
|
}
|
|
B4B = {
|
|
'date': datetime.date(2022, 8, 11),
|
|
'amount': decimal.Decimal('-250.00'),
|
|
'payee': 'TRUST 0000000362 ACH Retirement Plan',
|
|
'check_id': '',
|
|
'line': 1000,
|
|
}
|
|
B4C = {
|
|
'date': datetime.date(2022, 8, 11),
|
|
'amount': decimal.Decimal('-1760.00'),
|
|
'payee': 'TRUST 0000000362 ACH Retirement Plan',
|
|
'check_id': '',
|
|
'line': 1000,
|
|
}
|
|
|
|
|
|
def test_warns_missing_env(monkeypatch):
|
|
monkeypatch.delenv('CONSERVANCY_REPOSITORY', raising=False)
|
|
with pytest.raises(argparse.ArgumentTypeError) as excinfo:
|
|
parse_repo_relative_path('/') # path / always exists
|
|
assert "CONSERVANCY_REPOSITORY" in str(excinfo.value)
|
|
|
|
|
|
def test_one_exact_match():
|
|
statement = [S1]
|
|
books = [B1]
|
|
assert match_statement_and_books(statement, books) == (
|
|
# Match, match, notes.
|
|
#
|
|
# The matches are a list so we can implement subset-sum matching where
|
|
# multiple books transactions may match to a single statement
|
|
# transaction.
|
|
[([S1], [B1], [])],
|
|
[],
|
|
[],
|
|
)
|
|
|
|
|
|
def test_multiple_exact_matches():
|
|
statement = [S1, S2]
|
|
books = [B1, B2]
|
|
assert match_statement_and_books(statement, books) == (
|
|
[([S1], [B1], []), ([S2], [B2], [])],
|
|
[],
|
|
[],
|
|
)
|
|
|
|
|
|
def test_one_mismatch():
|
|
statement = [S1]
|
|
books = []
|
|
assert match_statement_and_books(statement, books) == (
|
|
[],
|
|
[S1],
|
|
[],
|
|
)
|
|
|
|
|
|
def test_multiple_mismatches():
|
|
statement = [S1]
|
|
books = [B2]
|
|
assert match_statement_and_books(statement, books) == (
|
|
[],
|
|
[S1],
|
|
[B2],
|
|
)
|
|
|
|
|
|
def test_next_day_matches():
|
|
statement = [S3]
|
|
books = [B3_next_day]
|
|
assert match_statement_and_books(statement, books) == (
|
|
[([S3], [B3_next_day], ['+/- 1 days'])],
|
|
[],
|
|
[],
|
|
)
|
|
|
|
|
|
def test_next_week_matches():
|
|
statement = [S3]
|
|
books = [B3_next_week]
|
|
assert match_statement_and_books(statement, books) == (
|
|
[([S3], [B3_next_week], ['+/- 7 days'])],
|
|
[],
|
|
[],
|
|
)
|
|
|
|
|
|
def test_incorrect_amount_does_not_match():
|
|
statement = [S3]
|
|
books = [B3_mismatch_amount]
|
|
assert match_statement_and_books(statement, books) == (
|
|
[],
|
|
[S3],
|
|
[B3_mismatch_amount],
|
|
)
|
|
|
|
|
|
def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
|
|
statement = [S3]
|
|
books = [B3_payee_mismatch_1]
|
|
assert match_statement_and_books(statement, books) == (
|
|
[([S3], [B3_payee_mismatch_1], ['payee mismatch'])],
|
|
[],
|
|
[],
|
|
)
|
|
|
|
|
|
def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
|
|
statement = [S3]
|
|
books = [B3_payee_mismatch_1, B3_payee_mismatch_2]
|
|
match = match_statement_and_books(statement, books)
|
|
assert match == (
|
|
[],
|
|
[S3],
|
|
[B3_payee_mismatch_1, B3_payee_mismatch_2],
|
|
)
|
|
|
|
|
|
def test_remove_payee_junk():
|
|
assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS'
|
|
assert remove_payee_junk('0000010017') == '10017'
|
|
|
|
|
|
def test_date_proximity():
|
|
assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23)) == 1.0
|
|
assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=30)) == 0.5
|
|
assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=60)) == 0.0
|
|
|
|
|
|
def test_remove_duplicate_words():
|
|
assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'
|
|
|
|
|
|
def test_payee_matches_when_first_word_matches():
|
|
assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0
|
|
assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8
|
|
|
|
|
|
def test_metadata_for_match(monkeypatch):
|
|
monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.')
|
|
assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [
|
|
('2022/imports.beancount', 777, ' bank-statement: "statement.pdf"'),
|
|
('2022/imports.beancount', 777, ' bank-statement-csv: "statement.csv:222"'),
|
|
]
|
|
|
|
|
|
def test_no_metadata_if_no_matches():
|
|
assert metadata_for_match(([S1], [], ['no match']), 'statement.pdf', 'statement.csv') == []
|
|
assert metadata_for_match(([], [B1], ['no match']), 'statement.pdf', 'statement.csv') == []
|
|
assert metadata_for_match(([S1], [B2], ['no match']), 'statement.pdf', 'statement.csv') == []
|
|
|
|
|
|
def test_write_to_books():
|
|
books = textwrap.dedent("""\
|
|
2021-08-16 txn "Gandi" "transfer seleniumconf.us"
|
|
Liabilities:CreditCard:AMEX -15.50 USD
|
|
Expenses:Hosting 15.50 USD""")
|
|
f = tempfile.NamedTemporaryFile('w', delete=False)
|
|
f.write(books)
|
|
f.close()
|
|
metadata = [(f.name, 2, ' bank-statement: statement.pdf')]
|
|
write_metadata_to_books(metadata)
|
|
with open(f.name) as f:
|
|
output = f.read()
|
|
assert output == textwrap.dedent("""\
|
|
2021-08-16 txn "Gandi" "transfer seleniumconf.us"
|
|
Liabilities:CreditCard:AMEX -15.50 USD
|
|
bank-statement: statement.pdf
|
|
Expenses:Hosting 15.50 USD""")
|
|
os.remove(f.name)
|
|
|
|
|
|
def test_totals():
|
|
assert totals([
|
|
([S1], [B1], []),
|
|
([S2], [], []),
|
|
([], [B3_next_day], []),
|
|
]) == (decimal.Decimal('10'), decimal.Decimal('20'), decimal.Decimal('30'))
|
|
|
|
|
|
def test_payee_not_considered_if_check_id_present():
|
|
# These records match aside from check-id.
|
|
statement = [S3]
|
|
books = [B3_unmatched_check_id]
|
|
assert match_statement_and_books(statement, books) == (
|
|
[],
|
|
[S3],
|
|
[B3_unmatched_check_id],
|
|
)
|
|
|
|
|
|
def test_subset_sum_match():
|
|
statement = [S4]
|
|
books = [B4A, B4B, B4C]
|
|
assert subset_match(statement, books) == (
|
|
[([S4], [B4A, B4B, B4C], [])],
|
|
[], # No remaining statement trans.
|
|
[], # No remaining books trans.
|
|
)
|
|
|
|
|
|
def test_subset_passes_through_all_non_matches():
|
|
"""This was used to locate a bug where some of the non-matches had
|
|
gone missing due to mutation of books_trans."""
|
|
statement_trans = [
|
|
S1, # No match
|
|
S4, # Match
|
|
]
|
|
books_trans = [
|
|
B2, # No match
|
|
B4A, B4B, B4C, # Match
|
|
B3_next_day, B3_next_week, # No match
|
|
]
|
|
assert subset_match(statement_trans, books_trans) == (
|
|
[([S4], [B4A, B4B, B4C], [])], # Matched
|
|
[S1], # No match: preserved intact
|
|
[B2, B3_next_day, B3_next_week] # No match: preserved intact
|
|
)
|
|
|
|
|
|
def test_subset_passes_though_unmatched_transactions_with_same_payee():
|
|
# Tracy noticed that when multiple books payments had the same date and
|
|
# payee and were unmatched, they were being displayed lumped together, when
|
|
# they should have remained separate.
|
|
B1a = {
|
|
'date': datetime.date(2022, 1, 1),
|
|
'amount': decimal.Decimal('100.00'),
|
|
'payee': 'Hannah',
|
|
'check_id': '',
|
|
'filename': '2022/imports.beancount',
|
|
'line': 777,
|
|
'bank_statement': '',
|
|
}
|
|
B1b = {
|
|
'date': datetime.date(2022, 1, 1),
|
|
'amount': decimal.Decimal('100.00'),
|
|
'payee': 'Hannah',
|
|
'check_id': '',
|
|
'filename': '2022/imports.beancount',
|
|
'line': 797,
|
|
'bank_statement': '',
|
|
}
|
|
assert subset_match([], [B1a, B1b]) == (
|
|
[], [], [B1a, B1b], # No match: two preserved intact
|
|
)
|
|
|
|
|
|
def test_handles_amex_csv():
|
|
CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
|
|
expected = [
|
|
{
|
|
'date': datetime.date(2021, 8, 19),
|
|
'amount': decimal.Decimal('-28.15'),
|
|
'payee': 'Gandi San Francisco',
|
|
'check_id': '',
|
|
'line': 2,
|
|
},
|
|
]
|
|
assert read_amex_csv(io.StringIO(CSV)) == expected
|
|
|
|
|
|
def test_handles_fr_csv():
|
|
CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK 3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
|
|
expected = [
|
|
{
|
|
'date': datetime.date(2022, 4, 1),
|
|
'amount': decimal.Decimal('6.50'),
|
|
'payee': 'GONDOR S.S. A1111111',
|
|
'check_id': '',
|
|
'line': 2,
|
|
},
|
|
{
|
|
'date': datetime.date(2022, 4, 18),
|
|
'amount': decimal.Decimal('-4.50'),
|
|
'payee': '',
|
|
'check_id': '3741',
|
|
'line': 3,
|
|
},
|
|
]
|
|
assert read_fr_csv(io.StringIO(CSV)) == expected
|
|
|
|
|
|
def test_handles_citizens_csv():
|
|
CSV = """Effective Date,Bank ID,Account Number,Account Name,Transaction Description,Status,Debit/Credit,Amount,Bank Reference,Customer Reference,Transaction Detail,BAI Code,Type,Additional Information,Currency,Image,Advice Reference,Transaction Number,Originator Id,Originator Name,Company Entry Description,Effective Date,Related Reference,Value Date of Payment,Transaction Reference,SEC Type,Settlement Reference,Payer Account Name,Payer Account,Payer Address 1,Payer Address 2,Payer Address 3,Payer Address 4,Payer Bank Code,Payer Bank Name,Payer Bank Address 1,Payer Bank Address 2,Payer Bank Address 3,Sender Bank Id,Sender Bank,Sender Address 1,Sender Address 2,Sender Address 3,Sender Address 4,Beneficiary Name,Beneficiary Account,Beneficiary Address 1,Beneficiary Address 2,Beneficiary Address 3,Beneficiary Address 4,Beneficiary Bank Code,Beneficiary Bank Name,Beneficiary Bank Address 1,Beneficiary Bank Address 2,Beneficiary Bank Address 3,Reference For Beneficiary,Foreign Amount,Foreign Currency,Exchange Rate,Payment Details 1,Payment Details 2,Payment Details 3,Payment Details 4,Payment Details 5,Payment Details 6,Bank Reference,Bank Reference 2,Bank Reference 3,Bank Reference 4,Bank Reference 5,Bank Reference 6,Receiving Bank Id,Receiving Bank,Receiving Bank Address 1,Receiving Bank Address 2,Receiving Bank Address 3,Receiving Bank Address 4,Intermediary Bank Id,Intermediary Bank Name,Intermediary Bank Address 1,Intermediary Bank Address 2,Intermediary Bank Address 3,Intermediary Bank Address 4,Ordering Bank,Ordering Bank Address 1,Ordering Bank Address 2,Ordering Bank Address 3,Ordering Bank Address 4,Instructing Bank,Terminal Location,Terminal City,Terminal State,TIME\n05/30/2025,021313103,4030961273,SOFTWARE FREEDOM CONSERVANCY INC,PREAUTHORIZED ACH DEBIT,Cleared,Debit,-275.64,,,SEC : CCD ORIG NAME : PAYCHEX EIB CO. ENTRY DESC: INVOICE RECIP NAME: SOFTWARE FREEDOM CONSE INDIVIDUAL ID: X12201000015079 EFFECTIVE DATE: 250530 CO DESCRIPTION DATE: 250530 PAR#: 025149005728064 ADVICEREF: 025149005728064 ,455,ACH,View Details,USD,, 025149005728064, 025149005728064,, PAYCHEX EIB , INVOICE , 250530 ,,,, CCD ,,,,,,,,,,,,,,,,,,, SOFTWARE FREEDOM CONSE ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n"""
|
|
expected = [
|
|
{
|
|
'date': datetime.date(2025, 5, 30),
|
|
'amount': decimal.Decimal('-275.64'),
|
|
'payee': 'PAYCHEX EIB SOFTWARE FREEDOM CONSE',
|
|
'check_id': '',
|
|
'line': 2,
|
|
},
|
|
]
|
|
assert read_citizens_csv(io.StringIO(CSV)) == expected
|
|
|
|
|
|
def test_format_output():
|
|
statement = [S1]
|
|
books = [B1]
|
|
matches, _, _ = match_statement_and_books(statement, books)
|
|
output = format_output(matches, datetime.date(2022, 1, 1), datetime.date(2022, 2, 1), 'test.csv', True)
|
|
assert '2022-01-01: 10.00 Patreon / Patreon / 12345 → 2022-01-01: 10.00 Patreon ✓ Matched' in output
|
|
|
|
|
|
month_test_data = [
|
|
((datetime.date(2022, 1, 2), datetime.date(2022, 1, 30)),
|
|
(datetime.date(2022, 1, 1), datetime.date(2022, 1, 31))),
|
|
((datetime.date(2022, 4, 2), datetime.date(2022, 4, 29)),
|
|
(datetime.date(2022, 4, 1), datetime.date(2022, 4, 30))),
|
|
((datetime.date(2022, 2, 2), datetime.date(2022, 2, 27)),
|
|
(datetime.date(2022, 2, 1), datetime.date(2022, 2, 28))),
|
|
((datetime.date(2024, 2, 2), datetime.date(2024, 2, 27)),
|
|
(datetime.date(2024, 2, 1), datetime.date(2024, 2, 29))),
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize('input_dates,rounded_dates', month_test_data)
|
|
def test_rounds_to_full_month(input_dates, rounded_dates):
|
|
assert round_to_month(*input_dates) == rounded_dates
|
|
|
|
|
|
def test_handles_payee_whitespace_only():
|
|
"""Sometimes Citizen Bank statements have a single space in the transaction desc."""
|
|
statement = [
|
|
{
|
|
'date': datetime.date(2022, 1, 1),
|
|
'amount': decimal.Decimal('10.00'),
|
|
'payee': ' ',
|
|
'check_id': '',
|
|
}
|
|
]
|
|
books = [
|
|
{
|
|
'date': datetime.date(2022, 1, 1),
|
|
'amount': decimal.Decimal('10.00'),
|
|
'payee': ' ',
|
|
'check_id': '',
|
|
}
|
|
]
|
|
assert match_statement_and_books(statement, books)
|