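"""Tests for statement_reconciler: matching transactions from a bank statement
against transactions recorded in the books."""
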
import datetime
import decimal
import io
import os
import tempfile
import textwrap

import pytest

from conservancy_beancount.reconcile.statement_reconciler import (
    date_proximity,
    format_output,
    match_statement_and_books,
    metadata_for_match,
    payee_match,
    read_amex_csv,
    read_fr_csv,
    remove_duplicate_words,
    remove_payee_junk,
    round_to_month,
    subset_match,
    totals,
    write_metadata_to_books,
)

# These data structures represent individual transactions as taken from the
# statement ("S") or the books ("B").

# Statement transaction examples.
S1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon / Patreon / 123456/ ST-A1B2C3D4G5H6 /',
    'check_id': '',
    'line': 222,
}
S2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'BT*LINODE PHILADELPHIA P',
    'check_id': '',
    'line': 333,
}
S3 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS PO 4067540039 0PORTLAND OR',
    'check_id': '',
    'line': 444,
}
S4 = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-2260.00'),
    'payee': 'Trust 0000000362 210',
    'check_id': '',
    'line': 555,
}

# Books transaction examples.
B1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon',
    'check_id': '',
    'filename': '2022/imports.beancount',
    'line': 777,
    'bank_statement': '',
}
B2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'Linode',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 888,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf",
}
B3_next_day = {
    'date': datetime.date(2022, 1, 4),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf",
}
B3_next_week = {
    'date': datetime.date(2022, 1, 10),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf",
}
B3_mismatch_amount = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('31.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf",
}
B3_payee_mismatch_1 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit X',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf",
}
B3_payee_mismatch_2 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit Y',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf",
}
B3_unmatched_check_id = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '1234',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf",
}
B4A = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4B = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4C = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-1760.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}


def test_one_exact_match():
    statement = [S1]
    books = [B1]
    assert match_statement_and_books(statement, books) == (
        # Each match pairs statement entries with books entries, plus notes.
        #
        # The matches are a list so we can implement subset-sum matching where
        # multiple books transactions may match to a single statement
        # transaction.
        [([S1], [B1], [])],
        [],
        [],
    )


def test_multiple_exact_matches():
    statement = [S1, S2]
    books = [B1, B2]
    assert match_statement_and_books(statement, books) == (
        [([S1], [B1], []), ([S2], [B2], [])],
        [],
        [],
    )


def test_one_mismatch():
    statement = [S1]
    books = []
    assert match_statement_and_books(statement, books) == (
        [],
        [S1],
        [],
    )


def test_multiple_mismatches():
    statement = [S1]
    books = [B2]
    assert match_statement_and_books(statement, books) == (
        [],
        [S1],
        [B2],
    )


def test_next_day_matches():
    statement = [S3]
    books = [B3_next_day]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_next_day], ['+/- 1 days'])],
        [],
        [],
    )


def test_next_week_matches():
    statement = [S3]
    books = [B3_next_week]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_next_week], ['+/- 7 days'])],
        [],
        [],
    )


def test_incorrect_amount_does_not_match():
    statement = [S3]
    books = [B3_mismatch_amount]
    assert match_statement_and_books(statement, books) == (
        [],
        [S3],
        [B3_mismatch_amount],
    )


def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
    statement = [S3]
    books = [B3_payee_mismatch_1]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_payee_mismatch_1], ['payee mismatch'])],
        [],
        [],
    )


def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
    statement = [S3]
    books = [B3_payee_mismatch_1, B3_payee_mismatch_2]
    match = match_statement_and_books(statement, books)
    assert match == (
        [],
        [S3],
        [B3_payee_mismatch_1, B3_payee_mismatch_2],
    )


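# remove_payee_junk() strips noise from statement payees, e.g. "INC"/"PAYMENT"
# boilerplate and leading zeros, leaving the distinctive part for matching.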
def test_remove_payee_junk():
    assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS'
    assert remove_payee_junk('0000010017') == '10017'


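# date_proximity() appears to scale linearly from 1.0 (same day) down to 0.0
# at 60 days apart.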
def test_date_proximity():
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23)) == 1.0
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=30)) == 0.5
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=60)) == 0.0


def test_remove_duplicate_words():
    assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'


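# payee_match() returns a similarity score between 0.0 and 1.0; sharing the
# first word is apparently enough for a high score.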
def test_payee_matches_when_first_word_matches():
    assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0
    assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8


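# metadata_for_match() is expected to produce (filename, line, metadata-text)
# tuples describing where to annotate the books: one bank-statement line for
# the PDF and one bank-statement-csv line pointing at the CSV row.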
def test_metadata_for_match(monkeypatch):
    monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.')
    assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [
        ('2022/imports.beancount', 777, '    bank-statement: "statement.pdf"'),
        ('2022/imports.beancount', 777, '    bank-statement-csv: "statement.csv:222"'),
    ]


def test_no_metadata_if_no_matches():
    assert metadata_for_match(([S1], [], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([], [B1], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([S1], [B2], ['no match']), 'statement.pdf', 'statement.csv') == []


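# write_metadata_to_books() is expected to insert each metadata line into the
# named file immediately after the given line number (line 2 here), leaving
# the rest of the file untouched.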
def test_write_to_books():
    books = textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX -15.50 USD
          Expenses:Hosting 15.50 USD""")
    f = tempfile.NamedTemporaryFile('w', delete=False)
    f.write(books)
    f.close()
    metadata = [(f.name, 2, '    bank-statement: statement.pdf')]
    write_metadata_to_books(metadata)
    with open(f.name) as f:
        output = f.read()
    assert output == textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX -15.50 USD
            bank-statement: statement.pdf
          Expenses:Hosting 15.50 USD""")
    os.remove(f.name)


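# totals() appears to return three sums: matched amounts, unmatched statement
# amounts and unmatched books amounts.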
def test_totals():
    assert totals([
        ([S1], [B1], []),
        ([S2], [], []),
        ([], [B3_next_day], []),
    ]) == (decimal.Decimal('10'), decimal.Decimal('20'), decimal.Decimal('30'))


def test_payee_not_considered_if_check_id_present():
    # These records match aside from check-id.
    statement = [S3]
    books = [B3_unmatched_check_id]
    assert match_statement_and_books(statement, books) == (
        [],
        [S3],
        [B3_unmatched_check_id],
    )


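# subset_match() handles the case where several books transactions together
# make up a single statement transaction: B4A + B4B + B4C sum to S4's
# -2260.00.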
def test_subset_sum_match():
    statement = [S4]
    books = [B4A, B4B, B4C]
    assert subset_match(statement, books) == (
        [([S4], [B4A, B4B, B4C], [])],
        [],  # No remaining statement trans.
        [],  # No remaining books trans.
    )


def test_subset_passes_through_all_non_matches():
    """This was used to locate a bug where some of the non-matches had
    gone missing due to mutation of books_trans."""
    statement_trans = [
        S1,  # No match
        S4,  # Match
    ]
    books_trans = [
        B2,  # No match
        B4A, B4B, B4C,  # Match
        B3_next_day, B3_next_week,  # No match
    ]
    assert subset_match(statement_trans, books_trans) == (
        [([S4], [B4A, B4B, B4C], [])],  # Matched
        [S1],  # No match: preserved intact
        [B2, B3_next_day, B3_next_week],  # No match: preserved intact
    )


def test_subset_passes_though_unmatched_transactions_with_same_payee():
    # Tracy noticed that when multiple books payments had the same date and
    # payee and were unmatched, they were being displayed lumped together, when
    # they should have remained separate.
    B1a = {
        'date': datetime.date(2022, 1, 1),
        'amount': decimal.Decimal('100.00'),
        'payee': 'Hannah',
        'check_id': '',
        'filename': '2022/imports.beancount',
        'line': 777,
        'bank_statement': '',
    }
    B1b = {
        'date': datetime.date(2022, 1, 1),
        'amount': decimal.Decimal('100.00'),
        'payee': 'Hannah',
        'check_id': '',
        'filename': '2022/imports.beancount',
        'line': 797,
        'bank_statement': '',
    }
    assert subset_match([], [B1a, B1b]) == (
        [], [], [B1a, B1b],  # No match: two preserved intact
    )


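# read_amex_csv() parses American Express CSV exports; note the sign flip
# (a 28.15 charge on the statement becomes -28.15 here) and the cleaned-up
# payee.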
def test_handles_amex_csv():
    CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
    expected = [
        {
            'date': datetime.date(2021, 8, 19),
            'amount': decimal.Decimal('-28.15'),
            'payee': 'Gandi San Francisco',
            'check_id': '',
            'line': 2,
        },
    ]
    assert read_amex_csv(io.StringIO(CSV)) == expected


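# read_fr_csv() parses the FR bank CSV format. The "LAST STATEMENT" and
# "THIS STATEMENT" balance rows are expected to be skipped, and a check number
# such as "CHECK 3741" ends up in check_id rather than payee.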
def test_handles_fr_csv():
    CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK 3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
    expected = [
        {
            'date': datetime.date(2022, 4, 1),
            'amount': decimal.Decimal('6.50'),
            'payee': 'GONDOR S.S. A1111111',
            'check_id': '',
            'line': 2,
        },
        {
            'date': datetime.date(2022, 4, 18),
            'amount': decimal.Decimal('-4.50'),
            'payee': '',
            'check_id': '3741',
            'line': 3,
        },
    ]
    assert read_fr_csv(io.StringIO(CSV)) == expected


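# format_output() renders matches for human review; a matched pair is shown as
# "statement side → books side" with a "✓ Matched" marker.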
def test_format_output():
    statement = [S1]
    books = [B1]
    matches, _, _ = match_statement_and_books(statement, books)
    output = format_output(matches, datetime.date(2022, 1, 1), datetime.date(2022, 2, 1), 'test.csv', True)
    assert '2022-01-01: 10.00 Patreon / Patreon / 12345 → 2022-01-01: 10.00 Patreon ✓ Matched' in output


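# round_to_month() widens a (start, end) date range to whole calendar months,
# including the 29-day February of a leap year (2024).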
month_test_data = [
    ((datetime.date(2022, 1, 2), datetime.date(2022, 1, 30)),
     (datetime.date(2022, 1, 1), datetime.date(2022, 1, 31))),
    ((datetime.date(2022, 4, 2), datetime.date(2022, 4, 29)),
     (datetime.date(2022, 4, 1), datetime.date(2022, 4, 30))),
    ((datetime.date(2022, 2, 2), datetime.date(2022, 2, 27)),
     (datetime.date(2022, 2, 1), datetime.date(2022, 2, 28))),
    ((datetime.date(2024, 2, 2), datetime.date(2024, 2, 27)),
     (datetime.date(2024, 2, 1), datetime.date(2024, 2, 29))),
]


@pytest.mark.parametrize('input_dates,rounded_dates', month_test_data)
def test_rounds_to_full_month(input_dates, rounded_dates):
    assert round_to_month(*input_dates) == rounded_dates