2022-02-18 09:36:11 +00:00
|
|
|
import datetime
|
|
|
|
import decimal
|
2022-02-21 07:31:07 +00:00
|
|
|
import os
|
|
|
|
import tempfile
|
|
|
|
import textwrap
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-02-22 22:58:53 +00:00
|
|
|
from conservancy_beancount.reconcile.statement_reconciler import (
|
2022-02-18 13:27:48 +00:00
|
|
|
date_proximity,
|
2023-01-13 02:58:36 +00:00
|
|
|
match_statement_and_books,
|
2022-02-21 07:31:07 +00:00
|
|
|
metadata_for_match,
|
2023-01-13 02:58:36 +00:00
|
|
|
payee_match,
|
|
|
|
remove_duplicate_words,
|
|
|
|
remove_payee_junk,
|
|
|
|
standardize_amex_record,
|
|
|
|
standardize_fr_record,
|
2022-02-23 06:24:31 +00:00
|
|
|
subset_match,
|
2023-01-13 02:58:36 +00:00
|
|
|
totals,
|
|
|
|
write_metadata_to_books,
|
2022-02-18 13:27:48 +00:00
|
|
|
)
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-02-22 22:58:53 +00:00
|
|
|
# These data structures represent individual transactions as taken from the
# statement ("S") or the books ("B").

# Statement transaction examples.
#
# Each record carries the fields the reconciler reads from a parsed
# statement row: date, amount, the payee text as printed by the bank,
# check_id ('' when the transaction is not a check), and the source line
# number within the statement CSV.
S1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon / Patreon / 123456/ ST-A1B2C3D4G5H6 /',
    'check_id': '',
    'line': 222,
}
S2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'BT*LINODE PHILADELPHIA P',
    'check_id': '',
    'line': 333,
}
S3 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS PO 4067540039 0PORTLAND OR',
    'check_id': '',
    'line': 444,
}
# S4's amount equals B4A + B4B + B4C (-250.00 - 250.00 - 1760.00); it is
# used by the subset-sum matching tests below.
S4 = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-2260.00'),
    'payee': 'Trust 0000000362 210',
    'check_id': '',
    'line': 555,
}
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-02-22 22:58:53 +00:00
|
|
|
# Books transaction examples.
#
# Books records additionally carry the beancount source filename and an
# existing 'bank_statement' link ('' when the entry has not yet been
# reconciled against a statement).
B1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon',
    'check_id': '',
    'filename': '2022/imports.beancount',
    'line': 777,
    'bank_statement': '',
}
B2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'Linode',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 888,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
# Each B3_* variant differs from S3 in exactly one respect, to exercise
# the matcher's tolerance rules one at a time.

# One day after S3's date.
B3_next_day = {
    'date': datetime.date(2022, 1, 4),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
# Seven days after S3's date.
B3_next_week = {
    'date': datetime.date(2022, 1, 10),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
# Amount is off by 1.00 relative to S3.
B3_mismatch_amount = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('31.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
# Two payee-mismatch variants: one alone can still match S3, but both
# together are ambiguous and must not.
B3_payee_mismatch_1 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit X',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_payee_mismatch_2 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit Y',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
# Carries a check_id where S3 has none; everything else matches S3.
B3_unmatched_check_id = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '1234',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
|
2022-02-23 06:24:31 +00:00
|
|
|
# B4A, B4B and B4C sum to S4's amount and are used by the subset-sum
# matching tests.  B4A and B4B are deliberately identical records (two
# separate transactions with the same details on the same day).
B4A = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4B = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4C = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-1760.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
|
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
|
|
|
|
def test_one_exact_match():
    """An identical statement/books pair is returned as a single match.

    The result triple is (matches, unmatched statement entries, unmatched
    books entries).  Each match is itself ([statement...], [books...],
    notes): both sides are lists so that subset-sum matching can pair
    multiple books transactions with a single statement transaction.
    """
    result = match_statement_and_books([S1], [B1])
    assert result == ([([S1], [B1], [])], [], [])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_multiple_exact_matches():
    """Two independent exact pairs each produce their own match."""
    result = match_statement_and_books([S1, S2], [B1, B2])
    assert result == ([([S1], [B1], []), ([S2], [B2], [])], [], [])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_one_mismatch():
    """A statement entry with no books counterpart is reported unmatched."""
    result = match_statement_and_books([S1], [])
    assert result == ([], [S1], [])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_multiple_mismatches():
    """Entries that agree on nothing end up unmatched on both sides."""
    result = match_statement_and_books([S1], [B2])
    assert result == ([], [S1], [B2])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_next_day_matches():
    """A one-day date difference still matches, with an explanatory note."""
    result = match_statement_and_books([S3], [B3_next_day])
    assert result == ([([S3], [B3_next_day], ['+/- 1 days'])], [], [])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_next_week_matches():
    """A seven-day date difference still matches, with a note."""
    result = match_statement_and_books([S3], [B3_next_week])
    assert result == ([([S3], [B3_next_week], ['+/- 7 days'])], [], [])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_incorrect_amount_does_not_match():
    """Same payee and nearby date, but a different amount: no match."""
    result = match_statement_and_books([S3], [B3_mismatch_amount])
    assert result == ([], [S3], [B3_mismatch_amount])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
    """A lone amount/date candidate matches despite a differing payee.

    The payee disagreement is surfaced in the match notes rather than
    blocking the match.
    """
    result = match_statement_and_books([S3], [B3_payee_mismatch_1])
    assert result == ([([S3], [B3_payee_mismatch_1], ['payee mismatch'])], [], [])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
    """With two equally-plausible candidates the matcher declines to guess."""
    # NOTE: the expected lists are fresh literals on purpose — asserting
    # against the same list object passed in would hide mutation bugs.
    result = match_statement_and_books([S3], [B3_payee_mismatch_1, B3_payee_mismatch_2])
    assert result == ([], [S3], [B3_payee_mismatch_1, B3_payee_mismatch_2])
|
2022-02-18 09:36:11 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_remove_payee_junk():
    """Boilerplate words and zero padding are stripped from payee text."""
    cases = [
        ('WIDGETSRUS INC PAYMENT 1', 'WIDGETSRUS'),
        ('0000010017', '10017'),
    ]
    for noisy, cleaned in cases:
        assert remove_payee_junk(noisy) == cleaned
|
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-18 09:36:11 +00:00
|
|
|
def test_date_proximity():
    """Proximity is 1.0 for equal dates, 0.5 at 30 days, 0.0 at 60 days."""
    base = datetime.date(2021, 8, 23)
    assert date_proximity(base, base) == 1.0
    assert date_proximity(base, base - datetime.timedelta(days=30)) == 0.5
    assert date_proximity(base, base - datetime.timedelta(days=60)) == 0.0
|
2022-02-18 13:27:48 +00:00
|
|
|
|
2022-02-21 01:16:24 +00:00
|
|
|
|
|
|
|
def test_remove_duplicate_words():
    """Case-insensitive repeats are dropped; the first occurrence is kept."""
    deduplicated = remove_duplicate_words('Hi Foo Kow FOO')
    assert deduplicated == 'Hi Foo Kow'
|
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-21 01:16:24 +00:00
|
|
|
def test_payee_matches_when_first_word_matches():
    """Agreement on the leading word dominates the payee similarity score."""
    # Identical first word ("Gandi") alone yields a perfect score.
    assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0
    # Shared first word ("USPS") with otherwise different text scores lower.
    assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8
|
2022-02-21 07:31:07 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-21 07:31:07 +00:00
|
|
|
def test_metadata_for_match(monkeypatch):
    """A match produces bank-statement metadata for the books entry.

    Each metadata item is (books filename, books line, text to insert).
    The CSV variant also records the statement row number (S1's 'line'
    value, 222).  Paths appear to be relativized against the
    CONSERVANCY_REPOSITORY environment variable — hence the monkeypatch.
    """
    monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.')
    assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [
        ('2022/imports.beancount', 777, ' bank-statement: "statement.pdf"'),
        ('2022/imports.beancount', 777, ' bank-statement-csv: "statement.csv:222"'),
    ]
|
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-21 11:22:42 +00:00
|
|
|
def test_no_metadata_if_no_matches():
    """Non-matches — on either or both sides — yield no metadata at all."""
    non_matches = (
        ([S1], [], ['no match']),    # statement side only
        ([], [B1], ['no match']),    # books side only
        ([S1], [B2], ['no match']),  # both present but not matched
    )
    for match in non_matches:
        assert metadata_for_match(match, 'statement.pdf', 'statement.csv') == []
|
2022-02-21 07:31:07 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-21 07:31:07 +00:00
|
|
|
def test_write_to_books():
    """write_metadata_to_books() splices metadata lines into a books file.

    Each metadata triple is (filename, line number, text); the text is
    inserted immediately after that line of the named file.
    NOTE(review): the indentation inside these triple-quoted literals
    could not be recovered from this rendering of the file — confirm it
    against the repository copy before relying on the exact bytes.
    """
    books = textwrap.dedent("""\
    2021-08-16 txn "Gandi" "transfer seleniumconf.us"
    Liabilities:CreditCard:AMEX -15.50 USD
    Expenses:Hosting 15.50 USD""")
    # delete=False so the file survives f.close() for the function under
    # test to rewrite; it is removed manually at the end.
    f = tempfile.NamedTemporaryFile('w', delete=False)
    f.write(books)
    f.close()
    # Insert one metadata line after line 2 (the AMEX posting).
    metadata = [(f.name, 2, ' bank-statement: statement.pdf')]
    write_metadata_to_books(metadata)
    with open(f.name) as f:
        output = f.read()
    assert output == textwrap.dedent("""\
    2021-08-16 txn "Gandi" "transfer seleniumconf.us"
    Liabilities:CreditCard:AMEX -15.50 USD
     bank-statement: statement.pdf
    Expenses:Hosting 15.50 USD""")
    os.remove(f.name)
|
2022-02-21 11:22:42 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-21 11:22:42 +00:00
|
|
|
def test_totals():
    """totals() sums the matched, statement-only, and books-only amounts."""
    matched_total, statement_total, books_total = totals([
        ([S1], [B1], []),         # matched: 10.00
        ([S2], [], []),           # statement side only: 20.00
        ([], [B3_next_day], []),  # books side only: 30.00
    ])
    assert matched_total == decimal.Decimal('10')
    assert statement_total == decimal.Decimal('20')
    assert books_total == decimal.Decimal('30')
|
2022-02-21 11:43:22 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-21 11:43:22 +00:00
|
|
|
def test_payee_not_considered_if_check_id_present():
    """A check-id disagreement blocks a match even when all else fits.

    S3 and B3_unmatched_check_id agree apart from the books check_id.
    """
    result = match_statement_and_books([S3], [B3_unmatched_check_id])
    assert result == ([], [S3], [B3_unmatched_check_id])
|
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-02-23 06:24:31 +00:00
|
|
|
def test_subset_sum_match():
    """Several books transactions summing to one statement amount match it.

    -250.00 + -250.00 + -1760.00 equals S4's -2260.00, so all three books
    entries pair with the single statement entry and nothing is left over.
    """
    result = subset_match([S4], [B4A, B4B, B4C])
    assert result == ([([S4], [B4A, B4B, B4C], [])], [], [])
|
2022-03-02 01:30:56 +00:00
|
|
|
|
2022-03-02 22:36:33 +00:00
|
|
|
|
2022-03-02 01:30:56 +00:00
|
|
|
def test_subset_passes_through_all_non_matches():
    """Regression test: subset_match() must not drop non-matching entries.

    Guards against a bug where mutation of books_trans caused some of the
    non-matches to go missing from the output.
    """
    result = subset_match(
        # S1 has no match; S4 matches the B4* subset.
        [S1, S4],
        # B2 and the B3_* entries have no match; B4A/B4B/B4C do.
        [B2, B4A, B4B, B4C, B3_next_day, B3_next_week],
    )
    assert result == (
        [([S4], [B4A, B4B, B4C], [])],    # the one subset match
        [S1],                             # statement leftovers, intact
        [B2, B3_next_day, B3_next_week],  # books leftovers, intact
    )
|
2023-01-13 02:58:36 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_handles_fr_record_with_comma_separators():
    """First Republic amounts like "10,000.00" parse despite the commas.

    The source CSV would look something like::

        "Date","ABA Num","Currency","Account Num","Account Name","Description","BAI Code","Amount","Serial Num","Ref Num","Detail"
        "02/07/2022",,,,,,,"10,000.00",,,"XXXX"
    """
    row = {
        'Date': '02/07/2022',
        'Amount': '10,000.00',
        'Detail': 'XXXX',
        'Serial Num': '',
    }
    standardized = standardize_fr_record(row, line=1)
    assert standardized == {
        'date': datetime.date(2022, 2, 7),
        'amount': decimal.Decimal('10000'),
        'payee': 'XXXX',
        'check_id': '',
        'line': 1,
    }
|
|
|
|
|
|
|
|
|
|
|
|
def test_handles_amex_record_with_comma_separators():
    """AMEX amounts with comma separators parse, and the sign is flipped.

    Comma separators aren't typically a problem in AMEX exports; this is
    here for completeness.  Amounts are from the bank's perspective, so
    the negated '-10,000.00' standardizes to a positive 10000.
    """
    row = {
        'Date': '02/07/2022',
        'Amount': '-10,000.00',
        'Description': 'XXXX',
        'Serial Num': '',
    }
    standardized = standardize_amex_record(row, line=1)
    assert standardized == {
        'date': datetime.date(2022, 2, 7),
        'amount': decimal.Decimal('10000'),
        'payee': 'XXXX',
        'check_id': '',
        'line': 1,
    }
|