conservancy_beancount/tests/test_reconcile.py

import datetime
import decimal
import io
import os
import tempfile
import textwrap

import pytest

from conservancy_beancount.reconcile.statement_reconciler import (
    date_proximity,
    format_output,
    match_statement_and_books,
    metadata_for_match,
    payee_match,
    read_amex_csv,
    read_fr_csv,
    remove_duplicate_words,
    remove_payee_junk,
    round_to_month,
    subset_match,
    totals,
    write_metadata_to_books,
)

# These data structures represent individual transactions as taken from the
# statement ("S") or the books ("B").

# Statement transaction examples.
S1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon         / Patreon   / 123456/ ST-A1B2C3D4G5H6       /',
    'check_id': '',
    'line': 222,
}
S2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'BT*LINODE           PHILADELPHIA        P',
    'check_id': '',
    'line': 333,
}
S3 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS PO 4067540039 0PORTLAND            OR',
    'check_id': '',
    'line': 444,
}
S4 = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-2260.00'),
    'payee': 'Trust 0000000362 210',
    'check_id': '',
    'line': 555,
}

# Books transaction examples.
B1 = {
    'date': datetime.date(2022, 1, 1),
    'amount': decimal.Decimal('10.00'),
    'payee': 'Patreon',
    'check_id': '',
    'filename': '2022/imports.beancount',
    'line': 777,
    'bank_statement': '',
}
B2 = {
    'date': datetime.date(2022, 1, 2),
    'amount': decimal.Decimal('20.00'),
    'payee': 'Linode',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 888,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_next_day = {
    'date': datetime.date(2022, 1, 4),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_next_week = {
    'date': datetime.date(2022, 1, 10),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_mismatch_amount = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('31.00'),
    'payee': 'USPS',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_payee_mismatch_1 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit X',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_payee_mismatch_2 = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'Credit Y',
    'check_id': '',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B3_unmatched_check_id = {
    'date': datetime.date(2022, 1, 3),
    'amount': decimal.Decimal('30.00'),
    'payee': 'USPS',
    'check_id': '1234',
    'filename': '2022/main.beancount',
    'line': 999,
    'bank_statement': "Financial/Bank-Statements/AMEX/2022-01-12_AMEX_statement.pdf"
}
B4A = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4B = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-250.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}
B4C = {
    'date': datetime.date(2022, 8, 11),
    'amount': decimal.Decimal('-1760.00'),
    'payee': 'TRUST 0000000362 ACH Retirement Plan',
    'check_id': '',
    'line': 1000,
}


def test_one_exact_match():
    statement = [S1]
    books = [B1]
    assert match_statement_and_books(statement, books) == (
        # Match, match, notes.
        #
        # The matches are a list so we can implement subset-sum matching where
        # multiple books transactions may match to a single statement
        # transaction.
        [([S1], [B1], [])],
        [],
        [],
    )


def test_multiple_exact_matches():
    statement = [S1, S2]
    books = [B1, B2]
    assert match_statement_and_books(statement, books) == (
        [([S1], [B1], []), ([S2], [B2], [])],
        [],
        [],
    )


def test_one_mismatch():
    statement = [S1]
    books = []
    assert match_statement_and_books(statement, books) == (
        [],
        [S1],
        [],
    )


def test_multiple_mismatches():
    statement = [S1]
    books = [B2]
    assert match_statement_and_books(statement, books) == (
        [],
        [S1],
        [B2],
    )


def test_next_day_matches():
    statement = [S3]
    books = [B3_next_day]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_next_day], ['+/- 1 days'])],
        [],
        [],
    )


def test_next_week_matches():
    statement = [S3]
    books = [B3_next_week]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_next_week], ['+/- 7 days'])],
        [],
        [],
    )


def test_incorrect_amount_does_not_match():
    statement = [S3]
    books = [B3_mismatch_amount]
    assert match_statement_and_books(statement, books) == (
        [],
        [S3],
        [B3_mismatch_amount],
    )


def test_payee_mismatch_ok_when_only_one_that_amount_and_date():
    statement = [S3]
    books = [B3_payee_mismatch_1]
    assert match_statement_and_books(statement, books) == (
        [([S3], [B3_payee_mismatch_1], ['payee mismatch'])],
        [],
        [],
    )


def test_payee_mismatch_not_ok_when_multiple_that_amount_and_date():
    statement = [S3]
    books = [B3_payee_mismatch_1, B3_payee_mismatch_2]
    match = match_statement_and_books(statement, books)
    assert match == (
        [],
        [S3],
        [B3_payee_mismatch_1, B3_payee_mismatch_2],
    )


def test_remove_payee_junk():
    assert remove_payee_junk('WIDGETSRUS INC PAYMENT 1') == 'WIDGETSRUS'
    assert remove_payee_junk('0000010017') == '10017'


def test_date_proximity():
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23)) == 1.0
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=30)) == 0.5
    assert date_proximity(datetime.date(2021, 8, 23), datetime.date(2021, 8, 23) - datetime.timedelta(days=60)) == 0.0


def test_remove_duplicate_words():
    assert remove_duplicate_words('Hi Foo Kow FOO') == 'Hi Foo Kow'


def test_payee_matches_when_first_word_matches():
    assert payee_match('Gandi San Francisco', 'Gandi example.com renewal 1234567') == 1.0
    assert payee_match('USPS 123456789 Portland', 'USPS John Brown') == 0.8


def test_metadata_for_match(monkeypatch):
    monkeypatch.setenv('CONSERVANCY_REPOSITORY', '.')
    assert metadata_for_match(([S1], [B1], []), 'statement.pdf', 'statement.csv') == [
        ('2022/imports.beancount', 777, '    bank-statement: "statement.pdf"'),
        ('2022/imports.beancount', 777, '    bank-statement-csv: "statement.csv:222"'),
    ]


def test_no_metadata_if_no_matches():
    assert metadata_for_match(([S1], [], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([], [B1], ['no match']), 'statement.pdf', 'statement.csv') == []
    assert metadata_for_match(([S1], [B2], ['no match']), 'statement.pdf', 'statement.csv') == []


def test_write_to_books():
    books = textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX            -15.50 USD
          Expenses:Hosting                        15.50 USD""")
    f = tempfile.NamedTemporaryFile('w', delete=False)
    f.write(books)
    f.close()
    metadata = [(f.name, 2, '    bank-statement: statement.pdf')]
    write_metadata_to_books(metadata)
    with open(f.name) as f:
        output = f.read()
    assert output == textwrap.dedent("""\
        2021-08-16 txn "Gandi" "transfer seleniumconf.us"
          Liabilities:CreditCard:AMEX            -15.50 USD
            bank-statement: statement.pdf
          Expenses:Hosting                        15.50 USD""")
    os.remove(f.name)


def test_totals():
    assert totals([
        ([S1], [B1], []),
        ([S2], [], []),
        ([], [B3_next_day], []),
    ]) == (decimal.Decimal('10'), decimal.Decimal('20'), decimal.Decimal('30'))


def test_payee_not_considered_if_check_id_present():
    # These records match aside from check-id.
    statement = [S3]
    books = [B3_unmatched_check_id]
    assert match_statement_and_books(statement, books) == (
        [],
        [S3],
        [B3_unmatched_check_id],
    )


def test_subset_sum_match():
    statement = [S4]
    books = [B4A, B4B, B4C]
    assert subset_match(statement, books) == (
        [([S4], [B4A, B4B, B4C], [])],
        [],  # No remaining statement trans.
        [],  # No remaining books trans.
    )


def test_subset_passes_through_all_non_matches():
    """This was used to locate a bug where some of the non-matches had
    gone missing due to mutation of books_trans."""
    statement_trans = [
        S1,  # No match
        S4,  # Match
    ]
    books_trans = [
        B2,  # No match
        B4A, B4B, B4C,  # Match
        B3_next_day, B3_next_week,  # No match
    ]
    assert subset_match(statement_trans, books_trans) == (
        [([S4], [B4A, B4B, B4C], [])],  # Matched
        [S1],  # No match: preserved intact
        [B2, B3_next_day, B3_next_week]  # No match: preserved intact
    )


def test_subset_passes_though_unmatched_transactions_with_same_payee():
    # Tracy noticed that when multiple books payments had the same date and
    # payee and were unmatched, they were being displayed lumped together, when
    # they should have remained separate.
    B1a = {
        'date': datetime.date(2022, 1, 1),
        'amount': decimal.Decimal('100.00'),
        'payee': 'Hannah',
        'check_id': '',
        'filename': '2022/imports.beancount',
        'line': 777,
        'bank_statement': '',
    }
    B1b = {
        'date': datetime.date(2022, 1, 1),
        'amount': decimal.Decimal('100.00'),
        'payee': 'Hannah',
        'check_id': '',
        'filename': '2022/imports.beancount',
        'line': 797,
        'bank_statement': '',
    }
    assert subset_match([], [B1a, B1b]) == (
        [], [], [B1a, B1b],  # No match: two preserved intact
    )


def test_handles_amex_csv():
    CSV = """Date,Receipt,Description,Card Member,Account #,Amount,Extended Details,Appears On Your Statement As,Address,City/State,Zip Code,Country,Reference,Category\n08/19/2021,,Gandi.net           San Francisco,RODNEY R BROWN,-99999,28.15,"00000009999 00000009999999999999\nGandi.net\nSan Francisco\n00000009999999999999",Gandi.net           San Francisco,"NEPTUNUSSTRAAT 41-63\nHOOFDDORP",,2132 JA,NETHERLANDS (THE),'999999999999999999',Merchandise & Supplies-Internet Purchase\n"""
    expected = [
        {
            'date': datetime.date(2021, 8, 19),
            'amount': decimal.Decimal('-28.15'),
            'payee': 'Gandi San Francisco',
            'check_id': '',
            'line': 2,
        },
    ]
    assert read_amex_csv(io.StringIO(CSV)) == expected


def test_handles_fr_csv():
    CSV = """"DD99999999999","03/31/2022","LAST STATEMENT","","","$1,000.00"\n"9999999999999","04/01/2022","INCOMING WIRE","GONDOR S.S. A111111111BCDE0F","$6.50","$1,006.50"\n"DD99999999999","04/18/2022","CHECK  3741","","$-4.50","$1,002.00"\n"DD99999999999","04/30/2022","THIS STATEMENT","","","$102.00"\n"""
    expected = [
        {
            'date': datetime.date(2022, 4, 1),
            'amount': decimal.Decimal('6.50'),
            'payee': 'GONDOR S.S. A1111111',
            'check_id': '',
            'line': 2,
        },
        {
            'date': datetime.date(2022, 4, 18),
            'amount': decimal.Decimal('-4.50'),
            'payee': '',
            'check_id': '3741',
            'line': 3,
        },
    ]
    assert read_fr_csv(io.StringIO(CSV)) == expected


def test_format_output():
    statement = [S1]
    books = [B1]
    matches, _, _ = match_statement_and_books(statement, books)
    output = format_output(matches, datetime.date(2022, 1, 1), datetime.date(2022, 2, 1), 'test.csv', True)
    assert '2022-01-01:       10.00 Patreon         / Patreon   / 12345  →  2022-01-01:       10.00 Patreon                              ✓ Matched' in output


month_test_data = [
    ((datetime.date(2022, 1, 2), datetime.date(2022, 1, 30)),
     (datetime.date(2022, 1, 1), datetime.date(2022, 1, 31))),
    ((datetime.date(2022, 4, 2), datetime.date(2022, 4, 29)),
     (datetime.date(2022, 4, 1), datetime.date(2022, 4, 30))),
    ((datetime.date(2022, 2, 2), datetime.date(2022, 2, 27)),
     (datetime.date(2022, 2, 1), datetime.date(2022, 2, 28))),
    ((datetime.date(2024, 2, 2), datetime.date(2024, 2, 27)),
     (datetime.date(2024, 2, 1), datetime.date(2024, 2, 29))),
]


@pytest.mark.parametrize('input_dates,rounded_dates', month_test_data)
def test_rounds_to_full_month(input_dates, rounded_dates):
    assert round_to_month(*input_dates) == rounded_dates