reconcile: Update docs, move a few things around.
parent 97a05003f3
commit 581ef0fa23

1 changed file with 222 additions and 165 deletions
		|  | @ -44,23 +44,25 @@ similar directives are already present. This is a bit like diff-ing a | |||
| statement with the books (though we're only interested in the presence | ||||
| of lines, not so much their order). | ||||
| 
 | ||||
| Paper checks are entered in the books when written (a.k.a. "posted"), | ||||
| but may not be cashed until months later, sometimes causing | ||||
| reconciliation differences that live beyond a month. It's worth noting | ||||
| that there are really two dates here - the posting date and the | ||||
| cleared date. Beancount only allows us to model one, which is why | ||||
| carrying these reconciliation differences between months feels a bit | ||||
| awkward. | ||||
| 
 | ||||
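To make the two-dates point concrete, here is a minimal sketch of a check entry using a hypothetical `cleared-date` metadata key. Beancount accepts arbitrary metadata but attaches no meaning to this key; only the directive date (the posting date here) is actually modeled:

    2021-01-15 * "Acme Supplies" "Check #1234"
      cleared-date: 2021-03-02  ; hypothetical: when the bank cashed it
      Expenses:Office:Supplies   250.00 USD
      Assets:Checking           -250.00 USD

Until the March statement arrives, each reconcile run keeps reporting this transaction as a difference, which is the awkwardness described above.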
| Problems in scope: | ||||
| 
 | ||||
|  - errors in the books take hours to find during reconciliation, | ||||
|    requiring manually comparing statemnts and the books and are | ||||
|    requiring manually comparing statements and the books and are | ||||
|    susceptible to mistakes, such as not noticing when there are two | ||||
|    payments for the same amount on the statement, but not in the books | ||||
|    ("you're entering a world of pain") | ||||
|    (as Bradley likes to quote, "you're entering a world of pain") | ||||
| 
 | ||||
|  - adding statement/reconciliation metadata to books is/was manual and | ||||
|    prone to mistakes | ||||
| 
 | ||||
|  - Beancount doesn't provide any infrastructure for programmatically | ||||
|    updating the books, only appending in the case of importers | ||||
| 
 | ||||
|  - paper checks are entered in the books when written, but may not be | ||||
|    cashed until months later (reconcile errors) | ||||
| 
 | ||||
|  - jumping to an individual transaction in a large ledger isn't | ||||
|    trivial - Emacs grep mode is the current best option | ||||
| 
 | ||||
|  | @ -244,6 +246,7 @@ def standardize_beancount_record(row) -> Dict:  # type: ignore[no-untyped-def] | |||
| 
 | ||||
| 
 | ||||
| def format_record(record: dict) -> str: | ||||
|     """Generate output lines for a standard 1:1 match.""" | ||||
|     if record['payee'] and record['check_id']: | ||||
|         output = f"{record['date'].isoformat()}: {record['amount']:11,.2f} {record['payee'][:25]} #{record['check_id']}".ljust(59) | ||||
|     elif record['payee']: | ||||
|  | @ -254,6 +257,7 @@ def format_record(record: dict) -> str: | |||
| 
 | ||||
| 
 | ||||
| def format_multirecord(r1s: list[dict], r2s: list[dict], note: str) -> list[list]: | ||||
|     """Generates output lines for one statement:multiple books transaction match.""" | ||||
|     assert len(r1s) == 1 | ||||
|     assert len(r2s) > 1 | ||||
|     match_output = [] | ||||
|  | @ -268,6 +272,13 @@ def sort_records(records: List) -> List: | |||
| 
 | ||||
| 
 | ||||
| def first_word_exact_match(a: str, b: str) -> float: | ||||
|     """Score a payee match based first word. | ||||
| 
 | ||||
|     We get a whole lot of good matches this way. Helps in the | ||||
|     situation where the first word or two of a transaction description | ||||
|     is useful and the rest is garbage. | ||||
| 
 | ||||
|     """ | ||||
|     if len(a) == 0 or len(b) == 0: | ||||
|         return 0.0 | ||||
|     first_a = a.split()[0].strip() | ||||
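The hunk cuts off mid-function here. For orientation only, the comparison presumably finishes by extracting the first word of `b` and returning a binary score; a sketch of that idea, not necessarily the author's exact code:

    # Sketch only -- the real function body continues past this hunk.
    first_b = b.split()[0].strip()
    return 1.0 if first_a == first_b else 0.0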
|  | @ -279,6 +290,7 @@ def first_word_exact_match(a: str, b: str) -> float: | |||
| 
 | ||||
| 
 | ||||
| def payee_match(a: str, b: str) -> float: | ||||
|     """Score a match between two payees.""" | ||||
|     fuzzy_match = float(fuzz.token_set_ratio(a, b) / 100.00) | ||||
|     first_word_match = first_word_exact_match(a, b) | ||||
|     return max(fuzzy_match, first_word_match) | ||||
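A quick illustration of why the two scores are combined with max(). The exact fuzzy value depends on which fuzz implementation is in use (thefuzz, fuzzywuzzy, and rapidfuzz all expose token_set_ratio on a 0-100 scale), so treat the numbers as indicative:

    # "PAYPAL" is the useful part; the rest of the description is noise.
    # token_set_ratio alone scores this pair somewhere in the middle, but
    # the identical first word lifts the combined score to 1.0 (assuming
    # the first-word comparison is exact string equality).
    payee_match("PAYPAL *JOHNSMITH 402-935-7733", "PAYPAL INST XFER")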
|  | @ -286,7 +298,6 @@ def payee_match(a: str, b: str) -> float: | |||
| 
 | ||||
| def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]: | ||||
|     """Do these records represent the same transaction?""" | ||||
| 
 | ||||
|     date_score = date_proximity(r1['date'], r2['date']) | ||||
|     if r1['date'] == r2['date']: | ||||
|         date_message = '' | ||||
|  | @ -329,11 +340,20 @@ def records_match(r1: Dict, r2: Dict) -> Tuple[float, List[str]]: | |||
| 
 | ||||
| 
 | ||||
| def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]: | ||||
|     """ | ||||
|     Runs through all the statement transactions to find a matching transaction | ||||
|     in the books. If found, the books transaction is marked off so that it can | ||||
|     only be matched once. Some transactions will be matched, some will be on the | ||||
|     statement but not the books and some on the books but not the statement. | ||||
|     """Match transactions between the statement and books. | ||||
| 
 | ||||
|     If matched, the books transaction is marked off so that it can | ||||
|     only be matched once. Some transactions will be matched, some will | ||||
|     be on the statement but not the books, and some on the books but | ||||
|     not the statement. | ||||
| 
 | ||||
|     Passes through any unmatched transactions. | ||||
| 
 | ||||
|     Currently we use the same matching logic for all types of | ||||
|     statements. It's conceivable that you could have special cases to | ||||
|     accurately match some types of statements, but that would be more | ||||
|     work to maintain and test. | ||||
| 
 | ||||
|     """ | ||||
|     matches = [] | ||||
|     remaining_books_trans = [] | ||||
|  | @ -363,141 +383,14 @@ def match_statement_and_books(statement_trans: List[Dict], books_trans: List[Dic | |||
|     return matches, remaining_statement_trans, remaining_books_trans | ||||
| 
 | ||||
| 
 | ||||
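For readers skimming the diff, a compressed sketch of the shape this matching pass takes. The helper name and MIN_SCORE threshold are assumptions for illustration, not the module's actual constants:

    # Greedy one-pass matcher, assuming records_match() returns
    # (score, notes) and that some minimum-score cutoff exists.
    MIN_SCORE = 0.5  # assumed threshold

    def sketch_match(statement_trans, books_trans):
        matches, unclaimed, remaining_statement = [], list(books_trans), []
        for st in statement_trans:
            scored = [(records_match(st, bk), bk) for bk in unclaimed]
            best = max(scored, key=lambda x: x[0][0], default=None)
            if best is not None and best[0][0] >= MIN_SCORE:
                (_score, notes), bk = best
                unclaimed.remove(bk)  # each books entry matches at most once
                matches.append(([st], [bk], notes))
            else:
                remaining_statement.append(st)
        return matches, remaining_statement, unclaimed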
| # TODO: Return list of tuples (instead of list of lists). | ||||
| def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]: | ||||
|     """Match single statement transactions with multiple books transactions. | ||||
| 
 | ||||
| def format_matches(matches: List, csv_statement: str, show_reconciled_matches: bool) -> List[List]: | ||||
|     match_output = [] | ||||
|     for r1s, r2s, note in matches: | ||||
|         note = ', '.join(note) | ||||
|         note = ': ' + note if note else note | ||||
|         if r1s and r2s: | ||||
|             if show_reconciled_matches or not all(x['bank_statement'] for x in r2s): | ||||
|                 if len(r2s) == 1: | ||||
|                     entry = [r1s[0]['date'], f'{format_record(r1s[0])}  →  {format_record(r2s[0])}  ✓ Matched{note}'] | ||||
|                     if 'payee mismatch' in note: | ||||
|                         entry[1] = Fore.YELLOW + Style.BRIGHT + entry[1] + Style.RESET_ALL | ||||
|                     match_output.append(entry) | ||||
|                 else: | ||||
|                     match_output.extend(format_multirecord(r1s, r2s, note)) | ||||
|         elif r1s: | ||||
|             match_output.append([r1s[0]['date'], Fore.RED + Style.BRIGHT + f'{format_record(r1s[0])}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + Style.RESET_ALL]) | ||||
|         else: | ||||
|             match_output.append([r2s[0]['date'], Fore.RED + Style.BRIGHT + f'{" ":^59}  →  {format_record(r2s[0])}  ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + Style.RESET_ALL]) | ||||
|     return match_output | ||||
| 
 | ||||
| 
 | ||||
| def date_proximity(d1: datetime.date, d2: datetime.date) -> float: | ||||
|     diff = abs(int((d1 - d2).days)) | ||||
|     if diff > 60: | ||||
|         return 0.0 | ||||
|     else: | ||||
|         return 1.0 - (diff / 60.0) | ||||
| 
 | ||||
| 
 | ||||
| def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, csv_filename: str) -> List[Tuple[str, int, str]]: | ||||
|     # Can we really ever have multiple statement entries? Probably not. | ||||
|     statement_filename = get_repo_relative_path(statement_filename) | ||||
|     csv_filename = get_repo_relative_path(csv_filename) | ||||
|     metadata = [] | ||||
|     statement_entries, books_entries, _ = match | ||||
|     for books_entry in books_entries: | ||||
|         for statement_entry in statement_entries: | ||||
|             if not books_entry['bank_statement']: | ||||
|                 metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement: "{statement_filename}"')) | ||||
|                 metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"')) | ||||
|     return metadata | ||||
| 
 | ||||
| 
 | ||||
| def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None: | ||||
|     """Insert reconciliation metadata in the books files. | ||||
| 
 | ||||
|     Takes a list of edits to make as tuples of form (filename, lineno, metadata): | ||||
| 
 | ||||
|     [ | ||||
|         ('2021/main.beancount', 4245, '    bank-statement: statement.pdf'), | ||||
|         ('2021/main.beancount', 1057, '    bank-statement: statement.pdf'), | ||||
|         ('2021/payroll.beancount', 257, '    bank-statement: statement.pdf'), | ||||
|         ..., | ||||
|     ] | ||||
|     Works similarly to match_statement_and_books in that it returns a | ||||
|     list of matches and lists of remaining statement and books | ||||
|     transactions. | ||||
| 
 | ||||
|     """ | ||||
|     file_contents: dict[str, list] = {} | ||||
|     file_offsets: dict[str, int] = collections.defaultdict(int) | ||||
|     # Load each books file into memory and insert the relevant metadata lines. | ||||
|     # Line numbers change as we do this, so we keep track of the offset for each | ||||
|     # file. Changes must be sorted by line number first or else the offsets will | ||||
|     # break because we're jumping around making edits. | ||||
|     for filename, line, metadata in sorted(metadata_to_apply): | ||||
|         if filename not in file_contents: | ||||
|             with open(filename, 'r') as f: | ||||
|                 file_contents[filename] = f.readlines() | ||||
|         # Insert is inefficient, but fast enough for now in practice. | ||||
|         file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n') | ||||
|         file_offsets[filename] += 1 | ||||
|     # Writes each updated file back to disk. | ||||
|     for filename, contents in file_contents.items(): | ||||
|         with open(filename, 'w') as f: | ||||
|             f.writelines(contents) | ||||
|             print(f'Wrote {filename}.') | ||||
| 
 | ||||
| 
 | ||||
| def get_repo_relative_path(path: str) -> str: | ||||
|     return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY')) | ||||
| 
 | ||||
| 
 | ||||
| def parse_path(path: str) -> str: | ||||
|     if not os.path.exists(path): | ||||
|         raise argparse.ArgumentTypeError(f'File {path} does not exist.') | ||||
|     return path | ||||
| 
 | ||||
| 
 | ||||
| def parse_repo_relative_path(path: str) -> str: | ||||
|     if not os.path.exists(path): | ||||
|         raise argparse.ArgumentTypeError(f'File {path} does not exist.') | ||||
|     repo = os.getenv('CONSERVANCY_REPOSITORY') | ||||
|     if not repo: | ||||
|         raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.') | ||||
|     if not path.startswith(repo): | ||||
|         raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.') | ||||
|     return path | ||||
| 
 | ||||
| 
 | ||||
| def parse_decimal_with_separator(number_text: str) -> decimal.Decimal: | ||||
|     """decimal.Decimal can't parse numbers with thousands separator.""" | ||||
|     number_text = number_text.replace(',', '') | ||||
|     return decimal.Decimal(number_text) | ||||
| 
 | ||||
| 
 | ||||
| def parse_arguments(argv: List[str]) -> argparse.Namespace: | ||||
|     parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper') | ||||
|     cliutil.add_version_argument(parser) | ||||
|     cliutil.add_loglevel_argument(parser) | ||||
|     parser.add_argument('--beancount-file', required=True, type=parse_path) | ||||
|     parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path) | ||||
|     parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path) | ||||
|     parser.add_argument('--account', required=True, help='e.g. Liabilities:CreditCard:AMEX') | ||||
|     # parser.add_argument('--report-group-regex') | ||||
|     parser.add_argument('--show-reconciled-matches', action='store_true') | ||||
|     parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books") | ||||
|     # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals") | ||||
|     args = parser.parse_args(args=argv) | ||||
|     return args | ||||
| 
 | ||||
| 
 | ||||
| def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]: | ||||
|     total_matched = decimal.Decimal(0) | ||||
|     total_missing_from_books = decimal.Decimal(0) | ||||
|     total_missing_from_statement = decimal.Decimal(0) | ||||
|     for statement_entries, books_entries, _ in matches: | ||||
|         if statement_entries and books_entries: | ||||
|             total_matched += sum(c['amount'] for c in statement_entries) | ||||
|         elif statement_entries: | ||||
|             total_missing_from_books += sum(c['amount'] for c in statement_entries) | ||||
|         else: | ||||
|             total_missing_from_statement += sum(c['amount'] for c in books_entries) | ||||
|     return total_matched, total_missing_from_books, total_missing_from_statement | ||||
| 
 | ||||
| 
 | ||||
| def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[List[Tuple[List, List, List]], List[Dict], List[Dict]]: | ||||
|     matches = [] | ||||
|     remaining_books_trans = [] | ||||
|     remaining_statement_trans = [] | ||||
|  | @ -531,7 +424,171 @@ def subset_match(statement_trans: List[dict], books_trans: List[dict]) -> Tuple[ | |||
|     return matches, remaining_statement_trans, remaining_books_trans | ||||
| 
 | ||||
| 
 | ||||
| # TODO: Return list of tuples (instead of list of lists). | ||||
| 
 | ||||
| def format_matches(matches: List, csv_statement: str, show_reconciled_matches: bool) -> List[List]: | ||||
| 
 | ||||
|     """Produce a list of body output lines from the given matches. | ||||
| 
 | ||||
|     The first column is a date so we can re-sort the list to put the | ||||
|     missing entries in the right place. The second column is the text | ||||
|     output. | ||||
| 
 | ||||
|     """ | ||||
|     match_output = [] | ||||
|     for r1s, r2s, note in matches: | ||||
|         note = ', '.join(note) | ||||
|         note = ': ' + note if note else note | ||||
|         if r1s and r2s: | ||||
|             if show_reconciled_matches or not all(x['bank_statement'] for x in r2s): | ||||
|                 if len(r2s) == 1: | ||||
|                     entry = [r1s[0]['date'], f'{format_record(r1s[0])}  →  {format_record(r2s[0])}  ✓ Matched{note}'] | ||||
|                     if 'payee mismatch' in note: | ||||
|                         entry[1] = Fore.YELLOW + Style.BRIGHT + entry[1] + Style.RESET_ALL | ||||
|                     match_output.append(entry) | ||||
|                 else: | ||||
|                     match_output.extend(format_multirecord(r1s, r2s, note)) | ||||
|         elif r1s: | ||||
|             match_output.append([r1s[0]['date'], Fore.RED + Style.BRIGHT + f'{format_record(r1s[0])}  →  {" ":^59}  ✗ NOT IN BOOKS ({os.path.basename(csv_statement)}:{r1s[0]["line"]})' + Style.RESET_ALL]) | ||||
|         else: | ||||
|             match_output.append([r2s[0]['date'], Fore.RED + Style.BRIGHT + f'{" ":^59}  →  {format_record(r2s[0])}  ✗ NOT ON STATEMENT ({os.path.basename(r2s[0]["filename"])}:{r2s[0]["line"]})' + Style.RESET_ALL]) | ||||
|     return match_output | ||||
| 
 | ||||
| 
 | ||||
| def date_proximity(d1: datetime.date, d2: datetime.date) -> float: | ||||
|     """Scores two days based on how close they are together.""" | ||||
|     ZERO_CUTOFF = 60  # Score will be zero for this many days apart. | ||||
|     diff = abs(int((d1 - d2).days)) | ||||
|     if diff >= ZERO_CUTOFF: | ||||
|         return 0.0 | ||||
|     else: | ||||
|         return 1.0 - (diff / ZERO_CUTOFF) | ||||
| 
 | ||||
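Reference points for the linear falloff, computed directly from the formula:

    # 1.0 - diff/60, clamped to zero at 60 or more days apart:
    #    0 days -> 1.0
    #   30 days -> 0.5
    #   59 days -> ~0.0167
    #   60 days -> 0.0
    assert date_proximity(datetime.date(2021, 1, 1),
                          datetime.date(2021, 1, 31)) == 0.5  # 30 days apart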
| 
 | ||||
| def metadata_for_match(match: Tuple[List, List, List], statement_filename: str, csv_filename: str) -> List[Tuple[str, int, str]]: | ||||
|     """Returns the bank-statement metadata that should be applied for a match.""" | ||||
|     # TODO: Our data structure would allow multiple statement entries | ||||
|     # for a match, but would this ever make sense? Probably not. | ||||
|     statement_filename = get_repo_relative_path(statement_filename) | ||||
|     csv_filename = get_repo_relative_path(csv_filename) | ||||
|     metadata = [] | ||||
|     statement_entries, books_entries, _ = match | ||||
|     for books_entry in books_entries: | ||||
|         for statement_entry in statement_entries: | ||||
|             if not books_entry['bank_statement']: | ||||
|                 metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement: "{statement_filename}"')) | ||||
|                 metadata.append((books_entry['filename'], books_entry['line'], f'    bank-statement-csv: "{csv_filename}:{statement_entry["line"]}"')) | ||||
|     return metadata | ||||
| 
 | ||||
| 
 | ||||
| def write_metadata_to_books(metadata_to_apply: List[Tuple[str, int, str]]) -> None: | ||||
|     """Insert reconciliation metadata in the books files. | ||||
| 
 | ||||
|     Takes a list of edits to make as tuples of form (filename, lineno, metadata): | ||||
| 
 | ||||
|     [ | ||||
|         ('2021/main.beancount', 4245, '    bank-statement: statement.pdf'), | ||||
|         ('2021/main.beancount', 1057, '    bank-statement: statement.pdf'), | ||||
|         ('2021/payroll.beancount', 257, '    bank-statement: statement.pdf'), | ||||
|         ..., | ||||
|     ] | ||||
| 
 | ||||
|     Beancount doesn't provide any infrastructure for programmatically | ||||
|     updating the books, only appending in the case of importers. So | ||||
|     we're on our own here. | ||||
|     """ | ||||
|     file_contents: dict[str, list] = {} | ||||
|     file_offsets: dict[str, int] = collections.defaultdict(int) | ||||
|     # Load each books file into memory and insert the relevant metadata lines. | ||||
|     # Line numbers change as we do this, so we keep track of the offset for each | ||||
|     # file. Changes must be sorted by line number first or else the offsets will | ||||
|     # break because we're jumping around making edits. | ||||
|     for filename, line, metadata in sorted(metadata_to_apply): | ||||
|         if filename not in file_contents: | ||||
|             with open(filename, 'r') as f: | ||||
|                 file_contents[filename] = f.readlines() | ||||
|         # Insert is inefficient, but fast enough for now in practice. | ||||
|         file_contents[filename].insert(line + file_offsets[filename], metadata.rstrip() + '\n') | ||||
|         file_offsets[filename] += 1 | ||||
|     # Writes each updated file back to disk. | ||||
|     for filename, contents in file_contents.items(): | ||||
|         with open(filename, 'w') as f: | ||||
|             f.writelines(contents) | ||||
|             print(f'Wrote {filename}.') | ||||
| 
 | ||||
| 
 | ||||
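A worked example of the offset bookkeeping, with invented filenames and line numbers:

    # Two inserts into the same file, already sorted by line number:
    #   ('main.beancount', 10, '    bank-statement: "a.pdf"')
    #   ('main.beancount', 20, '    bank-statement: "b.pdf"')
    # The first insert pushes every later line down by one, so the second
    # must land at 20 + offset 1 = 21, where the original line 20 now
    # sits. Processed out of order, the offset would wrongly shift the
    # line-10 insert as well -- hence sorted(metadata_to_apply).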
| def get_repo_relative_path(path: str) -> str: | ||||
|     """Chop off the unique per-person CONSERVANCY_REPOSITORY. | ||||
| 
 | ||||
|     CSV and PDF statement metadata should be relative to | ||||
|     CONSERVANCY_REPOSITORY, i.e. without regard to exactly where on | ||||
|     your computer all the files live. | ||||
| 
 | ||||
|     """ | ||||
|     return os.path.relpath(path, start=os.getenv('CONSERVANCY_REPOSITORY')) | ||||
| 
 | ||||
| 
 | ||||
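An example of the effect, with an invented repository path:

    # With CONSERVANCY_REPOSITORY=/home/alice/conservancy (invented):
    get_repo_relative_path('/home/alice/conservancy/2021/statement.pdf')
    # -> '2021/statement.pdf', the same on every contributor's machine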
| def parse_path(path: str) -> str: | ||||
|     """Validate that a file exists for use in argparse.""" | ||||
|     if not os.path.exists(path): | ||||
|         raise argparse.ArgumentTypeError(f'File {path} does not exist.') | ||||
|     return path | ||||
| 
 | ||||
| 
 | ||||
| def parse_repo_relative_path(path: str) -> str: | ||||
|     """Validate that a file exists and is within $CONSERVANCY_REPOSITORY. | ||||
| 
 | ||||
|     For use with argparse. | ||||
| 
 | ||||
|     """ | ||||
|     if not os.path.exists(path): | ||||
|         raise argparse.ArgumentTypeError(f'File {path} does not exist.') | ||||
|     repo = os.getenv('CONSERVANCY_REPOSITORY') | ||||
|     if not repo: | ||||
|         raise argparse.ArgumentTypeError('$CONSERVANCY_REPOSITORY is not set.') | ||||
|     if not path.startswith(repo): | ||||
|         raise argparse.ArgumentTypeError(f'File {path} does not share a common prefix with $CONSERVANCY_REPOSITORY {repo}.') | ||||
|     return path | ||||
| 
 | ||||
| 
 | ||||
| def parse_decimal_with_separator(number_text: str) -> decimal.Decimal: | ||||
|     """decimal.Decimal can't parse numbers with thousands separator.""" | ||||
|     number_text = number_text.replace(',', '') | ||||
|     return decimal.Decimal(number_text) | ||||
| 
 | ||||
| 
 | ||||
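For example:

    # decimal.Decimal('1,000.50') raises InvalidOperation, whereas:
    parse_decimal_with_separator('1,000.50')  # -> Decimal('1000.50')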
| def parse_arguments(argv: List[str]) -> argparse.Namespace: | ||||
|     parser = argparse.ArgumentParser(prog=PROGNAME, description='Reconciliation helper') | ||||
|     cliutil.add_version_argument(parser) | ||||
|     cliutil.add_loglevel_argument(parser) | ||||
|     parser.add_argument('--beancount-file', required=True, type=parse_path) | ||||
|     parser.add_argument('--csv-statement', required=True, type=parse_repo_relative_path) | ||||
|     parser.add_argument('--bank-statement', required=True, type=parse_repo_relative_path) | ||||
|     parser.add_argument('--account', required=True, help='e.g. Liabilities:CreditCard:AMEX') | ||||
|     # parser.add_argument('--report-group-regex') | ||||
|     parser.add_argument('--show-reconciled-matches', action='store_true') | ||||
|     parser.add_argument('--non-interactive', action='store_true', help="Don't prompt to write to the books") | ||||
|     # parser.add_argument('--statement-balance', type=parse_decimal_with_separator, required=True, help="A.K.A \"cleared balance\" taken from the end of the period on the PDF statement. Required because CSV statements don't include final or running totals") | ||||
|     args = parser.parse_args(args=argv) | ||||
|     return args | ||||
| 
 | ||||
| 
 | ||||
| def totals(matches: List[Tuple[List, List, List]]) -> Tuple[decimal.Decimal, decimal.Decimal, decimal.Decimal]: | ||||
|     """Calculate the totals of transactions matched/not-matched.""" | ||||
|     total_matched = decimal.Decimal(0) | ||||
|     total_missing_from_books = decimal.Decimal(0) | ||||
|     total_missing_from_statement = decimal.Decimal(0) | ||||
|     for statement_entries, books_entries, _ in matches: | ||||
|         if statement_entries and books_entries: | ||||
|             total_matched += sum(c['amount'] for c in statement_entries) | ||||
|         elif statement_entries: | ||||
|             total_missing_from_books += sum(c['amount'] for c in statement_entries) | ||||
|         else: | ||||
|             total_missing_from_statement += sum(c['amount'] for c in books_entries) | ||||
|     return total_matched, total_missing_from_books, total_missing_from_statement | ||||
| 
 | ||||
| 
 | ||||
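A sketch of the three buckets using minimal invented records (only the 'amount' key matters to totals()):

    import decimal
    matches = [
        ([{'amount': decimal.Decimal('10')}],
         [{'amount': decimal.Decimal('10')}], []),               # matched
        ([{'amount': decimal.Decimal('5')}], [], ['no match']),  # statement only
        ([], [{'amount': decimal.Decimal('7')}], ['no match']),  # books only
    ]
    totals(matches)  # -> (Decimal('10'), Decimal('5'), Decimal('7'))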
| def process_unmatched(statement_trans: List[dict], books_trans: List[dict]) -> List[Tuple[List, List, List]]: | ||||
|     """Format the remaining unmatched transactions to be added to one single list of matches.""" | ||||
|     matches: List[Tuple[List, List, List]] = [] | ||||
|     for r1 in statement_trans: | ||||
|         matches.append(([r1], [], ['no match'])) | ||||
|  | @ -551,11 +608,8 @@ def main(arglist: Optional[Sequence[str]] = None, | |||
|         config = configmod.Config() | ||||
|         config.load_file() | ||||
| 
 | ||||
|     # TODO: Should put in a sanity check to make sure the statement you're feeding | ||||
|     # in matches the account you've provided. | ||||
| 
 | ||||
|     # TODO: Can we open the files first, then pass the streams on to the rest of the program? | ||||
| 
 | ||||
|     # Validate and normalise the statement into our standard | ||||
|     # transaction data structure. | ||||
|     if 'AMEX' in args.account: | ||||
|         validate_csv = validate_amex_csv | ||||
|         standardize_statement_record = standardize_amex_record | ||||
|  | @ -569,40 +623,43 @@ def main(arglist: Optional[Sequence[str]] = None, | |||
|         f.seek(0) | ||||
|         statement_trans = read_transactions_from_csv(f, standardize_statement_record) | ||||
| 
 | ||||
|     # Dates are taken from the beginning/end of the statement. | ||||
|     begin_date = statement_trans[0]['date'] | ||||
|     end_date = statement_trans[-1]['date'] | ||||
| 
 | ||||
|     # Do we traverse and filter the in-memory entries list and filter that, or do we | ||||
|     # use Beancount Query Language (BQL) to get a list of transactions? Currently | ||||
|     # using BQL. | ||||
|     # Query the Beancount books for transactions in the above period. | ||||
|     # | ||||
|     # There are pros and cons to using Beancount's in-memory entries | ||||
|     # list directly versus using Beancount Query Language (BQL) to | ||||
|     # get a list of transactions. We use BQL because it's convenient, | ||||
|     # but it means we don't have access to the full transaction entry | ||||
|     # objects. It feels a bit strange that these two approaches are so | ||||
|     # disconnected. | ||||
|     # | ||||
|     # beancount.query.query_compile.compile() and | ||||
|     # beancount.query.query_execute.filter_entries() look useful in this respect, | ||||
|     # but I'm not clear on how to use compile(). An example would help. | ||||
|     entries, _, options = loader.load_file(args.beancount_file) | ||||
| 
 | ||||
|     # books_balance_query = f"""SELECT sum(COST(position)) AS aa WHERE account = "{args.account}" | ||||
|     #     AND date <= {end_date.isoformat()}""" | ||||
|     # _, result_rows = run_query(entries, options, books_balance_query, numberify=True) | ||||
|     # books_balance = result_rows[0][0] if result_rows else 0 | ||||
| 
 | ||||
|     # String concatenation looks bad, but there's no SQL injection possible here | ||||
|     # because BQL can't write back to the Beancount files. I hope! | ||||
|     query = f'SELECT filename, META("lineno") AS line, META("bank-statement") AS bank_statement, date, number(cost(position)), payee, ENTRY_META("entity") as entity, ANY_META("check-id") as check_id, narration where account = "{args.account}" and date >= {begin_date} and date <= {end_date}' | ||||
|     _, result_rows = run_query(entries, options, query) | ||||
| 
 | ||||
|     books_trans = sort_records([standardize_beancount_record(row) for row in result_rows]) | ||||
| 
 | ||||
|     # Apply two passes of matching, one for standard matches and one | ||||
|     # for subset matches. | ||||
|     matches, remaining_statement_trans, remaining_books_trans = match_statement_and_books(statement_trans, books_trans) | ||||
|     subset_matches, remaining_statement_trans, remaining_books_trans = subset_match(remaining_statement_trans, remaining_books_trans) | ||||
|     subset_matches, remaining_statement_trans, remaining_books_trans = subset_match( | ||||
|         remaining_statement_trans, remaining_books_trans) | ||||
|     matches.extend(subset_matches) | ||||
| 
 | ||||
|     # Add the remaining unmatched to make one big list of matches, successful or not. | ||||
|     unmatched = process_unmatched(remaining_statement_trans, remaining_books_trans) | ||||
|     matches.extend(unmatched) | ||||
| 
 | ||||
|     # Print out results of our matching. | ||||
|     match_output = format_matches(matches, args.csv_statement, args.show_reconciled_matches) | ||||
| 
 | ||||
|     _, total_missing_from_books, total_missing_from_statement = totals(matches) | ||||
| 
 | ||||
|     print('-' * 155) | ||||
|     statement_heading = f'Statement transactions {begin_date} to {end_date}' | ||||
|     print(f'{statement_heading:<52}            {"Books transactions":<58}   Notes') | ||||
|  | @ -615,7 +672,7 @@ def main(arglist: Optional[Sequence[str]] = None, | |||
|     print(f'Total:                      {total_missing_from_statement + total_missing_from_books:12,.2f}') | ||||
|     print('-' * 155) | ||||
| 
 | ||||
|     # Write statement metadata back to books | ||||
|     # Write statement metadata back to the books. | ||||
|     metadata_to_apply = [] | ||||
|     for match in matches: | ||||
|         metadata_to_apply.extend(metadata_for_match(match, args.bank_statement, args.csv_statement)) | ||||