diff --git a/conservancy_beancount/reconcile/__init__.py b/conservancy_beancount/reconcile/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/conservancy_beancount/reconcile/paypal.py b/conservancy_beancount/reconcile/paypal.py new file mode 100644 index 0000000..9bff2ca --- /dev/null +++ b/conservancy_beancount/reconcile/paypal.py @@ -0,0 +1,414 @@ +#!/usr/bin/env python3 +"""paypal.py - Reconcile PayPal account from CSV statement""" +# Copyright © 2021 Brett Smith +# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0 +# +# Full copyright and licensing details can be found at toplevel file +# LICENSE.txt in the repository. + +import argparse +import collections +import csv +import datetime +import enum +import logging +import operator +import os +import sys +import unicodedata + +from decimal import Decimal +from pathlib import Path + +import odf.style # type:ignore[import] +import odf.table # type:ignore[import] + +from beancount.parser import printer as bc_printer + +from .. import books +from .. import cliutil +from .. import config as configmod +from .. import data +from ..ranges import DateRange +from ..reports import core + +from typing import ( + Any, + Hashable, + Iterable, + Iterator, + List, + Mapping, + NamedTuple, + Optional, + Sequence, + TextIO, + Tuple, + Union, +) + +PROGNAME = 'reconcile-paypal' +logger = logging.getLogger('conservancy_beancount.reconcile.paypal') + +class ReconcileProblems(enum.IntFlag): + NOT_IN_STATEMENT = enum.auto() + DUP_IN_STATEMENT = enum.auto() + AMOUNT_DIFF = enum.auto() + MONTH_DIFF = enum.auto() + DATE_DIFF = enum.auto() + + +class PayPalPosting(NamedTuple): + filename: str + lineno: int + txn_id: str + date: datetime.date + amount: data.Amount + description: str + entity: Optional[str] + + @classmethod + def from_books(cls, post: data.Posting) -> 'PayPalPosting': + return cls( + post.meta.get('filename', ''), + post.meta.get('lineno', 0), + post.meta.get('paypal-id', ''), + post.meta.date, + post.units, + post.meta.txn.narration, + post.meta.get('entity'), + ) + + @classmethod + def from_statement( + cls, + row: Mapping[str, str], + filename: str='', + lineno: int=0, + ) -> 'PayPalPosting': + row_net = Decimal(row['Net'].replace(',', '')) + return cls( + filename, + lineno, + row['Transaction ID'], + datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(), + data.Amount(row_net, row['Currency']), + row['Name'], + None, + ) + + @classmethod + def iter_statement(cls, path: Path) -> Iterator['PayPalPosting']: + with path.open() as csv_file: + # PayPal's CSVs start with a ZWNJ space. + # Discard that, assuming it's still there. + orig_pos = csv_file.tell() + char1 = csv_file.read(1) + if not unicodedata.category(char1).startswith('C'): + csv_file.seek(orig_pos) + in_csv = csv.DictReader(csv_file) + filename = str(path) + for lineno, row in enumerate(in_csv, 2): + yield cls.from_statement(row, filename, lineno) + + def compare(self, other: 'PayPalPosting') -> int: + retval = 0 + if (self.date.year != other.date.year + or self.date.month != other.date.month): + retval |= ReconcileProblems.MONTH_DIFF + retval |= ReconcileProblems.DATE_DIFF + elif self.date.day != other.date.day: + retval |= ReconcileProblems.DATE_DIFF + return retval + + +class PayPalReconciler: + __slots__ = ['books_posts', 'statement_posts'] + + def __init__(self) -> None: + self.books_posts: List[PayPalPosting] = [] + self.statement_posts: List[PayPalPosting] = [] + + def add_from_books(self, post: PayPalPosting) -> None: + self.books_posts.append(post) + + def add_from_statement(self, post: PayPalPosting) -> None: + self.statement_posts.append(post) + + def post_problems(self) -> Iterator[Tuple[PayPalPosting, int]]: + try: + statement_post = self.statement_posts[0] + except IndexError: + try: + statement_post = self.books_posts[0] + except IndexError: + return + for post in self.books_posts: + yield (post, statement_post.compare(post)) + + def problems(self) -> int: + stmt_count = len(self.statement_posts) + if stmt_count == 0: + return ReconcileProblems.NOT_IN_STATEMENT + elif stmt_count > 1: + return ReconcileProblems.DUP_IN_STATEMENT + else: + balance = core.Balance(post.amount for post in self.books_posts) + if not (balance - self.statement_posts[0].amount).is_zero(): + return ReconcileProblems.AMOUNT_DIFF + worst_problem = 0 + for _, problems in self.post_problems(): + if problems & ~ReconcileProblems.DATE_DIFF: + return problems + else: + worst_problem = worst_problem or problems + return worst_problem + + def sort_key(self) -> Hashable: + try: + post = self.statement_posts[0] + except IndexError: + post = self.books_posts[0] + return post.date + + +class PayPalReconciliationReport(core.BaseODS[PayPalPosting, str]): + COLUMN_WIDTHS = { + 'Source': 1.5, + 'Transaction ID': 1.5, + 'Date': None, + 'Amount': 1, + 'Description': 2.5, + 'Entity': 1.5, + } + + def __init__(self) -> None: + super().__init__() + self.bad_sheet = self.start_sheet("Reconciliation") + self.good_sheet = self.start_sheet("Matched Postings") + + def init_styles(self) -> None: + super().init_styles() + self.error_style = self.bgcolor_style('#ffcc99') + self.warning_style = self.bgcolor_style('#ffff00') + self.endrow_style = self.border_style(core.Border.BOTTOM, '1pt') + + def start_sheet(self, name: str) -> odf.table.Table: + self.use_sheet(name) + for col_width in self.COLUMN_WIDTHS.values(): + col_style = None if col_width is None else self.column_style(col_width) + self.sheet.addElement(odf.table.TableColumn(stylename=col_style)) + self.add_row(*( + self.string_cell(col_name, stylename=self.style_bold) + for col_name in self.COLUMN_WIDTHS + )) + self.lock_first_row() + return self.sheet + + def section_key(self, row: PayPalPosting) -> str: + return row.txn_id + + def write_row( + self, + row: PayPalPosting, + source_style: Union[None, str, odf.style.Style]=None, + problems: int=0, + ) -> None: + source_style = self.merge_styles(source_style, self.style_endtext) + if problems & ReconcileProblems.MONTH_DIFF: + date_style: Optional[odf.style.Style] = self.error_style + elif problems & ReconcileProblems.DATE_DIFF: + date_style = self.warning_style + else: + date_style = None + date_style = self.merge_styles(date_style, self.style_date) + if problems & ReconcileProblems.AMOUNT_DIFF: + amount_style: Optional[odf.style.Style] = self.error_style + else: + amount_style = None + amount_style = self.merge_styles( + amount_style, self.currency_style(row.amount.currency), + ) + self.add_row( + self.string_cell(f'{row.filename}:{row.lineno}', stylename=source_style), + self.string_cell(row.txn_id), + self.date_cell(row.date, stylename=date_style), + self.currency_cell(row.amount, stylename=amount_style), + self.string_cell(row.description), + self.string_cell(row.entity or ''), + ) + + def write_missing(self, source: str, txn_id: str) -> None: + self.add_row( + self.string_cell(source, stylename=self.error_style), + self.string_cell(txn_id), + odf.table.TableCell(), + odf.table.TableCell(), + self.string_cell(f"not found in {source}"), + odf.table.TableCell(), + ) + + def write_all(self, source: Mapping[str, PayPalReconciler]) -> None: + for txn_id, reconciler in sorted( + source.items(), key=lambda kv: kv[1].sort_key(), + ): + problems = reconciler.problems() + if problems & ~ReconcileProblems.DATE_DIFF: + self.sheet = self.bad_sheet + else: + self.sheet = self.good_sheet + posts_iter = iter(reconciler.statement_posts) + try: + statement_post = next(posts_iter) + except StopIteration: + self.write_missing('statement', txn_id) + else: + self.write_row(statement_post) + for post in posts_iter: + stmt_problems = statement_post.compare(post) + if post.amount != statement_post.amount: + stmt_problems |= ReconcileProblems.AMOUNT_DIFF + self.write_row(statement_post, self.error_style, stmt_problems) + if self.sheet is self.bad_sheet: + probs_iter = reconciler.post_problems() + try: + post, post_problems = next(probs_iter) + except StopIteration: + self.write_missing('books', txn_id) + else: + self.write_row(post, None, problems | post_problems) + for post, post_problems in probs_iter: + self.write_row(post, None, problems | post_problems) + for cell in self.sheet.lastChild.childNodes: + cell_style = self.merge_styles( + cell.getAttribute('stylename'), + self.endrow_style, + ) + cell.setAttribute('stylename', cell_style) + + +def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace: + parser = argparse.ArgumentParser() + cliutil.add_version_argument(parser) + cliutil.add_loglevel_argument(parser) + parser.add_argument( + '--account', + default='Assets:PayPal', + help="""Name of the Beancount account with PayPal postings. +Default %(default)r. +""") + parser.add_argument( + '--begin', '--start', '-b', + dest='start_date', + metavar='DATE', + type=cliutil.date_arg, + help="""Date to start reconciliation, inclusive, in YYYY-MM-DD format. +The default is the first date in the statement. +""") + parser.add_argument( + '--end', '--stop', '-e', + dest='stop_date', + metavar='DATE', + type=cliutil.date_arg, + help="""Date to stop reconciliation, exclusive, in YYYY-MM-DD format. +The default is the last date in the statement. +""") + parser.add_argument( + '--date-fuzz', + metavar='DAYS', + type=int, + default=30, + # Loading an extra month on each end should be more than enough for + # real reconciliation. This switch is mostly a debug aid. + help=argparse.SUPPRESS, + ) + parser.add_argument( + '--output-file', '-O', + metavar='PATH', + type=Path, + help="""Write the report to this file, or stdout when PATH is `-`. +The default is `PayPalReconciliation__.ods`. +""") + parser.add_argument( + 'statement', + type=Path, + help="Path to the PayPal statement CSV", + ) + return parser.parse_args(arglist) + +def main(arglist: Optional[Sequence[str]]=None, + stdout: TextIO=sys.stdout, + stderr: TextIO=sys.stderr, + config: Optional[configmod.Config]=None, +) -> int: + args = parse_arguments(arglist) + cliutil.set_loglevel(logger, args.loglevel) + if config is None: + config = configmod.Config() + config.load_file() + + date_range = DateRange( + args.start_date or datetime.date(datetime.MINYEAR, 1, 1), + args.stop_date or datetime.date(datetime.MAXYEAR, 12, 1), + ) + reconcilers: Mapping[str, PayPalReconciler] = collections.defaultdict(PayPalReconciler) + for stmt_post in PayPalPosting.iter_statement(args.statement): + if stmt_post.amount.number and stmt_post.date in date_range: + reconcilers[stmt_post.txn_id].add_from_statement(stmt_post) + if not reconcilers: + logger.warning("no posting data read from statement") + + if args.start_date is None: + args.start_date = min( + post.date + for rec in reconcilers.values() + for post in rec.statement_posts + ) + if args.stop_date is None: + args.stop_date = max( + post.date + for rec in reconcilers.values() + for post in rec.statement_posts + ) + datetime.timedelta(days=1) + + returncode = os.EX_OK + books_loader = config.books_loader() + if books_loader is None: + entries, load_errors, options = books.Loader.load_none(config.config_file_path()) + returncode = cliutil.ExitCode.NoConfiguration + else: + date_fuzz = datetime.timedelta(days=args.date_fuzz) + entries, load_errors, options = books_loader.load_fy_range( + args.start_date - date_fuzz, args.stop_date + date_fuzz, + ) + if load_errors: + returncode = cliutil.ExitCode.BeancountErrors + for error in load_errors: + bc_printer.print_error(error, file=stderr) + + date_range = DateRange(args.start_date, args.stop_date) + for bean_post in data.Posting.from_entries(entries): + if bean_post.account != args.account: + continue + paypal_post = PayPalPosting.from_books(bean_post) + if paypal_post.txn_id in reconcilers or ( + paypal_post.amount.number and paypal_post.date in date_range + ): + reconcilers[paypal_post.txn_id].add_from_books(paypal_post) + + report = PayPalReconciliationReport() + report.write_all(reconcilers) + if args.output_file is None: + args.output_file = Path('PayPalReconciliation_{}_{}.ods'.format( + args.start_date.isoformat(), + args.stop_date.isoformat(), + )) + logger.info("Writing report to %s", args.output_file) + ods_file = cliutil.bytes_output(args.output_file, stdout) + report.save_file(ods_file) + return returncode + +entry_point = cliutil.make_entry_point(__name__, PROGNAME) + +if __name__ == '__main__': + exit(entry_point()) diff --git a/setup.py b/setup.py index e7bb3fd..9033b48 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import setup setup( name='conservancy_beancount', description="Plugin, library, and reports for reading Conservancy's books", - version='1.15.5', + version='1.16.0', author='Software Freedom Conservancy', author_email='info@sfconservancy.org', license='GNU AGPLv3+', @@ -35,6 +35,7 @@ setup( 'conservancy_beancount.pdfforms', 'conservancy_beancount.pdfforms.extract', 'conservancy_beancount.plugin', + 'conservancy_beancount.reconcile', 'conservancy_beancount.reports', 'conservancy_beancount.tools', ], @@ -52,6 +53,7 @@ setup( 'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point', 'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point', 'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point', + 'reconcile-paypal = conservancy_beancount.reconcile.paypal:entry_point', 'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point', ], },