reconcile: Start module with PayPal reconciliation report.

This commit is contained in:
Brett Smith 2021-01-23 10:23:03 -05:00
parent 91cbbc9159
commit eba4ed45b0
3 changed files with 417 additions and 1 deletions

View file

@ -0,0 +1,414 @@
#!/usr/bin/env python3
"""paypal.py - Reconcile PayPal account from CSV statement"""
# Copyright © 2021 Brett Smith
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
#
# Full copyright and licensing details can be found at toplevel file
# LICENSE.txt in the repository.
import argparse
import collections
import csv
import datetime
import enum
import logging
import operator
import os
import sys
import unicodedata
from decimal import Decimal
from pathlib import Path
import odf.style # type:ignore[import]
import odf.table # type:ignore[import]
from beancount.parser import printer as bc_printer
from .. import books
from .. import cliutil
from .. import config as configmod
from .. import data
from ..ranges import DateRange
from ..reports import core
from typing import (
Any,
Hashable,
Iterable,
Iterator,
List,
Mapping,
NamedTuple,
Optional,
Sequence,
TextIO,
Tuple,
Union,
)
PROGNAME = 'reconcile-paypal'
logger = logging.getLogger('conservancy_beancount.reconcile.paypal')
class ReconcileProblems(enum.IntFlag):
NOT_IN_STATEMENT = enum.auto()
DUP_IN_STATEMENT = enum.auto()
AMOUNT_DIFF = enum.auto()
MONTH_DIFF = enum.auto()
DATE_DIFF = enum.auto()
class PayPalPosting(NamedTuple):
filename: str
lineno: int
txn_id: str
date: datetime.date
amount: data.Amount
description: str
entity: Optional[str]
@classmethod
def from_books(cls, post: data.Posting) -> 'PayPalPosting':
return cls(
post.meta.get('filename', ''),
post.meta.get('lineno', 0),
post.meta.get('paypal-id', '<missing paypal-id>'),
post.meta.date,
post.units,
post.meta.txn.narration,
post.meta.get('entity'),
)
@classmethod
def from_statement(
cls,
row: Mapping[str, str],
filename: str='<statement>',
lineno: int=0,
) -> 'PayPalPosting':
row_net = Decimal(row['Net'].replace(',', ''))
return cls(
filename,
lineno,
row['Transaction ID'],
datetime.datetime.strptime(row['Date'], '%m/%d/%Y').date(),
data.Amount(row_net, row['Currency']),
row['Name'],
None,
)
@classmethod
def iter_statement(cls, path: Path) -> Iterator['PayPalPosting']:
with path.open() as csv_file:
# PayPal's CSVs start with a ZWNJ space.
# Discard that, assuming it's still there.
orig_pos = csv_file.tell()
char1 = csv_file.read(1)
if not unicodedata.category(char1).startswith('C'):
csv_file.seek(orig_pos)
in_csv = csv.DictReader(csv_file)
filename = str(path)
for lineno, row in enumerate(in_csv, 2):
yield cls.from_statement(row, filename, lineno)
def compare(self, other: 'PayPalPosting') -> int:
retval = 0
if (self.date.year != other.date.year
or self.date.month != other.date.month):
retval |= ReconcileProblems.MONTH_DIFF
retval |= ReconcileProblems.DATE_DIFF
elif self.date.day != other.date.day:
retval |= ReconcileProblems.DATE_DIFF
return retval
class PayPalReconciler:
__slots__ = ['books_posts', 'statement_posts']
def __init__(self) -> None:
self.books_posts: List[PayPalPosting] = []
self.statement_posts: List[PayPalPosting] = []
def add_from_books(self, post: PayPalPosting) -> None:
self.books_posts.append(post)
def add_from_statement(self, post: PayPalPosting) -> None:
self.statement_posts.append(post)
def post_problems(self) -> Iterator[Tuple[PayPalPosting, int]]:
try:
statement_post = self.statement_posts[0]
except IndexError:
try:
statement_post = self.books_posts[0]
except IndexError:
return
for post in self.books_posts:
yield (post, statement_post.compare(post))
def problems(self) -> int:
stmt_count = len(self.statement_posts)
if stmt_count == 0:
return ReconcileProblems.NOT_IN_STATEMENT
elif stmt_count > 1:
return ReconcileProblems.DUP_IN_STATEMENT
else:
balance = core.Balance(post.amount for post in self.books_posts)
if not (balance - self.statement_posts[0].amount).is_zero():
return ReconcileProblems.AMOUNT_DIFF
worst_problem = 0
for _, problems in self.post_problems():
if problems & ~ReconcileProblems.DATE_DIFF:
return problems
else:
worst_problem = worst_problem or problems
return worst_problem
def sort_key(self) -> Hashable:
try:
post = self.statement_posts[0]
except IndexError:
post = self.books_posts[0]
return post.date
class PayPalReconciliationReport(core.BaseODS[PayPalPosting, str]):
COLUMN_WIDTHS = {
'Source': 1.5,
'Transaction ID': 1.5,
'Date': None,
'Amount': 1,
'Description': 2.5,
'Entity': 1.5,
}
def __init__(self) -> None:
super().__init__()
self.bad_sheet = self.start_sheet("Reconciliation")
self.good_sheet = self.start_sheet("Matched Postings")
def init_styles(self) -> None:
super().init_styles()
self.error_style = self.bgcolor_style('#ffcc99')
self.warning_style = self.bgcolor_style('#ffff00')
self.endrow_style = self.border_style(core.Border.BOTTOM, '1pt')
def start_sheet(self, name: str) -> odf.table.Table:
self.use_sheet(name)
for col_width in self.COLUMN_WIDTHS.values():
col_style = None if col_width is None else self.column_style(col_width)
self.sheet.addElement(odf.table.TableColumn(stylename=col_style))
self.add_row(*(
self.string_cell(col_name, stylename=self.style_bold)
for col_name in self.COLUMN_WIDTHS
))
self.lock_first_row()
return self.sheet
def section_key(self, row: PayPalPosting) -> str:
return row.txn_id
def write_row(
self,
row: PayPalPosting,
source_style: Union[None, str, odf.style.Style]=None,
problems: int=0,
) -> None:
source_style = self.merge_styles(source_style, self.style_endtext)
if problems & ReconcileProblems.MONTH_DIFF:
date_style: Optional[odf.style.Style] = self.error_style
elif problems & ReconcileProblems.DATE_DIFF:
date_style = self.warning_style
else:
date_style = None
date_style = self.merge_styles(date_style, self.style_date)
if problems & ReconcileProblems.AMOUNT_DIFF:
amount_style: Optional[odf.style.Style] = self.error_style
else:
amount_style = None
amount_style = self.merge_styles(
amount_style, self.currency_style(row.amount.currency),
)
self.add_row(
self.string_cell(f'{row.filename}:{row.lineno}', stylename=source_style),
self.string_cell(row.txn_id),
self.date_cell(row.date, stylename=date_style),
self.currency_cell(row.amount, stylename=amount_style),
self.string_cell(row.description),
self.string_cell(row.entity or ''),
)
def write_missing(self, source: str, txn_id: str) -> None:
self.add_row(
self.string_cell(source, stylename=self.error_style),
self.string_cell(txn_id),
odf.table.TableCell(),
odf.table.TableCell(),
self.string_cell(f"not found in {source}"),
odf.table.TableCell(),
)
def write_all(self, source: Mapping[str, PayPalReconciler]) -> None:
for txn_id, reconciler in sorted(
source.items(), key=lambda kv: kv[1].sort_key(),
):
problems = reconciler.problems()
if problems & ~ReconcileProblems.DATE_DIFF:
self.sheet = self.bad_sheet
else:
self.sheet = self.good_sheet
posts_iter = iter(reconciler.statement_posts)
try:
statement_post = next(posts_iter)
except StopIteration:
self.write_missing('statement', txn_id)
else:
self.write_row(statement_post)
for post in posts_iter:
stmt_problems = statement_post.compare(post)
if post.amount != statement_post.amount:
stmt_problems |= ReconcileProblems.AMOUNT_DIFF
self.write_row(statement_post, self.error_style, stmt_problems)
if self.sheet is self.bad_sheet:
probs_iter = reconciler.post_problems()
try:
post, post_problems = next(probs_iter)
except StopIteration:
self.write_missing('books', txn_id)
else:
self.write_row(post, None, problems | post_problems)
for post, post_problems in probs_iter:
self.write_row(post, None, problems | post_problems)
for cell in self.sheet.lastChild.childNodes:
cell_style = self.merge_styles(
cell.getAttribute('stylename'),
self.endrow_style,
)
cell.setAttribute('stylename', cell_style)
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
parser = argparse.ArgumentParser()
cliutil.add_version_argument(parser)
cliutil.add_loglevel_argument(parser)
parser.add_argument(
'--account',
default='Assets:PayPal',
help="""Name of the Beancount account with PayPal postings.
Default %(default)r.
""")
parser.add_argument(
'--begin', '--start', '-b',
dest='start_date',
metavar='DATE',
type=cliutil.date_arg,
help="""Date to start reconciliation, inclusive, in YYYY-MM-DD format.
The default is the first date in the statement.
""")
parser.add_argument(
'--end', '--stop', '-e',
dest='stop_date',
metavar='DATE',
type=cliutil.date_arg,
help="""Date to stop reconciliation, exclusive, in YYYY-MM-DD format.
The default is the last date in the statement.
""")
parser.add_argument(
'--date-fuzz',
metavar='DAYS',
type=int,
default=30,
# Loading an extra month on each end should be more than enough for
# real reconciliation. This switch is mostly a debug aid.
help=argparse.SUPPRESS,
)
parser.add_argument(
'--output-file', '-O',
metavar='PATH',
type=Path,
help="""Write the report to this file, or stdout when PATH is `-`.
The default is `PayPalReconciliation_<StartDate>_<StopDate>.ods`.
""")
parser.add_argument(
'statement',
type=Path,
help="Path to the PayPal statement CSV",
)
return parser.parse_args(arglist)
def main(arglist: Optional[Sequence[str]]=None,
stdout: TextIO=sys.stdout,
stderr: TextIO=sys.stderr,
config: Optional[configmod.Config]=None,
) -> int:
args = parse_arguments(arglist)
cliutil.set_loglevel(logger, args.loglevel)
if config is None:
config = configmod.Config()
config.load_file()
date_range = DateRange(
args.start_date or datetime.date(datetime.MINYEAR, 1, 1),
args.stop_date or datetime.date(datetime.MAXYEAR, 12, 1),
)
reconcilers: Mapping[str, PayPalReconciler] = collections.defaultdict(PayPalReconciler)
for stmt_post in PayPalPosting.iter_statement(args.statement):
if stmt_post.amount.number and stmt_post.date in date_range:
reconcilers[stmt_post.txn_id].add_from_statement(stmt_post)
if not reconcilers:
logger.warning("no posting data read from statement")
if args.start_date is None:
args.start_date = min(
post.date
for rec in reconcilers.values()
for post in rec.statement_posts
)
if args.stop_date is None:
args.stop_date = max(
post.date
for rec in reconcilers.values()
for post in rec.statement_posts
) + datetime.timedelta(days=1)
returncode = os.EX_OK
books_loader = config.books_loader()
if books_loader is None:
entries, load_errors, options = books.Loader.load_none(config.config_file_path())
returncode = cliutil.ExitCode.NoConfiguration
else:
date_fuzz = datetime.timedelta(days=args.date_fuzz)
entries, load_errors, options = books_loader.load_fy_range(
args.start_date - date_fuzz, args.stop_date + date_fuzz,
)
if load_errors:
returncode = cliutil.ExitCode.BeancountErrors
for error in load_errors:
bc_printer.print_error(error, file=stderr)
date_range = DateRange(args.start_date, args.stop_date)
for bean_post in data.Posting.from_entries(entries):
if bean_post.account != args.account:
continue
paypal_post = PayPalPosting.from_books(bean_post)
if paypal_post.txn_id in reconcilers or (
paypal_post.amount.number and paypal_post.date in date_range
):
reconcilers[paypal_post.txn_id].add_from_books(paypal_post)
report = PayPalReconciliationReport()
report.write_all(reconcilers)
if args.output_file is None:
args.output_file = Path('PayPalReconciliation_{}_{}.ods'.format(
args.start_date.isoformat(),
args.stop_date.isoformat(),
))
logger.info("Writing report to %s", args.output_file)
ods_file = cliutil.bytes_output(args.output_file, stdout)
report.save_file(ods_file)
return returncode
entry_point = cliutil.make_entry_point(__name__, PROGNAME)
if __name__ == '__main__':
exit(entry_point())

View file

@ -5,7 +5,7 @@ from setuptools import setup
setup(
name='conservancy_beancount',
description="Plugin, library, and reports for reading Conservancy's books",
version='1.15.5',
version='1.16.0',
author='Software Freedom Conservancy',
author_email='info@sfconservancy.org',
license='GNU AGPLv3+',
@ -35,6 +35,7 @@ setup(
'conservancy_beancount.pdfforms',
'conservancy_beancount.pdfforms.extract',
'conservancy_beancount.plugin',
'conservancy_beancount.reconcile',
'conservancy_beancount.reports',
'conservancy_beancount.tools',
],
@ -52,6 +53,7 @@ setup(
'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point',
'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
'reconcile-paypal = conservancy_beancount.reconcile.paypal:entry_point',
'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
],
},