query: Start new reporting tool.
Ultimately this is going to be a tool that can generate nicely-formatted spreadsheets from arbitrary bean-queries. This initial version doesn't generate spreadsheets yet, but it does integrate our usual books-loading tools and rewrite rules into existing bean-query functionality, so it's a start. It also has some of the query building and parsing that higher-level spreadsheets will need.
This commit is contained in:
parent
5231a1784f
commit
ccbc447a35
5 changed files with 465 additions and 1 deletions
236
conservancy_beancount/reports/query.py
Normal file
236
conservancy_beancount/reports/query.py
Normal file
|
@ -0,0 +1,236 @@
|
|||
"""query.py - Report arbitrary queries with advanced loading and formatting"""
|
||||
# Copyright © 2021 Brett Smith
|
||||
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||
#
|
||||
# Full copyright and licensing details can be found at toplevel file
|
||||
# LICENSE.txt in the repository.
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import datetime
|
||||
import enum
|
||||
import functools
|
||||
import locale
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
|
||||
from typing import (
|
||||
cast,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
TextIO,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
from ..beancount_types import (
|
||||
MetaValue,
|
||||
Posting,
|
||||
Transaction,
|
||||
)
|
||||
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
import beancount.query.shell as bc_query
|
||||
import beancount.query.query_parser as bc_query_parser
|
||||
|
||||
from . import core
|
||||
from . import rewrite
|
||||
from .. import books
|
||||
from .. import cliutil
|
||||
from .. import config as configmod
|
||||
from .. import data
|
||||
|
||||
PROGNAME = 'query-report'
|
||||
QUERY_PARSER = bc_query_parser.Parser()
|
||||
logger = logging.getLogger('conservancy_beancount.reports.query')
|
||||
|
||||
class BooksLoader:
|
||||
"""Closure to load books with a zero-argument callable
|
||||
|
||||
This matches the load interface that BQLShell expects.
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
books_loader: Optional[books.Loader],
|
||||
start_date: Optional[datetime.date]=None,
|
||||
stop_date: Optional[datetime.date]=None,
|
||||
rewrite_rules: Sequence[rewrite.RewriteRuleset]=(),
|
||||
) -> None:
|
||||
self.books_loader = books_loader
|
||||
self.start_date = start_date
|
||||
self.stop_date = stop_date
|
||||
self.rewrite_rules = rewrite_rules
|
||||
|
||||
def __call__(self) -> books.LoadResult:
|
||||
result = books.Loader.dispatch(self.books_loader, self.start_date, self.stop_date)
|
||||
for index, entry in enumerate(result.entries):
|
||||
# entry might not be a Transaction; we catch that later.
|
||||
# The type ignores are because the underlying Beancount type isn't
|
||||
# type-checkable.
|
||||
postings = data.Posting.from_txn(entry) # type:ignore[arg-type]
|
||||
for ruleset in self.rewrite_rules:
|
||||
postings = ruleset.rewrite(postings)
|
||||
try:
|
||||
result.entries[index] = entry._replace(postings=list(postings)) # type:ignore[call-arg]
|
||||
except AttributeError:
|
||||
pass
|
||||
return result
|
||||
|
||||
|
||||
class BQLShell(bc_query.BQLShell):
|
||||
pass
|
||||
|
||||
|
||||
class JoinOperator(enum.Enum):
|
||||
AND = 'AND'
|
||||
OR = 'OR'
|
||||
|
||||
def join(self, parts: Iterable[str]) -> str:
|
||||
return f' {self.value} '.join(parts)
|
||||
|
||||
|
||||
class ReportFormat(enum.Enum):
|
||||
TEXT = 'text'
|
||||
TXT = TEXT
|
||||
CSV = 'csv'
|
||||
# ODS = 'ods'
|
||||
|
||||
|
||||
def _date_condition(
|
||||
date: Union[int, datetime.date],
|
||||
year_to_date: Callable[[int], datetime.date],
|
||||
op: str,
|
||||
) -> str:
|
||||
if isinstance(date, int):
|
||||
date = year_to_date(date)
|
||||
return f'date {op} {date.isoformat()}'
|
||||
|
||||
def build_query(
|
||||
args: argparse.Namespace,
|
||||
fy: books.FiscalYear,
|
||||
in_file: Optional[TextIO]=None,
|
||||
) -> Optional[str]:
|
||||
if not args.query:
|
||||
args.query = [] if in_file is None else [line[:-1] for line in in_file]
|
||||
if not any(re.search(r'\S', s) for s in args.query):
|
||||
return None
|
||||
plain_query = ' '.join(args.query)
|
||||
try:
|
||||
QUERY_PARSER.parse(plain_query)
|
||||
except bc_query_parser.ParseError:
|
||||
conds = [f'({args.join.join(args.query)})']
|
||||
if args.start_date is not None:
|
||||
conds.append(_date_condition(args.start_date, fy.first_date, '>='))
|
||||
if args.stop_date is not None:
|
||||
conds.append(_date_condition(args.stop_date, fy.next_fy_date, '<'))
|
||||
return f'SELECT * WHERE {" AND ".join(conds)}'
|
||||
else:
|
||||
return plain_query
|
||||
|
||||
def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(prog=PROGNAME)
|
||||
cliutil.add_version_argument(parser)
|
||||
parser.add_argument(
|
||||
'--begin', '--start', '-b',
|
||||
dest='start_date',
|
||||
metavar='DATE',
|
||||
type=cliutil.year_or_date_arg,
|
||||
help="""Begin loading entries from this fiscal year. When query-report
|
||||
builds the query, it will include a condition `date >= DATE`.
|
||||
""")
|
||||
parser.add_argument(
|
||||
'--end', '--stop', '-e',
|
||||
dest='stop_date',
|
||||
metavar='DATE',
|
||||
type=cliutil.year_or_date_arg,
|
||||
help="""End loading entries from this fiscal year. When query-report
|
||||
builds the query, it will include a condition `date < DATE`. If you specify a
|
||||
begin date but not an end date, the default end date will be the end of the
|
||||
fiscal year of the begin date.
|
||||
""")
|
||||
cliutil.add_rewrite_rules_argument(parser)
|
||||
format_arg = cliutil.EnumArgument(ReportFormat)
|
||||
parser.add_argument(
|
||||
'--report-type', '--format', '-t', '-f',
|
||||
metavar='TYPE',
|
||||
type=format_arg.enum_type,
|
||||
help="""Format of report to generate. Choices are
|
||||
{format_arg.choices_str()}. Default is guessed from your output filename
|
||||
extension, or 'text' if that fails.
|
||||
""")
|
||||
parser.add_argument(
|
||||
'--output-file', '-O', '-o',
|
||||
metavar='PATH',
|
||||
type=Path,
|
||||
help="""Write the report to this file, or stdout when PATH is `-`.
|
||||
The default is stdout for text and CSV reports, and a generated filename for
|
||||
ODS reports.
|
||||
""")
|
||||
join_arg = cliutil.EnumArgument(JoinOperator)
|
||||
parser.add_argument(
|
||||
'--join', '-j',
|
||||
metavar='OP',
|
||||
type=join_arg.enum_type,
|
||||
default=JoinOperator.AND,
|
||||
help="""When you specify multiple WHERE conditions on the command line
|
||||
and let query-report build the query, join conditions with this operator.
|
||||
Choices are {join_arg.choices_str()}. Default 'and'.
|
||||
""")
|
||||
cliutil.add_loglevel_argument(parser)
|
||||
parser.add_argument(
|
||||
'query',
|
||||
nargs=argparse.ZERO_OR_MORE,
|
||||
help="""Query to run non-interactively. You can specify a full query
|
||||
you write yourself, or conditions to follow WHERE and let query-report build
|
||||
the rest of the query.
|
||||
""")
|
||||
args = parser.parse_args(arglist)
|
||||
return args
|
||||
|
||||
def main(arglist: Optional[Sequence[str]]=None,
|
||||
stdout: TextIO=sys.stdout,
|
||||
stderr: TextIO=sys.stderr,
|
||||
config: Optional[configmod.Config]=None,
|
||||
) -> int:
|
||||
args = parse_arguments(arglist)
|
||||
cliutil.set_loglevel(logger, args.loglevel)
|
||||
if config is None:
|
||||
config = configmod.Config()
|
||||
config.load_file()
|
||||
|
||||
fy = config.fiscal_year_begin()
|
||||
if args.stop_date is None and args.start_date is not None:
|
||||
args.stop_date = fy.next_fy_date(args.start_date)
|
||||
query = build_query(args, fy, None if sys.stdin.isatty() else sys.stdin)
|
||||
is_interactive = query is None and sys.stdin.isatty()
|
||||
if args.report_type is None:
|
||||
try:
|
||||
args.report_type = ReportFormat[args.output_file.suffix[1:].upper()]
|
||||
except (AttributeError, KeyError):
|
||||
args.report_type = ReportFormat.TEXT # if is_interactive else ReportFormat.ODS
|
||||
load_func = BooksLoader(
|
||||
config.books_loader(),
|
||||
args.start_date,
|
||||
args.stop_date,
|
||||
[rewrite.RewriteRuleset.from_yaml(path) for path in args.rewrite_rules],
|
||||
)
|
||||
shell = BQLShell(is_interactive, load_func, stdout, args.report_type.value)
|
||||
shell.on_Reload()
|
||||
if query is None:
|
||||
shell.cmdloop()
|
||||
else:
|
||||
shell.onecmd(query)
|
||||
|
||||
return cliutil.ExitCode.OK
|
||||
|
||||
entry_point = cliutil.make_entry_point(__name__, PROGNAME)
|
||||
|
||||
if __name__ == '__main__':
|
||||
exit(entry_point())
|
3
setup.py
3
setup.py
|
@ -5,7 +5,7 @@ from setuptools import setup
|
|||
setup(
|
||||
name='conservancy_beancount',
|
||||
description="Plugin, library, and reports for reading Conservancy's books",
|
||||
version='1.18.1',
|
||||
version='1.18.2',
|
||||
author='Software Freedom Conservancy',
|
||||
author_email='info@sfconservancy.org',
|
||||
license='GNU AGPLv3+',
|
||||
|
@ -53,6 +53,7 @@ setup(
|
|||
'pdfform-extract = conservancy_beancount.pdfforms.extract:entry_point',
|
||||
'pdfform-extract-irs990scheduleA = conservancy_beancount.pdfforms.extract.irs990scheduleA:entry_point',
|
||||
'pdfform-fill = conservancy_beancount.pdfforms.fill:entry_point',
|
||||
'query-report = conservancy_beancount.reports.query:entry_point',
|
||||
'reconcile-paypal = conservancy_beancount.reconcile.paypal:entry_point',
|
||||
'reconcile-statement = conservancy_beancount.reconcile.statement:entry_point',
|
||||
'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
|
||||
|
|
223
tests/test_reports_query.py
Normal file
223
tests/test_reports_query.py
Normal file
|
@ -0,0 +1,223 @@
|
|||
"""test_reports_query.py - Unit tests for query report"""
|
||||
# Copyright © 2021 Brett Smith
|
||||
# License: AGPLv3-or-later WITH Beancount-Plugin-Additional-Permission-1.0
|
||||
#
|
||||
# Full copyright and licensing details can be found at toplevel file
|
||||
# LICENSE.txt in the repository.
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import copy
|
||||
import csv
|
||||
import datetime
|
||||
import io
|
||||
import itertools
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from . import testutil
|
||||
|
||||
from conservancy_beancount.books import FiscalYear
|
||||
from conservancy_beancount.reports import query as qmod
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
class MockRewriteRuleset:
|
||||
def __init__(self, multiplier=2):
|
||||
self.multiplier = multiplier
|
||||
|
||||
def rewrite(self, posts):
|
||||
for post in posts:
|
||||
number, currency = post.units
|
||||
number *= self.multiplier
|
||||
yield post._replace(units=testutil.Amount(number, currency))
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def fy():
|
||||
return FiscalYear(3, 1)
|
||||
|
||||
def pipe_main(arglist, config):
|
||||
stdout = io.StringIO()
|
||||
stderr = io.StringIO()
|
||||
returncode = qmod.main(arglist, stdout, stderr, config)
|
||||
return returncode, stdout, stderr
|
||||
|
||||
def query_args(query=None, start_date=None, stop_date=None, join='AND'):
|
||||
join = qmod.JoinOperator[join]
|
||||
return argparse.Namespace(**locals())
|
||||
|
||||
def test_books_loader_empty():
|
||||
result = qmod.BooksLoader(None)()
|
||||
assert not result.entries
|
||||
assert len(result.errors) == 1
|
||||
|
||||
def test_books_loader_plain():
|
||||
books_path = testutil.test_path(f'books/books/2018.beancount')
|
||||
loader = testutil.TestBooksLoader(books_path)
|
||||
result = qmod.BooksLoader(loader)()
|
||||
assert not result.errors
|
||||
assert result.entries
|
||||
min_date = datetime.date(2018, 3, 1)
|
||||
assert all(ent.date >= min_date for ent in result.entries)
|
||||
|
||||
def test_books_loader_rewrites():
|
||||
rewrites = [MockRewriteRuleset()]
|
||||
books_path = testutil.test_path(f'books/books/2018.beancount')
|
||||
loader = testutil.TestBooksLoader(books_path)
|
||||
result = qmod.BooksLoader(loader, None, None, rewrites)()
|
||||
assert not result.errors
|
||||
assert result.entries
|
||||
numbers = frozenset(
|
||||
abs(post.units.number)
|
||||
for entry in result.entries
|
||||
for post in getattr(entry, 'postings', ())
|
||||
)
|
||||
assert numbers
|
||||
assert all(abs(number) >= 40 for number in numbers)
|
||||
|
||||
@pytest.mark.parametrize('file_s', [None, '', ' \n \n\n'])
|
||||
def test_build_query_empty(fy, file_s):
|
||||
args = query_args()
|
||||
if file_s is None:
|
||||
query = qmod.build_query(args, fy)
|
||||
else:
|
||||
with io.StringIO(file_s) as qfile:
|
||||
query = qmod.build_query(args, fy, qfile)
|
||||
assert query is None
|
||||
|
||||
@pytest.mark.parametrize('query_str', [
|
||||
'SELECT * WHERE date >= 2018-03-01',
|
||||
'select *',
|
||||
'JOURNAL "Income:Donations"',
|
||||
'journal',
|
||||
'BALANCES FROM year=2018',
|
||||
'balances',
|
||||
])
|
||||
def test_build_query_in_arglist(fy, query_str):
|
||||
args = query_args(query_str.split(), testutil.PAST_DATE, testutil.FUTURE_DATE)
|
||||
assert qmod.build_query(args, fy) == query_str
|
||||
|
||||
@pytest.mark.parametrize('count,join_op', enumerate(qmod.JoinOperator, 1))
|
||||
def test_build_query_where_arglist_conditions(fy, count, join_op):
|
||||
conds = ['account ~ "^Income:"', 'year >= 2018'][:count]
|
||||
args = query_args(conds, join=join_op.name)
|
||||
query = qmod.build_query(args, fy)
|
||||
assert query.startswith('SELECT ')
|
||||
cond_index = query.index(' WHERE ') + 7
|
||||
assert query[cond_index:] == '({})'.format(join_op.join(conds))
|
||||
|
||||
@pytest.mark.parametrize('argname,date_arg', itertools.product(
|
||||
['start_date', 'stop_date'],
|
||||
[testutil.FY_START_DATE, testutil.FY_START_DATE.year],
|
||||
))
|
||||
def test_build_query_one_date_arg(fy, argname, date_arg):
|
||||
query_kwargs = {
|
||||
argname: date_arg,
|
||||
'query': ['flag = "*"', 'flag = "!"'],
|
||||
'join': 'OR',
|
||||
}
|
||||
args = query_args(**query_kwargs)
|
||||
query = qmod.build_query(args, fy)
|
||||
assert query.startswith('SELECT ')
|
||||
cond_index = query.index(' WHERE ') + 7
|
||||
if argname == 'start_date':
|
||||
expect_op = '>='
|
||||
year_to_date = fy.first_date
|
||||
else:
|
||||
expect_op = '<'
|
||||
year_to_date = fy.next_fy_date
|
||||
if not isinstance(date_arg, datetime.date):
|
||||
date_arg = year_to_date(date_arg)
|
||||
assert query[cond_index:] == '({}) AND date {} {}'.format(
|
||||
' OR '.join(query_kwargs['query']), expect_op, date_arg.isoformat(),
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize('start_date,stop_date', itertools.product(
|
||||
[testutil.PAST_DATE, testutil.PAST_DATE.year],
|
||||
[testutil.FUTURE_DATE, testutil.FUTURE_DATE.year],
|
||||
))
|
||||
def test_build_query_two_date_args(fy, start_date, stop_date):
|
||||
args = query_args(['account ~ "^Equity:"'], start_date, stop_date, 'AND')
|
||||
query = qmod.build_query(args, fy)
|
||||
assert query.startswith('SELECT ')
|
||||
cond_index = query.index(' WHERE ') + 7
|
||||
if isinstance(start_date, int):
|
||||
start_date = fy.first_date(start_date)
|
||||
if isinstance(stop_date, int):
|
||||
stop_date = fy.next_fy_date(stop_date)
|
||||
assert query[cond_index:] == '({}) AND date >= {} AND date < {}'.format(
|
||||
args.query[0], start_date.isoformat(), stop_date.isoformat(),
|
||||
)
|
||||
|
||||
def test_build_query_plain_from_file(fy):
|
||||
with io.StringIO("SELECT *\n WHERE account ~ '^Assets:';\n") as qfile:
|
||||
query = qmod.build_query(query_args(), fy, qfile)
|
||||
assert re.fullmatch(r"SELECT \*\s+WHERE account ~ '\^Assets:';\s*", query)
|
||||
|
||||
def test_build_query_from_file_where_clauses(fy):
|
||||
conds = ["account ~ '^Income:'", "account ~ '^Expenses:'"]
|
||||
args = query_args(None, testutil.PAST_DATE, testutil.FUTURE_DATE, 'OR')
|
||||
with io.StringIO(''.join(f'{s}\n' for s in conds)) as qfile:
|
||||
query = qmod.build_query(args, fy, qfile)
|
||||
assert query.startswith('SELECT ')
|
||||
cond_index = query.index(' WHERE ') + 7
|
||||
assert query[cond_index:] == '({}) AND date >= {} AND date < {}'.format(
|
||||
' OR '.join(conds),
|
||||
testutil.PAST_DATE.isoformat(),
|
||||
testutil.FUTURE_DATE.isoformat(),
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize('arglist,fy', testutil.combine_values(
|
||||
[['--report-type', 'text'], ['--format=text'], ['-f', 'txt']],
|
||||
range(2018, 2021),
|
||||
))
|
||||
def test_text_query(arglist, fy):
|
||||
books_path = testutil.test_path(f'books/books/{fy}.beancount')
|
||||
config = testutil.TestConfig(books_path=books_path)
|
||||
arglist += ['select', 'date,', 'narration,', 'account,', 'position']
|
||||
returncode, stdout, stderr = pipe_main(arglist, config)
|
||||
assert returncode == 0
|
||||
stdout.seek(0)
|
||||
lines = iter(stdout)
|
||||
next(lines); next(lines) # Skip header
|
||||
for count, line in enumerate(lines, 1):
|
||||
assert re.match(rf'^{fy}-\d\d-\d\d\s+{fy} donation\b', line)
|
||||
assert count >= 2
|
||||
|
||||
@pytest.mark.parametrize('arglist,fy', testutil.combine_values(
|
||||
[['--format=csv'], ['-f', 'csv'], ['-t', 'csv']],
|
||||
range(2018, 2021),
|
||||
))
|
||||
def test_csv_query(arglist, fy):
|
||||
books_path = testutil.test_path(f'books/books/{fy}.beancount')
|
||||
config = testutil.TestConfig(books_path=books_path)
|
||||
arglist += ['select', 'date,', 'narration,', 'account,', 'position']
|
||||
returncode, stdout, stderr = pipe_main(arglist, config)
|
||||
assert returncode == 0
|
||||
stdout.seek(0)
|
||||
for count, row in enumerate(csv.DictReader(stdout), 1):
|
||||
assert re.fullmatch(rf'{fy}-\d\d-\d\d', row['date'])
|
||||
assert row['narration'] == f'{fy} donation'
|
||||
assert count >= 2
|
||||
|
||||
@pytest.mark.parametrize('end_index', range(3))
|
||||
def test_rewrite_query(end_index):
|
||||
books_path = testutil.test_path(f'books/books/2018.beancount')
|
||||
config = testutil.TestConfig(books_path=books_path)
|
||||
accounts = ['Assets', 'Income']
|
||||
expected = frozenset(accounts[:end_index])
|
||||
rewrite_paths = [
|
||||
testutil.test_path(f'userconfig/Rewrite{s}.yml')
|
||||
for s in expected
|
||||
]
|
||||
arglist = [f'--rewrite-rules={path}' for path in rewrite_paths]
|
||||
arglist.append('--format=txt')
|
||||
arglist.append('select any_meta("root") as root')
|
||||
returncode, stdout, stderr = pipe_main(arglist, config)
|
||||
assert returncode == 0
|
||||
stdout.seek(0)
|
||||
actual = frozenset(line.rstrip('\n') for line in stdout)
|
||||
assert expected.issubset(actual)
|
||||
assert frozenset(accounts).difference(expected).isdisjoint(actual)
|
2
tests/userconfig/RewriteAssets.yml
Normal file
2
tests/userconfig/RewriteAssets.yml
Normal file
|
@ -0,0 +1,2 @@
|
|||
- if: [.account in Assets]
|
||||
then: [root = Assets]
|
2
tests/userconfig/RewriteIncome.yml
Normal file
2
tests/userconfig/RewriteIncome.yml
Normal file
|
@ -0,0 +1,2 @@
|
|||
- if: [.account in Income]
|
||||
then: [root = Income]
|
Loading…
Reference in a new issue