query: Improve formatting of ODS output.

* Provide dedicated formatting for more Beancount types. * Improve code to determine when we're looking up link metadata and should format output as links.
2021-03-09 09:39:56 -05:00 · 2021-03-09 09:39:56 -05:00 · 6fa1278966
commit 6fa1278966
parent 8af45e5f8a
3 changed files with 193 additions and 75 deletions
--- a/conservancy_beancount/reports/core.py
+++ b/conservancy_beancount/reports/core.py
@ -1368,7 +1368,7 @@ class BaseODS(BaseSpreadsheet[RT, ST], metaclass=abc.ABCMeta):
            )
            return self.multiline_cell(lines, **attrs)

-    def currency_cell(self, amount: data.Amount, **attrs: Any) -> odf.table.TableCell:
+    def currency_cell(self, amount: bc_amount._Amount, **attrs: Any) -> odf.table.TableCell:
        if 'stylename' not in attrs:
            attrs['stylename'] = self.currency_style(amount.currency)
        number, currency = amount
--- a/conservancy_beancount/reports/query.py
+++ b/conservancy_beancount/reports/query.py
@ -33,6 +33,7 @@ from typing import (
    Union,
 )
 from ..beancount_types import (
+    MetaKey,
    MetaValue,
    Posting,
    Transaction,
@ -41,6 +42,8 @@ from ..beancount_types import (
 from decimal import Decimal
 from pathlib import Path
 from beancount.core.amount import _Amount as BeancountAmount
+from beancount.core.inventory import Inventory
+from beancount.core.position import _Position as Position

 import beancount.query.numberify as bc_query_numberify
 import beancount.query.query_compile as bc_query_compile
@ -59,10 +62,6 @@ from .. import config as configmod
 from .. import data
 from .. import rtutil

-BUILTIN_FIELDS: AbstractSet[str] = frozenset(itertools.chain(
-    bc_query_env.TargetsEnvironment.columns,  # type:ignore[has-type]
-    bc_query_env.TargetsEnvironment.functions,  # type:ignore[has-type]
-))
 PROGNAME = 'query-report'
 logger = logging.getLogger('conservancy_beancount.reports.query')

@ -80,6 +79,12 @@ EnvironmentFunctions = Dict[
 RowTypes = Sequence[Tuple[str, Type]]
 Rows = Sequence[NamedTuple]
 Store = List[Any]
+QueryExpression = Union[
+    bc_query_parser.Column,
+    bc_query_parser.Constant,
+    bc_query_parser.Function,
+    bc_query_parser.UnaryOp,
+]
 QueryStatement = Union[
    bc_query_parser.Balances,
    bc_query_parser.Journal,
@ -124,14 +129,45 @@ class BooksLoader:


 class QueryODS(core.BaseODS[NamedTuple, None]):
+    META_FNAMES = frozenset([
+        'any_meta',
+        'entry_meta',
+        'meta',
+        'meta_docs',
+        'str_meta',
+    ])
+
    def is_empty(self) -> bool:
        return not self.sheet.childNodes

    def section_key(self, row: NamedTuple) -> None:
        return None

+    def _generic_cell(self, value: Any) -> odf.table.TableCell:
+        if isinstance(value, Iterable) and not isinstance(value, (str, tuple)):
+            return self.multiline_cell(value)
+        else:
+            return self.string_cell('' if value is None else str(value))
+
+    def _inventory_cell(self, value: Inventory) -> odf.table.TableCell:
+        return self.balance_cell(core.Balance(pos.units for pos in value))
+
+    def _link_string_cell(self, value: str) -> odf.table.TableCell:
+        return self.meta_links_cell(value.split())
+
+    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
+        return self._cell_type(type(value))(value)
+
+    def _position_cell(self, value: Position) -> odf.table.TableCell:
+        return self.currency_cell(value.units)
+
    def _cell_type(self, row_type: Type) -> CellFunc:
-        if issubclass(row_type, BeancountAmount):
+        """Return a function to create a cell, for non-metadata row types."""
+        if issubclass(row_type, Inventory):
+            return self._inventory_cell
+        elif issubclass(row_type, Position):
+            return self._position_cell
+        elif issubclass(row_type, BeancountAmount):
            return self.currency_cell
        elif issubclass(row_type, (int, float, Decimal)):
            return self.float_cell
@ -142,49 +178,85 @@ class QueryODS(core.BaseODS[NamedTuple, None]):
        else:
            return self._generic_cell

-    def _generic_cell(self, value: Any) -> odf.table.TableCell:
-        return self.string_cell('' if value is None else str(value))
-
-    def _link_cell(self, value: MetaValue) -> odf.table.TableCell:
-        if isinstance(value, str):
-            return self.meta_links_cell(value.split())
+    def _link_cell_type(self, row_type: Type) -> CellFunc:
+        """Return a function to create a cell from metadata with documentation links."""
+        if issubclass(row_type, str):
+            return self._link_string_cell
+        elif issubclass(row_type, tuple):
+            return self._generic_cell
+        elif issubclass(row_type, Iterable):
+            return self.meta_links_cell
        else:
-            return self._generic_cell(value)
+            return self._generic_cell

-    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
-        return self._cell_type(type(value))(value)
+    def _meta_target(self, target: QueryExpression) -> Optional[MetaKey]:
+        """Return the metadata key looked up by this target, if any

-    def _cell_types(self, row_types: RowTypes) -> Iterator[CellFunc]:
-        for name, row_type in row_types:
-            if row_type is object:
-                if name.replace('_', '-') in data.LINK_METADATA:
-                    yield self._link_cell
-                else:
-                    yield self._metadata_cell
-            else:
+        This function takes a parsed target (i.e., what we're SELECTing) and
+        recurses it to see whether it's looking up any metadata. If so, it
+        returns the key of that metadata. Otherwise it returns None.
+        """
+        if isinstance(target, bc_query_parser.UnaryOp):
+            return self._meta_target(target.operand)
+        elif not isinstance(target, bc_query_parser.Function):
+            return None
+        try:
+            operand = target.operands[0]
+        except IndexError:
+            return None
+        if (target.fname in self.META_FNAMES
+            and isinstance(operand, bc_query_parser.Constant)):
+            return operand.value  # type:ignore[no-any-return]
+        else:
+            for operand in target.operands:
+                retval = self._meta_target(operand)
+                if retval is not None:
+                    break
+            return retval
+
+    def _cell_types(self, statement: QueryStatement, row_types: RowTypes) -> Iterator[CellFunc]:
+        """Return functions to create table cells from result rows
+
+        Given a parsed query and the types of return rows, yields a function
+        to create a cell for each column in the row, in order. The returned
+        functions vary in order to provide the best available formatting for
+        different data types.
+        """
+        if (isinstance(statement, bc_query_parser.Select)
+            and isinstance(statement.targets, Sequence)):
+            targets = [t.expression for t in statement.targets]
+        else:
+            # Synthesize something that makes clear we're not loading metadata.
+            targets = [bc_query_parser.Column(name) for name, _ in row_types]
+        for target, (_, row_type) in zip(targets, row_types):
+            meta_key = self._meta_target(target)
+            if meta_key is None:
                yield self._cell_type(row_type)
+            elif meta_key in data.LINK_METADATA:
+                yield self._link_cell_type(row_type)
+            else:
+                yield self._metadata_cell

-    def write_query(self, row_types: RowTypes, rows: Rows) -> None:
+    def write_query(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
        if self.is_empty():
            self.sheet.setAttribute('name', "Query 1")
        else:
            self.use_sheet(f"Query {len(self.document.spreadsheet.childNodes) + 1}")
        for name, row_type in row_types:
-            if row_type is object or issubclass(row_type, str):
-                col_width = 2.0
-            elif issubclass(row_type, BeancountAmount):
+            if issubclass(row_type, datetime.date):
+                col_width = 1.0
+            elif issubclass(row_type, (BeancountAmount, Inventory, Position)):
                col_width = 1.5
            else:
-                col_width = 1.0
+                col_width = 2.0
            col_style = self.column_style(col_width)
            self.sheet.addElement(odf.table.TableColumn(stylename=col_style))
        self.add_row(*(
-            self.string_cell(data.Metadata.human_name(name.replace('_', '-')),
-                             stylename=self.style_bold)
+            self.string_cell(data.Metadata.human_name(name), stylename=self.style_bold)
            for name, _ in row_types
        ))
        self.lock_first_row()
-        cell_funcs = list(self._cell_types(row_types))
+        cell_funcs = list(self._cell_types(statement, row_types))
        for row in rows:
            self.add_row(*(
                cell_func(value)
@ -238,7 +310,7 @@ class AggregateSet(bc_query_compile.EvalAggregator):

    def update(self, store: Store, context: Context) -> None:
        value, = self.eval_args(context)
-        if isinstance(value, Sequence) and not isinstance(value, str):
+        if isinstance(value, Sequence) and not isinstance(value, (str, tuple)):
            store[self.handle].update(value)
        else:
            store[self.handle].add(value)
@ -304,9 +376,9 @@ class BQLShell(bc_query_shell.BQLShell):
            print("(empty)", file=self.outfile)
        else:
            logger.debug("rendering query as %s", output_format)
-            render_func(row_types, rows)
+            render_func(statement, row_types, rows)

-    def _render_csv(self, row_types: RowTypes, rows: Rows) -> None:
+    def _render_csv(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
        bc_query_render.render_csv(
            row_types,
            rows,
@ -315,11 +387,15 @@ class BQLShell(bc_query_shell.BQLShell):
            self.vars['expand'],
        )

-    def _render_ods(self, row_types: RowTypes, rows: Rows) -> None:
-        self.ods.write_query(row_types, rows)
-        logger.info("results saved in sheet %s", self.ods.sheet.getAttribute('name'))
+    def _render_ods(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
+        self.ods.write_query(statement, row_types, rows)
+        logger.info(
+            "%s rows of results saved in sheet %s",
+            len(rows),
+            self.ods.sheet.getAttribute('name'),
+        )

-    def _render_text(self, row_types: RowTypes, rows: Rows) -> None:
+    def _render_text(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
        with contextlib.ExitStack() as stack:
            if self.is_interactive:
                output = stack.enter_context(self.get_pager())
@ -394,9 +470,7 @@ ODS reports.
        help="""Query to run non-interactively. If none is provided, and
 standard input is not a terminal, reads the query from stdin instead.
 """)
-
-    args = parser.parse_args(arglist)
-    return args
+    return parser.parse_args(arglist)

 def main(arglist: Optional[Sequence[str]]=None,
         stdout: TextIO=sys.stdout,
--- a/tests/test_reports_query.py
+++ b/tests/test_reports_query.py
@ -21,6 +21,7 @@ import pytest
 from . import testutil

 from beancount.core import data as bc_data
+from beancount.query import query_parser as bc_query_parser
 from conservancy_beancount.books import FiscalYear
 from conservancy_beancount.reports import query as qmod
 from conservancy_beancount import rtutil
@ -38,6 +39,10 @@ class MockRewriteRuleset:
            yield post._replace(units=testutil.Amount(number, currency))


+@pytest.fixture(scope='module')
+def qparser():
+    return bc_query_parser.Parser()
+
@pytest.fixture(scope='module')
 def rt():
    return rtutil.RT(testutil.RTClient())
@ -130,40 +135,47 @@ def test_rewrite_query(end_index):
    assert expected.issubset(actual)
    assert frozenset(accounts).difference(expected).isdisjoint(actual)

-def test_ods_amount_formatting():
+def test_ods_amount_formatting(qparser):
+    statement = qparser.parse('SELECT UNITS(position)')
    row_types = [('amount', bc_data.Amount)]
    row_source = [(testutil.Amount(12),), (testutil.Amount(1480, 'JPY'),)]
    ods = qmod.QueryODS()
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
    assert next(actual)[0].text == 'Amount'
    assert next(actual)[0].text == '$12.00'
    assert next(actual)[0].text == '¥1,480'
    assert next(actual, None) is None

-def test_ods_datetime_formatting():
+def test_ods_datetime_formatting(qparser):
+    statement = qparser.parse('SELECT date')
    row_types = [('date', datetime.date)]
    row_source = [(testutil.PAST_DATE,), (testutil.FUTURE_DATE,)]
    ods = qmod.QueryODS()
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
    assert next(actual)[0].text == 'Date'
    assert next(actual)[0].text == testutil.PAST_DATE.isoformat()
    assert next(actual)[0].text == testutil.FUTURE_DATE.isoformat()
    assert next(actual, None) is None

-@pytest.mark.parametrize('meta_key,header_text', [
-    ('check', 'Check'),
-    ('purchase-order', 'Purchase Order'),
-    ('rt-id', 'Ticket'),
+@pytest.mark.parametrize('meta_key,meta_func', [
+    ('check', 'ANY_META'),
+    ('purchase-order', 'META'),
+    ('rt-id', 'META_DOCS'),
 ])
-def test_ods_link_formatting(rt, meta_key, header_text):
-    row_types = [(meta_key.replace('-', '_'), object)]
-    row_source = [('rt:1/5',), ('rt:3 Checks/9.pdf',)]
+def test_ods_link_formatting(qparser, rt, meta_key, meta_func):
+    meta_func_returns_list = meta_func == 'META_DOCS'
+    statement = qparser.parse(f'SELECT {meta_func}({meta_key!r}) AS docs')
+    row_types = [('docs', list if meta_func_returns_list else str)]
+    row_source = [
+        (s.split() if meta_func_returns_list else s,)
+        for s in ['rt:1/5', 'rt:3 Checks/9.pdf']
+    ]
    ods = qmod.QueryODS(rt)
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
    rows = iter(ods.document.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
-    assert next(rows).text == header_text
+    assert next(rows).text == 'Docs'
    actual = iter(
        [link.text for link in row.getElementsByType(odf.text.A)]
        for row in rows
@ -172,50 +184,54 @@ def test_ods_link_formatting(rt, meta_key, header_text):
    assert next(actual) == ['rt:3', '9.pdf']
    assert next(actual, None) is None

-def test_ods_meta_formatting():
-    row_types = [('metadata', object)]
+def test_ods_meta_formatting(qparser):
+    statement = qparser.parse('SELECT ANY_META("entity") AS entity')
+    row_types = [('entity', object)]
    row_source = [(testutil.Amount(14),), (None,), ('foo bar',)]
    ods = qmod.QueryODS()
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
    actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
-    assert next(actual)[0].text == 'Metadata'
+    assert next(actual)[0].text == 'Entity'
    assert next(actual)[0].text == '$14.00'
    assert next(actual)[0].text == ''
    assert next(actual)[0].text == 'foo bar'
    assert next(actual, None) is None

-def test_ods_multicolumn_write(rt):
-    row_types = [('date', datetime.date), ('rt-id', object), ('desc', str)]
+def test_ods_multicolumn_write(qparser, rt):
+    statement = qparser.parse(
+        'SELECT MIN(date) AS date, SET(META_DOCS("rt-id")) AS tix, STR_META("entity") AS entity',
+    )
+    row_types = [('date', datetime.date), ('tix', set), ('entity', str)]
    row_source = [
-        (testutil.PAST_DATE, 'rt:1', 'aaa'),
-        (testutil.FY_START_DATE, 'rt:2', 'bbb'),
-        (testutil.FUTURE_DATE, 'rt:3', 'ccc'),
+        (testutil.PAST_DATE, {'rt:1'}, 'AA'),
+        (testutil.FY_START_DATE, {'rt:2'}, 'BB'),
+        (testutil.FUTURE_DATE, {'rt:3', 'rt:4'}, 'CC'),
    ]
    ods = qmod.QueryODS(rt)
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, list(row_types), list(row_source))
    actual = iter(
        cell.text
        for row in testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
        for cell in row
    )
-    assert next(actual) == 'Date'
-    assert next(actual) == 'Ticket'
-    assert next(actual) == 'Desc'
+    for expected, _ in row_types:
+        assert next(actual) == expected.title()
    assert next(actual) == testutil.PAST_DATE.isoformat()
    assert next(actual) == 'rt:1'
-    assert next(actual) == 'aaa'
+    assert next(actual) == 'AA'
    assert next(actual) == testutil.FY_START_DATE.isoformat()
    assert next(actual) == 'rt:2'
-    assert next(actual) == 'bbb'
+    assert next(actual) == 'BB'
    assert next(actual) == testutil.FUTURE_DATE.isoformat()
-    assert next(actual) == 'rt:3'
-    assert next(actual) == 'ccc'
+    assert frozenset(next(actual).split('\0')) == row_source[-1][1]
+    assert next(actual) == 'CC'
    assert next(actual, None) is None

-def test_ods_is_empty():
+def test_ods_is_empty(qparser):
+    statement = qparser.parse('SELECT * WHERE date < 1900-01-01')
    ods = qmod.QueryODS()
    assert ods.is_empty()
-    ods.write_query([], [])
+    ods.write_query(statement, [], [])
    assert not ods.is_empty()

@pytest.mark.parametrize('fy,account,amt_prefix', [
@ -232,8 +248,9 @@ def test_ods_output(fy, account, amt_prefix):
    ]
    returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
    assert returncode == 0
-    stdout.seek(0)
-    ods_doc = odf.opendocument.load(stdout)
+    with stdout:
+        stdout.seek(0)
+        ods_doc = odf.opendocument.load(stdout)
    rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
    next(rows)  # Skip header row
    amt_pattern = rf'^{re.escape(amt_prefix)}\d'
@ -243,3 +260,30 @@ def test_ods_output(fy, account, amt_prefix):
        assert narration.text.startswith(f'{fy} ')
        assert re.match(amt_pattern, amount.text)
    assert count
+
+def test_ods_aggregate_output():
+    books_path = testutil.test_path(f'books/books/2020.beancount')
+    config = testutil.TestConfig(books_path=books_path)
+    arglist = [
+        '-O', '-',
+        '-f', 'ods',
+        'SELECT account, SET(narration), SUM(UNITS(position))',
+        'WHERE date >= 2020-04-01 AND date <= 2020-04-02',
+        'GROUP BY account ORDER BY account ASC',
+    ]
+    returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
+    assert returncode == 0
+    with stdout:
+        stdout.seek(0)
+        ods_doc = odf.opendocument.load(stdout)
+    rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
+    next(rows)  # Skip header row
+    actual = {}
+    for row in rows:
+        acct, descs, balance = row.childNodes
+        actual[acct.text] = (frozenset(descs.text.split('\0')), balance.text)
+    in_desc = {'2020 donation'}
+    ex_desc = {'2020 bank maintenance fee'}
+    assert actual['Income:Donations'] == (in_desc, '$20.20')
+    assert actual['Expenses:BankingFees'] == (ex_desc, '$1.00')
+    assert actual['Assets:Checking'] == (in_desc | ex_desc, '($21.20)')