From 6fa12789661ef7fdf62a4d3216a98b10582ac2c9 Mon Sep 17 00:00:00 2001
From: Brett Smith <brettcsmith@brettcsmith.org>
Date: Tue, 9 Mar 2021 09:39:56 -0500
Subject: [PATCH] query: Improve formatting of ODS output.

* Provide dedicated formatting for more Beancount types.
* Improve code to determine when we're looking up link metadata
  and should format output as links.
---
 conservancy_beancount/reports/core.py  |   2 +-
 conservancy_beancount/reports/query.py | 154 ++++++++++++++++++-------
 tests/test_reports_query.py            | 112 ++++++++++++------
 3 files changed, 193 insertions(+), 75 deletions(-)

diff --git a/conservancy_beancount/reports/core.py b/conservancy_beancount/reports/core.py
index 43550f1..ca83943 100644
--- a/conservancy_beancount/reports/core.py
+++ b/conservancy_beancount/reports/core.py
@@ -1368,7 +1368,7 @@ class BaseODS(BaseSpreadsheet[RT, ST], metaclass=abc.ABCMeta):
             )
             return self.multiline_cell(lines, **attrs)
 
-    def currency_cell(self, amount: data.Amount, **attrs: Any) -> odf.table.TableCell:
+    def currency_cell(self, amount: bc_amount._Amount, **attrs: Any) -> odf.table.TableCell:
         if 'stylename' not in attrs:
             attrs['stylename'] = self.currency_style(amount.currency)
         number, currency = amount
diff --git a/conservancy_beancount/reports/query.py b/conservancy_beancount/reports/query.py
index 77a5222..9ca9611 100644
--- a/conservancy_beancount/reports/query.py
+++ b/conservancy_beancount/reports/query.py
@@ -33,6 +33,7 @@ from typing import (
     Union,
 )
 from ..beancount_types import (
+    MetaKey,
     MetaValue,
     Posting,
     Transaction,
@@ -41,6 +42,8 @@ from ..beancount_types import (
 from decimal import Decimal
 from pathlib import Path
 from beancount.core.amount import _Amount as BeancountAmount
+from beancount.core.inventory import Inventory
+from beancount.core.position import _Position as Position
 
 import beancount.query.numberify as bc_query_numberify
 import beancount.query.query_compile as bc_query_compile
@@ -59,10 +62,6 @@ from .. import config as configmod
 from .. import data
 from .. import rtutil
 
-BUILTIN_FIELDS: AbstractSet[str] = frozenset(itertools.chain(
-    bc_query_env.TargetsEnvironment.columns,  # type:ignore[has-type]
-    bc_query_env.TargetsEnvironment.functions,  # type:ignore[has-type]
-))
 PROGNAME = 'query-report'
 logger = logging.getLogger('conservancy_beancount.reports.query')
 
@@ -80,6 +79,12 @@ EnvironmentFunctions = Dict[
 RowTypes = Sequence[Tuple[str, Type]]
 Rows = Sequence[NamedTuple]
 Store = List[Any]
+QueryExpression = Union[
+    bc_query_parser.Column,
+    bc_query_parser.Constant,
+    bc_query_parser.Function,
+    bc_query_parser.UnaryOp,
+]
 QueryStatement = Union[
     bc_query_parser.Balances,
     bc_query_parser.Journal,
@@ -124,14 +129,45 @@ class BooksLoader:
 
 
 class QueryODS(core.BaseODS[NamedTuple, None]):
+    META_FNAMES = frozenset([
+        'any_meta',
+        'entry_meta',
+        'meta',
+        'meta_docs',
+        'str_meta',
+    ])
+
     def is_empty(self) -> bool:
         return not self.sheet.childNodes
 
     def section_key(self, row: NamedTuple) -> None:
         return None
 
+    def _generic_cell(self, value: Any) -> odf.table.TableCell:
+        if isinstance(value, Iterable) and not isinstance(value, (str, tuple)):
+            return self.multiline_cell(value)
+        else:
+            return self.string_cell('' if value is None else str(value))
+
+    def _inventory_cell(self, value: Inventory) -> odf.table.TableCell:
+        return self.balance_cell(core.Balance(pos.units for pos in value))
+
+    def _link_string_cell(self, value: str) -> odf.table.TableCell:
+        return self.meta_links_cell(value.split())
+
+    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
+        return self._cell_type(type(value))(value)
+
+    def _position_cell(self, value: Position) -> odf.table.TableCell:
+        return self.currency_cell(value.units)
+
     def _cell_type(self, row_type: Type) -> CellFunc:
-        if issubclass(row_type, BeancountAmount):
+        """Return a function to create a cell, for non-metadata row types."""
+        if issubclass(row_type, Inventory):
+            return self._inventory_cell
+        elif issubclass(row_type, Position):
+            return self._position_cell
+        elif issubclass(row_type, BeancountAmount):
             return self.currency_cell
         elif issubclass(row_type, (int, float, Decimal)):
             return self.float_cell
@@ -142,49 +178,85 @@ class QueryODS(core.BaseODS[NamedTuple, None]):
         else:
             return self._generic_cell
 
-    def _generic_cell(self, value: Any) -> odf.table.TableCell:
-        return self.string_cell('' if value is None else str(value))
-
-    def _link_cell(self, value: MetaValue) -> odf.table.TableCell:
-        if isinstance(value, str):
-            return self.meta_links_cell(value.split())
+    def _link_cell_type(self, row_type: Type) -> CellFunc:
+        """Return a function to create a cell from metadata with documentation links."""
+        if issubclass(row_type, str):
+            return self._link_string_cell
+        elif issubclass(row_type, tuple):
+            return self._generic_cell
+        elif issubclass(row_type, Iterable):
+            return self.meta_links_cell
         else:
-            return self._generic_cell(value)
+            return self._generic_cell
 
-    def _metadata_cell(self, value: MetaValue) -> odf.table.TableCell:
-        return self._cell_type(type(value))(value)
+    def _meta_target(self, target: QueryExpression) -> Optional[MetaKey]:
+        """Return the metadata key looked up by this target, if any
 
-    def _cell_types(self, row_types: RowTypes) -> Iterator[CellFunc]:
-        for name, row_type in row_types:
-            if row_type is object:
-                if name.replace('_', '-') in data.LINK_METADATA:
-                    yield self._link_cell
-                else:
-                    yield self._metadata_cell
-            else:
+        This function takes a parsed target (i.e., what we're SELECTing) and
+        recurses it to see whether it's looking up any metadata. If so, it
+        returns the key of that metadata. Otherwise it returns None.
+        """
+        if isinstance(target, bc_query_parser.UnaryOp):
+            return self._meta_target(target.operand)
+        elif not isinstance(target, bc_query_parser.Function):
+            return None
+        try:
+            operand = target.operands[0]
+        except IndexError:
+            return None
+        if (target.fname in self.META_FNAMES
+            and isinstance(operand, bc_query_parser.Constant)):
+            return operand.value  # type:ignore[no-any-return]
+        else:
+            for operand in target.operands:
+                retval = self._meta_target(operand)
+                if retval is not None:
+                    break
+            return retval
+
+    def _cell_types(self, statement: QueryStatement, row_types: RowTypes) -> Iterator[CellFunc]:
+        """Return functions to create table cells from result rows
+
+        Given a parsed query and the types of return rows, yields a function
+        to create a cell for each column in the row, in order. The returned
+        functions vary in order to provide the best available formatting for
+        different data types.
+        """
+        if (isinstance(statement, bc_query_parser.Select)
+            and isinstance(statement.targets, Sequence)):
+            targets = [t.expression for t in statement.targets]
+        else:
+            # Synthesize something that makes clear we're not loading metadata.
+            targets = [bc_query_parser.Column(name) for name, _ in row_types]
+        for target, (_, row_type) in zip(targets, row_types):
+            meta_key = self._meta_target(target)
+            if meta_key is None:
                 yield self._cell_type(row_type)
+            elif meta_key in data.LINK_METADATA:
+                yield self._link_cell_type(row_type)
+            else:
+                yield self._metadata_cell
 
-    def write_query(self, row_types: RowTypes, rows: Rows) -> None:
+    def write_query(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
         if self.is_empty():
             self.sheet.setAttribute('name', "Query 1")
         else:
             self.use_sheet(f"Query {len(self.document.spreadsheet.childNodes) + 1}")
         for name, row_type in row_types:
-            if row_type is object or issubclass(row_type, str):
-                col_width = 2.0
-            elif issubclass(row_type, BeancountAmount):
+            if issubclass(row_type, datetime.date):
+                col_width = 1.0
+            elif issubclass(row_type, (BeancountAmount, Inventory, Position)):
                 col_width = 1.5
             else:
-                col_width = 1.0
+                col_width = 2.0
             col_style = self.column_style(col_width)
             self.sheet.addElement(odf.table.TableColumn(stylename=col_style))
         self.add_row(*(
-            self.string_cell(data.Metadata.human_name(name.replace('_', '-')),
-                             stylename=self.style_bold)
+            self.string_cell(data.Metadata.human_name(name), stylename=self.style_bold)
             for name, _ in row_types
         ))
         self.lock_first_row()
-        cell_funcs = list(self._cell_types(row_types))
+        cell_funcs = list(self._cell_types(statement, row_types))
         for row in rows:
             self.add_row(*(
                 cell_func(value)
@@ -238,7 +310,7 @@ class AggregateSet(bc_query_compile.EvalAggregator):
 
     def update(self, store: Store, context: Context) -> None:
         value, = self.eval_args(context)
-        if isinstance(value, Sequence) and not isinstance(value, str):
+        if isinstance(value, Sequence) and not isinstance(value, (str, tuple)):
             store[self.handle].update(value)
         else:
             store[self.handle].add(value)
@@ -304,9 +376,9 @@ class BQLShell(bc_query_shell.BQLShell):
             print("(empty)", file=self.outfile)
         else:
             logger.debug("rendering query as %s", output_format)
-            render_func(row_types, rows)
+            render_func(statement, row_types, rows)
 
-    def _render_csv(self, row_types: RowTypes, rows: Rows) -> None:
+    def _render_csv(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
         bc_query_render.render_csv(
             row_types,
             rows,
@@ -315,11 +387,15 @@ class BQLShell(bc_query_shell.BQLShell):
             self.vars['expand'],
         )
 
-    def _render_ods(self, row_types: RowTypes, rows: Rows) -> None:
-        self.ods.write_query(row_types, rows)
-        logger.info("results saved in sheet %s", self.ods.sheet.getAttribute('name'))
+    def _render_ods(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
+        self.ods.write_query(statement, row_types, rows)
+        logger.info(
+            "%s rows of results saved in sheet %s",
+            len(rows),
+            self.ods.sheet.getAttribute('name'),
+        )
 
-    def _render_text(self, row_types: RowTypes, rows: Rows) -> None:
+    def _render_text(self, statement: QueryStatement, row_types: RowTypes, rows: Rows) -> None:
         with contextlib.ExitStack() as stack:
             if self.is_interactive:
                 output = stack.enter_context(self.get_pager())
@@ -394,9 +470,7 @@ ODS reports.
         help="""Query to run non-interactively. If none is provided, and
 standard input is not a terminal, reads the query from stdin instead.
 """)
-
-    args = parser.parse_args(arglist)
-    return args
+    return parser.parse_args(arglist)
 
 def main(arglist: Optional[Sequence[str]]=None,
          stdout: TextIO=sys.stdout,
diff --git a/tests/test_reports_query.py b/tests/test_reports_query.py
index 492069b..8f49478 100644
--- a/tests/test_reports_query.py
+++ b/tests/test_reports_query.py
@@ -21,6 +21,7 @@ import pytest
 from . import testutil
 
 from beancount.core import data as bc_data
+from beancount.query import query_parser as bc_query_parser
 from conservancy_beancount.books import FiscalYear
 from conservancy_beancount.reports import query as qmod
 from conservancy_beancount import rtutil
@@ -38,6 +39,10 @@ class MockRewriteRuleset:
             yield post._replace(units=testutil.Amount(number, currency))
 
 
+@pytest.fixture(scope='module')
+def qparser():
+    return bc_query_parser.Parser()
+
 @pytest.fixture(scope='module')
 def rt():
     return rtutil.RT(testutil.RTClient())
@@ -130,40 +135,47 @@ def test_rewrite_query(end_index):
     assert expected.issubset(actual)
     assert frozenset(accounts).difference(expected).isdisjoint(actual)
 
-def test_ods_amount_formatting():
+def test_ods_amount_formatting(qparser):
+    statement = qparser.parse('SELECT UNITS(position)')
     row_types = [('amount', bc_data.Amount)]
     row_source = [(testutil.Amount(12),), (testutil.Amount(1480, 'JPY'),)]
     ods = qmod.QueryODS()
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
     actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
     assert next(actual)[0].text == 'Amount'
     assert next(actual)[0].text == '$12.00'
     assert next(actual)[0].text == '¥1,480'
     assert next(actual, None) is None
 
-def test_ods_datetime_formatting():
+def test_ods_datetime_formatting(qparser):
+    statement = qparser.parse('SELECT date')
     row_types = [('date', datetime.date)]
     row_source = [(testutil.PAST_DATE,), (testutil.FUTURE_DATE,)]
     ods = qmod.QueryODS()
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
     actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
     assert next(actual)[0].text == 'Date'
     assert next(actual)[0].text == testutil.PAST_DATE.isoformat()
     assert next(actual)[0].text == testutil.FUTURE_DATE.isoformat()
     assert next(actual, None) is None
 
-@pytest.mark.parametrize('meta_key,header_text', [
-    ('check', 'Check'),
-    ('purchase-order', 'Purchase Order'),
-    ('rt-id', 'Ticket'),
+@pytest.mark.parametrize('meta_key,meta_func', [
+    ('check', 'ANY_META'),
+    ('purchase-order', 'META'),
+    ('rt-id', 'META_DOCS'),
 ])
-def test_ods_link_formatting(rt, meta_key, header_text):
-    row_types = [(meta_key.replace('-', '_'), object)]
-    row_source = [('rt:1/5',), ('rt:3 Checks/9.pdf',)]
+def test_ods_link_formatting(qparser, rt, meta_key, meta_func):
+    meta_func_returns_list = meta_func == 'META_DOCS'
+    statement = qparser.parse(f'SELECT {meta_func}({meta_key!r}) AS docs')
+    row_types = [('docs', list if meta_func_returns_list else str)]
+    row_source = [
+        (s.split() if meta_func_returns_list else s,)
+        for s in ['rt:1/5', 'rt:3 Checks/9.pdf']
+    ]
     ods = qmod.QueryODS(rt)
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
     rows = iter(ods.document.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
-    assert next(rows).text == header_text
+    assert next(rows).text == 'Docs'
     actual = iter(
         [link.text for link in row.getElementsByType(odf.text.A)]
         for row in rows
@@ -172,50 +184,54 @@ def test_ods_link_formatting(rt, meta_key, header_text):
     assert next(actual) == ['rt:3', '9.pdf']
     assert next(actual, None) is None
 
-def test_ods_meta_formatting():
-    row_types = [('metadata', object)]
+def test_ods_meta_formatting(qparser):
+    statement = qparser.parse('SELECT ANY_META("entity") AS entity')
+    row_types = [('entity', object)]
     row_source = [(testutil.Amount(14),), (None,), ('foo bar',)]
     ods = qmod.QueryODS()
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, row_types, row_source)
     actual = testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
-    assert next(actual)[0].text == 'Metadata'
+    assert next(actual)[0].text == 'Entity'
     assert next(actual)[0].text == '$14.00'
     assert next(actual)[0].text == ''
     assert next(actual)[0].text == 'foo bar'
     assert next(actual, None) is None
 
-def test_ods_multicolumn_write(rt):
-    row_types = [('date', datetime.date), ('rt-id', object), ('desc', str)]
+def test_ods_multicolumn_write(qparser, rt):
+    statement = qparser.parse(
+        'SELECT MIN(date) AS date, SET(META_DOCS("rt-id")) AS tix, STR_META("entity") AS entity',
+    )
+    row_types = [('date', datetime.date), ('tix', set), ('entity', str)]
     row_source = [
-        (testutil.PAST_DATE, 'rt:1', 'aaa'),
-        (testutil.FY_START_DATE, 'rt:2', 'bbb'),
-        (testutil.FUTURE_DATE, 'rt:3', 'ccc'),
+        (testutil.PAST_DATE, {'rt:1'}, 'AA'),
+        (testutil.FY_START_DATE, {'rt:2'}, 'BB'),
+        (testutil.FUTURE_DATE, {'rt:3', 'rt:4'}, 'CC'),
     ]
     ods = qmod.QueryODS(rt)
-    ods.write_query(row_types, row_source)
+    ods.write_query(statement, list(row_types), list(row_source))
     actual = iter(
         cell.text
         for row in testutil.ODSCell.from_sheet(ods.document.spreadsheet.firstChild)
         for cell in row
     )
-    assert next(actual) == 'Date'
-    assert next(actual) == 'Ticket'
-    assert next(actual) == 'Desc'
+    for expected, _ in row_types:
+        assert next(actual) == expected.title()
     assert next(actual) == testutil.PAST_DATE.isoformat()
     assert next(actual) == 'rt:1'
-    assert next(actual) == 'aaa'
+    assert next(actual) == 'AA'
     assert next(actual) == testutil.FY_START_DATE.isoformat()
     assert next(actual) == 'rt:2'
-    assert next(actual) == 'bbb'
+    assert next(actual) == 'BB'
     assert next(actual) == testutil.FUTURE_DATE.isoformat()
-    assert next(actual) == 'rt:3'
-    assert next(actual) == 'ccc'
+    assert frozenset(next(actual).split('\0')) == row_source[-1][1]
+    assert next(actual) == 'CC'
     assert next(actual, None) is None
 
-def test_ods_is_empty():
+def test_ods_is_empty(qparser):
+    statement = qparser.parse('SELECT * WHERE date < 1900-01-01')
     ods = qmod.QueryODS()
     assert ods.is_empty()
-    ods.write_query([], [])
+    ods.write_query(statement, [], [])
     assert not ods.is_empty()
 
 @pytest.mark.parametrize('fy,account,amt_prefix', [
@@ -232,8 +248,9 @@ def test_ods_output(fy, account, amt_prefix):
     ]
     returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
     assert returncode == 0
-    stdout.seek(0)
-    ods_doc = odf.opendocument.load(stdout)
+    with stdout:
+        stdout.seek(0)
+        ods_doc = odf.opendocument.load(stdout)
     rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
     next(rows)  # Skip header row
     amt_pattern = rf'^{re.escape(amt_prefix)}\d'
@@ -243,3 +260,30 @@ def test_ods_output(fy, account, amt_prefix):
         assert narration.text.startswith(f'{fy} ')
         assert re.match(amt_pattern, amount.text)
     assert count
+
+def test_ods_aggregate_output():
+    books_path = testutil.test_path(f'books/books/2020.beancount')
+    config = testutil.TestConfig(books_path=books_path)
+    arglist = [
+        '-O', '-',
+        '-f', 'ods',
+        'SELECT account, SET(narration), SUM(UNITS(position))',
+        'WHERE date >= 2020-04-01 AND date <= 2020-04-02',
+        'GROUP BY account ORDER BY account ASC',
+    ]
+    returncode, stdout, stderr = pipe_main(arglist, config, io.BytesIO)
+    assert returncode == 0
+    with stdout:
+        stdout.seek(0)
+        ods_doc = odf.opendocument.load(stdout)
+    rows = iter(ods_doc.spreadsheet.firstChild.getElementsByType(odf.table.TableRow))
+    next(rows)  # Skip header row
+    actual = {}
+    for row in rows:
+        acct, descs, balance = row.childNodes
+        actual[acct.text] = (frozenset(descs.text.split('\0')), balance.text)
+    in_desc = {'2020 donation'}
+    ex_desc = {'2020 bank maintenance fee'}
+    assert actual['Income:Donations'] == (in_desc, '$20.20')
+    assert actual['Expenses:BankingFees'] == (ex_desc, '$1.00')
+    assert actual['Assets:Checking'] == (in_desc | ex_desc, '($21.20)')