split_ods_links: New tool.

See docstring—this is mostly a post-filter to improve Excel compatibility.
2020-09-08 23:37:00 -04:00 · 2020-09-08 23:37:00 -04:00 · 3219bf89d2
commit 3219bf89d2
parent da056917bf
2 changed files with 140 additions and 1 deletions
--- a/conservancy_beancount/tools/split_ods_links.py
+++ b/conservancy_beancount/tools/split_ods_links.py
@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+"""split_ods_links.py - Rewrite an ODS to have at most one link per cell
+
+This is useful when you plan to send the spreadsheet to someone who uses Excel,
+which only supports one link per cell.
+"""
+# Copyright © 2020 Brett Smith
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import argparse
+import concurrent.futures as futmod
+import logging
+import os
+import sys
+
+from pathlib import Path
+from zipfile import BadZipFile
+
+import odf.opendocument  # type:ignore[import]
+import odf.table  # type:ignore[import]
+import odf.text  # type:ignore[import]
+
+from ..reports.core import BaseODS
+
+from typing import (
+    Iterator,
+    Optional,
+    Sequence,
+    TextIO,
+    Tuple,
+)
+
+from .. import cliutil
+
+# Program name given to argparse and to cliutil.make_entry_point.
+PROGNAME = 'split-ods-links'
+# Module logger; main() reports unreadable or unparseable spreadsheets here.
+logger = logging.getLogger('conservancy_beancount.tools.split_ods_links')
+
+class ODS(BaseODS[Tuple[None], None]):
+    """Load an existing ODS file and split multi-link cells across rows.
+
+    ``dirty`` records whether ``split_link_cells`` changed the document.
+    """
+
+    def __init__(self, ods_path: Path) -> None:
+        """Load the spreadsheet at ``ods_path`` as this object's document."""
+        super().__init__()
+        self.document = odf.opendocument.load(ods_path)
+        self.dirty = False
+
+    def section_key(self, row: Tuple[None]) -> None:
+        """Not implemented for this tool; always raises NotImplementedError."""
+        raise NotImplementedError("split_ods_links.ODS.section_key")
+
+    def split_row_cells(self, row: odf.table.TableRow, count: int) -> Iterator[odf.table.TableRow]:
+        """Yield ``count`` copies of ``row``, one per child-node position.
+
+        In the copy for position N, each cell keeps only its Nth child node.
+        A cell with no Nth child is replaced by a new, empty TableCell.
+        """
+        for row_index in range(count):
+            new_row = self.copy_element(row)
+            for cell_index, cell in enumerate(new_row.childNodes):
+                try:
+                    cell.childNodes = [cell.childNodes[row_index]]
+                except IndexError:
+                    new_row.childNodes[cell_index] = odf.table.TableCell()
+            yield new_row
+
+    def split_link_cells(self) -> None:
+        """Replace each row holding a multi-link cell with several rows.
+
+        A row is rewritten when any of its cells contains more than one
+        link.  It is replaced by as many rows as the largest number of
+        child nodes in any of its cells, and ``dirty`` is set to True.
+        """
+        for sheet in self.document.spreadsheet.getElementsByType(odf.table.Table):
+            for row in sheet.getElementsByType(odf.table.TableRow):
+                cells = row.getElementsByType(odf.table.TableCell)
+                link_counts = [len(cell.getElementsByType(odf.text.A)) for cell in cells]
+                if not any(count > 1 for count in link_counts):
+                    continue
+                child_counts = [len(cell.childNodes) for cell in cells]
+                # getElementsByType searches all descendants, so the row may
+                # sit under a header-rows or row-group element rather than
+                # directly under the table.  Edit the row's actual parent.
+                parent = row.parentNode
+                for new_row in self.split_row_cells(row, max(child_counts)):
+                    parent.insertBefore(new_row, row)
+                parent.removeChild(row)
+                self.dirty = True
+
+    @classmethod
+    def run_split(cls, path: Path, suffix: str) -> bool:
+        """Split links in the spreadsheet at ``path`` and save if it changed.
+
+        The output name is the input name with ``suffix`` inserted before the
+        first ``.``; an empty suffix therefore writes over the input file.
+        Nothing is written when no row needed splitting.
+
+        Returns True if the spreadsheet was modified and saved.
+        """
+        ods = cls(path)
+        ods.split_link_cells()
+        if ods.dirty:
+            if '.' in path.name:
+                out_name = path.name.replace('.', f'{suffix}.', 1)
+            else:
+                # Without a dot there is nothing to insert before; append the
+                # suffix so a non-empty suffix never overwrites the input.
+                out_name = f'{path.name}{suffix}'
+            ods.save_path(path.with_name(out_name))
+        return ods.dirty
+
+
+def parse_arguments(arglist: Optional[Sequence[str]]=None) -> argparse.Namespace:
+    """Build the command-line parser and parse ``arglist`` with it.
+
+    Besides the shared version, loglevel, and jobs options from cliutil,
+    the parser accepts ``--suffix``/``-s`` and one or more ODS paths.
+    """
+    suffix_help = """Suffix to add to filenames for modified spreadsheets.
+Pass an empty string argument to overwrite the original spreadsheet.
+Default %(default)r.
+"""
+    paths_help = """ODS file(s) to split links in
+"""
+    parser = argparse.ArgumentParser(prog=PROGNAME)
+    shared_options = (
+        cliutil.add_version_argument,
+        cliutil.add_loglevel_argument,
+        cliutil.add_jobs_argument,
+    )
+    for add_option in shared_options:
+        add_option(parser)
+    parser.add_argument('--suffix', '-s', default='_split', help=suffix_help)
+    parser.add_argument(
+        'ods_paths',
+        metavar='ODS_PATH',
+        type=Path,
+        nargs=argparse.ONE_OR_MORE,
+        help=paths_help,
+    )
+    return parser.parse_args(arglist)
+
+def main(arglist: Optional[Sequence[str]]=None,
+         stdout: TextIO=sys.stdout,
+         stderr: TextIO=sys.stderr,
+) -> int:
+    """Split links in every ODS file named on the command line.
+
+    Each spreadsheet is handled by ``ODS.run_split`` in a worker process.
+    ``stdout`` and ``stderr`` are accepted but not used by this function.
+
+    Returns 0 on success, or ``os.EX_DATAERR`` if any file could not be
+    read or parsed.
+    """
+    args = parse_arguments(arglist)
+    cliutil.set_loglevel(logger, args.loglevel)
+    # Largest files first, presumably so the longest jobs start earliest.
+    args.ods_paths.sort(key=lambda path: path.stat().st_size, reverse=True)
+
+    returncode = 0
+    max_procs = max(1, min(args.jobs, len(args.ods_paths)))
+    with futmod.ProcessPoolExecutor(max_procs) as pool:
+        # Keep the futures in a list, not a set: a set iterates in arbitrary
+        # order, so zipping it with the paths could pair a failure with the
+        # wrong file name in the log.
+        procs = [pool.submit(ODS.run_split, path, args.suffix) for path in args.ods_paths]
+        for ods_path, proc in zip(args.ods_paths, procs):
+            try:
+                proc.result()
+            except IOError as error:
+                logger.error("error reading %s: %s", ods_path, error.strerror)
+                returncode = os.EX_DATAERR
+            except BadZipFile as error:
+                logger.error("error parsing %s: %s", ods_path, error.args[0])
+                returncode = os.EX_DATAERR
+    return returncode
+
+# Console-script entry point referenced from setup.py.
+entry_point = cliutil.make_entry_point(__name__, PROGNAME)
+
+if __name__ == '__main__':
+    # Use sys.exit rather than the bare exit(): the latter is added by the
+    # site module for interactive use and is missing under `python -S`.
+    sys.exit(entry_point())
--- a/setup.py
+++ b/setup.py
@ -5,7 +5,7 @@ from setuptools import setup
 setup(
    name='conservancy_beancount',
    description="Plugin, library, and reports for reading Conservancy's books",
-    version='1.9.7',
+    version='1.10.0',
    author='Software Freedom Conservancy',
    author_email='info@sfconservancy.org',
    license='GNU AGPLv3+',
@ -44,6 +44,7 @@ setup(
            'fund-report = conservancy_beancount.reports.fund:entry_point',
            'ledger-report = conservancy_beancount.reports.ledger:entry_point',
            'opening-balances = conservancy_beancount.tools.opening_balances:entry_point',
+            'split-ods-links = conservancy_beancount.tools.split_ods_links:entry_point',
        ],
    },
 )