# conservancy_beancount/conservancy_beancount/cliutil.py
# 2021-04-29 11:49:46 -04:00
#
# 557 lines
# 19 KiB
# Python

"""cliutil - Utilities for CLI tools
This module provides common functionality for
* command line argument parsing
* exception handling
* exit status reporting
* file opening that deals with the ``-`` convention
* logging
"""
# Distribution name of this package. VERSION below is looked up under this
# name.
PKGNAME = 'conservancy_beancount'
# License notice. add_version_argument() prints it after the version line.
LICENSE = """
Copyright © 2020, 2021 Brett Smith and other contributors
This program is free software: you can redistribute it and/or modify it.
Refer to the LICENSE.txt that came with the software for details.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""
import argparse
import datetime
import enum
import io
import logging
import operator
import os
import pkg_resources
import re
import signal
import subprocess
import sys
import traceback
import types
from pathlib import Path
import rt.exceptions as rt_error
import yaml
from . import data
from . import errors
from . import filters
from . import rtutil
from typing import (
cast,
Any,
BinaryIO,
Callable,
Container,
Generic,
IO,
Iterable,
Iterator,
List,
NamedTuple,
NoReturn,
Optional,
Sequence,
TextIO,
Type,
TypeVar,
Union,
)
from .beancount_types import (
MetaKey,
Sortable,
)
# Type variable for EnumArgument: stands for any Enum subclass.
ET = TypeVar('ET', bound=enum.Enum)
# An output destination: either an integer file descriptor or a file object.
OutputFile = Union[int, IO]
# Number of CPU cores available to this process. This is the default for
# --jobs and the base for percentage arguments.
try:
    CPU_COUNT = len(os.sched_getaffinity(0))
except AttributeError:
    # os.sched_getaffinity only exists on some Unix platforms. Fall back to
    # the total core count, which can itself be None when undetermined.
    CPU_COUNT = os.cpu_count() or 1
# Path that stands for a standard stream, following the ``-`` CLI convention.
STDSTREAM_PATH = Path('-')
# Version string of the installed PKGNAME distribution.
VERSION = pkg_resources.require(PKGNAME)[0].version
class EnumArgument(Generic[ET]):
    """Adapter that lets argparse accept enum member names as arguments

    Wrap an enum class in this when the user should pick one of its names.
    The user may abbreviate the name and use any letter case; the argument
    is accepted as long as it identifies exactly one member. Typical usage
    looks like::

        enum_arg = EnumArgument(Enum)
        arg_parser.add_argument(
            '--choice',
            type=enum_arg.enum_type,  # or .value_type
            help=f"Choices are {enum_arg.choices_str()}",
        )
    """
    # I originally wrote this as a mixin class, to eliminate the need for the
    # explicit wrapping in the example above. But Python 3.6 doesn't really
    # support mixins with Enums; see <https://bugs.python.org/issue29577>.
    # This functionality could be moved to a mixin when we drop support for
    # Python 3.6.

    def __init__(self, base: Type[ET]) -> None:
        self.base = base

    def enum_type(self, arg: str) -> ET:
        """Return the one enum member whose name starts with ``arg``

        Matching ignores case. Aliases of the same member count once.
        Raises ValueError when no member or more than one member matches.
        """
        prefix_re = re.compile(re.escape(arg), re.IGNORECASE)
        found = frozenset(
            member
            for member_name, member in self.base.__members__.items()
            if prefix_re.match(member_name)
        )
        if not found:
            raise ValueError(f"unknown argument {arg!r}")
        if len(found) > 1:
            names = ', '.join(repr(member.name) for member in found)
            raise ValueError(f"ambiguous argument {arg!r}: matches {names}")
        (only_match,) = found
        return only_match

    def value_type(self, arg: str) -> Any:
        """Like ``enum_type``, but return the matched member's value"""
        member = self.enum_type(arg)
        return member.value

    def choices_str(self, sep: str=', ', fmt: str='{!r}') -> str:
        """Return the lowercased member names, formatted and joined

        Each name is rendered with ``fmt`` and the results are joined with
        ``sep``. Members are ordered by the enum's own ``_choices_sortkey``
        if it defines one, otherwise by name.
        """
        sortkey: Callable[[ET], Sortable] = getattr(
            self.base, '_choices_sortkey', self._choices_sortkey,
        )
        ordered = sorted(self.base, key=sortkey)
        return sep.join([fmt.format(member.name.lower()) for member in ordered])

    def _choices_sortkey(self, choice: ET) -> Sortable:
        # Default ordering for choices_str(): alphabetical by member name.
        return choice.name
class ExceptHook:
    """Common ExceptHook

    Interactive tools can install an instance of this class as
    ``sys.excepthook``. It knows how to generate more user-friendly error
    messages and exit codes for common exception types, and reports those to
    the given logger (or the root logger if none was given).
    """

    def __init__(self, logger: Optional[logging.Logger]=None) -> None:
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger

    def __call__(self,
                 exc_type: Type[BaseException],
                 exc_value: BaseException,
                 exc_tb: types.TracebackType,
    ) -> NoReturn:
        """Log the uncaught exception and exit with a matching status

        A one-line summary is logged at the critical level and the full
        traceback at the debug level. Then SystemExit is raised with an exit
        code chosen from the exception type.
        """
        error_type = type(exc_value).__name__
        # Default message: the exception's arguments. Branches below replace
        # it where a more readable message is available.
        msg = ": ".join(str(arg) for arg in exc_value.args)
        if isinstance(exc_value, KeyboardInterrupt):
            # Restore the default SIGINT disposition and re-send SIGINT to
            # our process group (pid 0), then wait for the signal to arrive.
            signal.signal(signal.SIGINT, signal.SIG_DFL)
            os.kill(0, signal.SIGINT)
            signal.pause()
        elif isinstance(exc_value, (
                rt_error.AuthorizationError,
                rt_error.NotAllowed,
        )):
            exitcode = os.EX_NOPERM
            error_type = "RT access denied"
        elif isinstance(exc_value, rt_error.ConnectionError):
            exitcode = os.EX_TEMPFAIL
            error_type = "RT connection error"
        elif isinstance(exc_value, rt_error.RtError):
            exitcode = os.EX_UNAVAILABLE
            error_type = f"RT {error_type}"
        elif isinstance(exc_value, errors.RewriteRuleError):
            exitcode = ExitCode.RewriteRulesError
            msg = str(exc_value)
            if exc_value.source is not None:
                msg += f"\n\n source: {yaml.safe_dump(exc_value.source)}"
        elif isinstance(exc_value, OSError):
            # strerror is None when the OSError was built without an errno,
            # e.g. OSError("message"). Keep the args-based message in that
            # case rather than logging the text "None".
            strerror = exc_value.strerror or msg
            if exc_value.filename is None:
                exitcode = os.EX_OSERR
                error_type = "OS error"
                msg = strerror
            else:
                # There are more specific exit codes for input problems vs.
                # output problems, but without knowing how the file was
                # intended to be used, we can't use them.
                exitcode = os.EX_IOERR
                error_type = "I/O error"
                msg = f"{exc_value.filename}: {strerror}"
        else:
            exitcode = os.EX_SOFTWARE
            error_type = f"internal {error_type}"
        self.logger.critical("%s%s%s", error_type, ": " if msg else "", msg)
        self.logger.debug(
            ''.join(traceback.format_exception(exc_type, exc_value, exc_tb)),
        )
        raise SystemExit(exitcode)
class ExitCode(enum.IntEnum):
    """Named process exit statuses

    Where two names are assigned the same value, the enum treats the name
    defined later as an alias of the one defined first.
    """
    # BSD exit codes commonly used
    NoConfiguration = os.EX_CONFIG
    NoConfig = NoConfiguration
    NoDataFiltered = os.EX_DATAERR
    NoDataLoaded = os.EX_NOINPUT
    OK = os.EX_OK
    Ok = OK
    # Same value as NoDataFiltered above, so this name is an alias of it.
    RewriteRulesError = os.EX_DATAERR
    # Our own exit codes, working down from that range
    BeancountErrors = 63
class ExtendAction(argparse.Action):
    """argparse action to let a user build a list from a string

    This is a fancier version of argparse's built-in ``action='append'``.
    The user's input is turned into a list of strings, split by a regexp
    pattern you provide. Typical usage looks like::

        parser = argparse.ArgumentParser()
        parser.add_argument(
            '--option', ...,
            action=ExtendAction,
            const=regexp_pattern,  # default is '\\s*,\\s*'
            ...,
        )
    """
    DEFAULT_PATTERN = r'\s*,\s*'

    def __call__(self,
                 parser: argparse.ArgumentParser,
                 namespace: argparse.Namespace,
                 values: Union[Sequence[Any], str, None]=None,
                 option_string: Optional[str]=None,
    ) -> None:
        """Split each given value and add the pieces to the destination list

        The destination attribute is replaced with a new list each time.
        The list that was already there, which may be the parser's
        ``default`` object, is never modified in place. Otherwise a second
        ``parse_args()`` call on the same parser would start from the
        first call's results.
        """
        pattern: str = self.const or self.DEFAULT_PATTERN
        current: Optional[List[str]] = getattr(namespace, self.dest, None)
        result: List[str] = [] if current is None else list(current)
        if values is None:
            values = []
        elif isinstance(values, str):
            values = [values]
        for s in values:
            result.extend(re.split(pattern, s))
        setattr(namespace, self.dest, result)
class InfoAction(argparse.Action):
    """argparse action that prints a fixed message and exits

    Meant for options such as ``--version`` and ``--license``.
    Pass the message through the ``const`` argument. It can be a plain
    string, in which case the exit code is 0 (OK). It can also be a 2-tuple
    (str, int) giving the message and the exit code to use.
    """

    def __call__(self,
                 parser: argparse.ArgumentParser,
                 namespace: argparse.Namespace,
                 values: Union[Sequence[Any], str, None]=None,
                 option_string: Optional[str]=None,
    ) -> NoReturn:
        const = self.const
        if isinstance(const, str):
            message, status = const, 0
        else:
            message, status = const
        print(message)
        raise SystemExit(status)
class LogLevel(enum.IntEnum):
    """Log levels by name

    Each member's value is the ``logging`` module constant of the same name.
    """
    DEBUG = logging.DEBUG
    INFO = logging.INFO
    WARNING = logging.WARNING
    ERROR = logging.ERROR
    CRITICAL = logging.CRITICAL
    # Shorter aliases for the members above
    WARN = WARNING
    ERR = ERROR
    CRIT = CRITICAL

    def _choices_sortkey(self) -> Sortable:
        # EnumArgument.choices_str() looks this name up on the enum class and
        # uses it as the sort key, so levels are listed by numeric value.
        return self.value
class SearchTerm(NamedTuple):
    """A user's metadata filter: a metadata key plus a regexp pattern

    SearchTerm parses and stores posting metadata filters that the user
    writes in `key=value` format. Reporting tools use it to narrow postings
    down to those matching the user's criteria, to report on subsets of the
    books. Typical usage looks like::

        argument_parser.add_argument(
            'search_terms',
            type=SearchTerm.arg_parser(),
            …,
        )
        args = argument_parser.parse_args(…)
        for query in args.search_terms:
            postings = query.filter_postings(postings)
    """
    meta_key: MetaKey
    pattern: str

    @classmethod
    def arg_parser(cls,
                   default_key: Optional[str]=None,
                   ticket_default_key: Optional[str]=None,
    ) -> Callable[[str], 'SearchTerm']:
        """Build a SearchTerm parser

        The returned function parses a string in ``key=value`` format into
        the corresponding SearchTerm.

        With a ``default_key``, a string that is only a ``value`` is parsed
        as if it said ``default_key=value``. Without one, a string that has
        no metadata key raises ValueError.

        With a ``ticket_default_key``, a value in the format ``number``,
        ``rt:number``, or ``rt://ticket/number`` is parsed as if it said
        ``ticket_default_key=value``. When it is not given, ``default_key``
        is used for those values too.
        """
        if ticket_default_key is None:
            ticket_default_key = default_key

        def parse_search_term(arg: str) -> 'SearchTerm':
            key: Optional[str]
            if re.match(r'^[a-z][-\w]*=', arg):
                key, _, raw_link = arg.partition('=')
            else:
                key = None
                raw_link = arg
            # Try the value as written, then as a bare ticket number.
            rt_ids = rtutil.RT.parse(raw_link)
            if rt_ids is None:
                rt_ids = rtutil.RT.parse('rt:' + raw_link)
            if rt_ids is not None:
                ticket_id, attachment_id = rt_ids
                if key is None:
                    # Only whole-ticket links get the ticket default key.
                    key = ticket_default_key if attachment_id is None else default_key
                pattern = rtutil.RT.metadata_regexp(
                    ticket_id,
                    attachment_id,
                    first_link_only=(key == 'rt-id' and attachment_id is None),
                )
            else:
                if key is None:
                    key = default_key
                # Match the literal value as one whitespace-delimited word.
                pattern = r'(?:^|\s){}(?:\s|$)'.format(re.escape(raw_link))
            if key is None:
                raise ValueError(f"invalid search term {arg!r}: no metadata key")
            return cls(key, pattern)

        return parse_search_term

    def filter_postings(self, postings: Iterable[data.Posting]) -> Iterator[data.Posting]:
        """Return the postings whose ``meta_key`` metadata matches ``pattern``"""
        regexp = re.compile(self.pattern)
        return filters.filter_meta_match(postings, self.meta_key, regexp)
def add_jobs_argument(parser: argparse.ArgumentParser) -> argparse.Action:
    """Add a ``--jobs``/``-j`` option to ``parser`` and return its action

    The value is parsed by ``jobs_arg`` and defaults to ``CPU_COUNT``.
    """
    help_text = (
        "Maximum number of processes to run concurrently.\n"
        "Can specify a positive integer or a percentage of CPU cores."
        " Default all cores.\n"
    )
    return parser.add_argument(
        '--jobs', '-j',
        type=jobs_arg,
        default=CPU_COUNT,
        metavar='NUM',
        help=help_text,
    )
def add_loglevel_argument(parser: argparse.ArgumentParser,
                          default: LogLevel=LogLevel.INFO) -> argparse.Action:
    """Add a ``--loglevel`` option to ``parser`` and return its action

    The user names a LogLevel member; the parsed value is that member's
    numeric level. ``default`` is the level used when the option is absent.
    """
    level_arg = EnumArgument(LogLevel)
    help_text = ''.join([
        "Show logs at this level and above.",
        f" Specify one of {level_arg.choices_str()}.",
        f" Default {default.name.lower()!r}.",
    ])
    return parser.add_argument(
        '--loglevel',
        type=level_arg.value_type,
        default=default.value,
        metavar='LEVEL',
        help=help_text,
    )
def add_rewrite_rules_argument(parser: argparse.ArgumentParser) -> argparse.Action:
    """Add a repeatable ``--rewrite-rules`` option and return its action

    Each use appends one ``Path`` to the ``rewrite_rules`` list, which is
    empty when the option is never given.
    """
    help_text = (
        "Use rewrite rules from the given YAML file. You can specify\n"
        "this option multiple times to load multiple sets of rewrite rules"
        " in order.\n"
    )
    return parser.add_argument(
        '--rewrite-rules', '--rewrites', '-r',
        action='append',
        type=Path,
        default=[],
        metavar='PATH',
        help=help_text,
    )
def add_version_argument(parser: argparse.ArgumentParser) -> argparse.Action:
    """Add a ``--version`` option (also ``--copyright``, ``--license``)

    When used, the option prints the program name, version, and license
    notice through InfoAction, which then exits.
    """
    progname = parser.prog or sys.argv[0]
    banner = f"{progname} version {VERSION}\n{LICENSE}"
    return parser.add_argument(
        '--version', '--copyright', '--license',
        action=InfoAction,
        const=banner,
        nargs=0,
        help="Show program version and license information",
    )
def can_run(
        cmd: Sequence[str],
        stdout: Optional[int]=subprocess.DEVNULL,
        stderr: Optional[int]=None,
        ok_returncodes: Container[int]=frozenset([0]),
) -> bool:
    """Report whether ``cmd`` can be started and exits acceptably

    The command is run with its stdin closed immediately. Returns False if
    it cannot be started, otherwise whether its exit status is one of
    ``ok_returncodes``.
    """
    try:
        with subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=stdout,
                stderr=stderr,
        ) as child:
            # Typing says this can be None, but I don't think that's true
            # given that we passed stdin=PIPE.
            child.stdin.close()  # type:ignore[union-attr]
        exit_status = child.returncode
    except (OSError, subprocess.SubprocessError):
        return False
    return exit_status in ok_returncodes
def date_arg(arg: str) -> datetime.date:
    """Parse a ``YYYY-MM-DD`` argument string into a date

    Raises ValueError if the string is not in that format.
    """
    parsed = datetime.datetime.strptime(arg, '%Y-%m-%d')
    return parsed.date()
def diff_year(date: datetime.date, diff: int) -> datetime.date:
    """Return ``date`` moved by ``diff`` years

    February 29 has no counterpart in a non-leap year. In that case the
    result is February 28 when moving backwards and March 1 otherwise.
    """
    target_year = date.year + diff
    try:
        shifted = date.replace(year=target_year)
    except ValueError:
        # The original date is Feb 29, which doesn't exist in the new year.
        month, day = (2, 28) if diff < 0 else (3, 1)
        shifted = datetime.date(target_year, month, day)
    return shifted
def year_or_date_arg(arg: str) -> Union[int, datetime.date]:
    """Parse an argument string as a year (int) if possible, else a date

    This suits arguments that get passed on to Books loader methods, which
    accept either a fiscal year or a full date. An integer is only treated
    as a year when it lies in the range ``datetime`` supports; anything else
    is handed to ``date_arg``.
    """
    try:
        year: Optional[int] = int(arg, 10)
    except ValueError:
        year = None
    if year is not None and datetime.MINYEAR <= year <= datetime.MAXYEAR:
        return year
    return date_arg(arg)
def jobs_arg(arg: str) -> int:
    """Parse a ``--jobs`` argument into a number of processes

    ``arg`` is either a positive integer, or a positive integer followed by
    ``%`` meaning that percentage of ``CPU_COUNT``. A percentage is rounded
    to the nearest whole number and never yields fewer than one job.

    Raises ValueError, which argparse reports as a usage error, when the
    argument is not a number or is not positive.
    """
    if arg.endswith('%'):
        percent = int(arg[:-1])
        if percent < 1:
            raise ValueError("--jobs argument must be a positive integer or percentage")
        # A small percentage of a few cores rounds down to zero; a valid
        # request should still get one job.
        arg_n = max(1, round(CPU_COUNT * percent / 100))
    else:
        arg_n = int(arg)
    if arg_n < 1:
        raise ValueError("--jobs argument must be a positive integer or percentage")
    return arg_n
def make_entry_point(mod_name: str, prog_name: str=sys.argv[0]) -> Callable[[], int]:
    """Build an entry_point function for the tool in module ``mod_name``

    The result can be named as an entry_point in setup.py. When called, it
    configures the root logger, stores a logger named ``prog_name`` on the
    module as ``logger``, installs an ExceptHook that reports to that
    logger, and returns the result of the module's ``main()``.
    """
    def entry_point():  # type:ignore
        tool = sys.modules[mod_name]
        setup_logger()
        tool_logger = logging.getLogger(prog_name)
        tool.logger = tool_logger
        sys.excepthook = ExceptHook(tool_logger)
        return tool.main()

    return entry_point
def setup_logger(logger: Union[str, logging.Logger]='',
                 stream: TextIO=sys.stderr,
                 fmt: str='%(name)s: %(levelname)s: %(message)s',
) -> logging.Logger:
    """Attach a StreamHandler using ``fmt`` to a logger and return the logger

    ``logger`` may be a Logger or a logger name; the empty name means the
    root logger. The handler writes to ``stream``.
    """
    target = logging.getLogger(logger) if isinstance(logger, str) else logger
    handler = logging.StreamHandler(stream)
    handler.setFormatter(logging.Formatter(fmt))
    target.addHandler(handler)
    return target
def set_loglevel(logger: logging.Logger, loglevel: int=logging.INFO) -> None:
    """Set the loglevel for a tool or module

    For a logger inside a hierarchy (a dotted name), this is just
    ``logger.setLevel(loglevel)``. For any other logger, the level is set
    on the root logger instead, and some libraries used by reporting tools
    get specific minimum levels.
    """
    if '.' in logger.name:
        logger.setLevel(loglevel)
        return
    # pdfminer logs debug-like messages at the info level
    logging.getLogger('pdfminer').setLevel(max(loglevel, logging.WARNING))
    # At the debug level, the rt module logs the full body of every
    # request and response. That's too much.
    logging.getLogger('rt.rt').setLevel(max(loglevel, logging.INFO))
    logging.getLogger().setLevel(loglevel)
def bytes_output(path: Optional[Path]=None,
                 default: OutputFile=sys.stdout,
                 mode: str='w',
) -> BinaryIO:
    """Get a file-like object suitable for binary output

    If ``path`` is ``None`` or ``-``, returns a file-like object backed by
    ``default``. If ``default`` is a file descriptor or text IO object, this
    method returns a file-like object that writes to the same place.
    Otherwise, returns ``path.open(mode)``.

    ``mode`` is given without the ``b`` flag; it is added here.
    """
    mode = f'{mode}b'
    if path is None or path == STDSTREAM_PATH:
        if isinstance(default, int):
            retval = open(default, mode)
        elif isinstance(default, io.TextIOBase):
            # Check against the io base class: real streams such as
            # sys.stdout are not instances of typing.TextIO. Text streams
            # with no underlying binary buffer (e.g. StringIO) are returned
            # unchanged.
            retval = getattr(default, 'buffer', default)
        else:
            retval = default
    else:
        retval = path.open(mode)
    return cast(BinaryIO, retval)
def text_output(path: Optional[Path]=None,
                default: OutputFile=sys.stdout,
                mode: str='w',
                encoding: Optional[str]=None,
) -> TextIO:
    """Get a file-like object suitable for text output

    If ``path`` is ``None`` or ``-``, returns a file-like object backed by
    ``default``. If ``default`` is a file descriptor or binary IO object, this
    method returns a file-like object that writes to the same place.
    Otherwise, returns ``path.open(mode)``.

    ``encoding`` applies whenever this function opens or wraps a stream. A
    ``default`` that is already a text stream is returned unchanged.
    """
    if path is None or path == STDSTREAM_PATH:
        if isinstance(default, int):
            retval = open(default, mode, encoding=encoding)
        elif isinstance(default, (io.RawIOBase, io.BufferedIOBase)):
            # Check against the io base classes: real binary streams are
            # not instances of typing.BinaryIO.
            retval = io.TextIOWrapper(default, encoding=encoding)
        else:
            retval = default
    else:
        retval = path.open(mode, encoding=encoding)
    return cast(TextIO, retval)