meta_repo_links: Check all non-RT links. RT#12517

This is less "future-proof," but it is premature to try to anticipate
what other link formats might look like in the future. See the
discussion in the code comments.
This commit is contained in:
Brett Smith 2020-09-05 14:59:13 -04:00
parent c2851f5cc0
commit da056917bf
3 changed files with 22 additions and 4 deletions

View file

@ -20,6 +20,7 @@ from . import core
from .. import config as configmod from .. import config as configmod
from .. import data from .. import data
from .. import errors as errormod from .. import errors as errormod
from .. import rtutil
from ..beancount_types import ( from ..beancount_types import (
MetaKey, MetaKey,
MetaValue, MetaValue,
@ -35,7 +36,6 @@ from typing import (
class MetaRepoLinks(core.TransactionHook): class MetaRepoLinks(core.TransactionHook):
HOOK_GROUPS = frozenset(['linkcheck']) HOOK_GROUPS = frozenset(['linkcheck'])
LINK_METADATA = data.LINK_METADATA.difference('rt-id') LINK_METADATA = data.LINK_METADATA.difference('rt-id')
PATH_PUNCT_RE = re.compile(r'[:/]')
SKIP_FLAGS = '!' SKIP_FLAGS = '!'
def __init__(self, config: configmod.Config) -> None: def __init__(self, config: configmod.Config) -> None:
@ -57,8 +57,20 @@ class MetaRepoLinks(core.TransactionHook):
yield errormod.InvalidMetadataError(txn, key, meta[key], post) yield errormod.InvalidMetadataError(txn, key, meta[key], post)
else: else:
for link in links: for link in links:
match = self.PATH_PUNCT_RE.search(link) # FUTURE DEVELOPMENT NOTE: As of this writing we have two
if match and match.group(0) == ':': # link checkers. Right now the division of reporting
# responsibility is: MetaRTLinks reports problems with any
# link that starts with `rt:`, while this checker reports
# problems with anything else.
# If we add more link checkers in the future, we might need
# to give more thought about which checker is responsible
# for flagging links in "unknown formats." But today, I
# can't write better code that anticipates that need
# without having any idea of what future link formats will
# look like. Today, asking the RT parser "can you handle
# this?" is the safest way to make sure all bad links get
# reported.
if rtutil.RT.parse(link) is not None:
pass pass
elif not (self.repo_path / link).exists(): elif not (self.repo_path / link).exists():
yield errormod.BrokenLinkError(txn, key, link) yield errormod.BrokenLinkError(txn, key, link)

View file

@ -5,7 +5,7 @@ from setuptools import setup
setup( setup(
name='conservancy_beancount', name='conservancy_beancount',
description="Plugin, library, and reports for reading Conservancy's books", description="Plugin, library, and reports for reading Conservancy's books",
version='1.9.6', version='1.9.7',
author='Software Freedom Conservancy', author='Software Freedom Conservancy',
author_email='info@sfconservancy.org', author_email='info@sfconservancy.org',
license='GNU AGPLv3+', license='GNU AGPLv3+',

View file

@ -45,6 +45,12 @@ GOOD_LINKS = [Path(s) for s in [
# Link values the tests treat as bad: paths that do not exist in the
# repository, plus strings written in a made-up `egproto:` link format
# (none of them start with `rt:`).
# NOTE: every entry needs its own trailing comma. Without one, Python joins
# adjacent string literals into a single string and a test case silently
# disappears.
BAD_LINKS = [Path(s) for s in [
    'NonexistentDirectory/NonexistentFile1.txt',
    'NonexistentDirectory/NonexistentFile2.txt',
    'egproto:',
    'egproto:123',
    'egproto:123/456',
    'egproto:foo',
    'egproto:/foo/bar',
    ';egproto::',
]]
NOT_FOUND_MSG = '{} not found in repository: {}'.format NOT_FOUND_MSG = '{} not found in repository: {}'.format