extract_odf_links: Add --relative-to option.
This is too convenient to pass up for our usual case, where we expect all links to be relative to a specific directory (the repository).
This commit is contained in:
parent
cf2d825a08
commit
2eba5a5546
2 changed files with 54 additions and 15 deletions
|
@ -72,29 +72,39 @@ supported in Python like \\n, \\t, \\0, \\u, etc. Default `%(default)s`.
|
||||||
dest='delimiter',
|
dest='delimiter',
|
||||||
const='\0',
|
const='\0',
|
||||||
help="""Shortcut for --delimiter=\\0
|
help="""Shortcut for --delimiter=\\0
|
||||||
|
""")
|
||||||
|
parser.add_argument(
|
||||||
|
'--relative-to', '-r',
|
||||||
|
metavar='PATH',
|
||||||
|
type=Path,
|
||||||
|
help="""Try to resolve all links relative to this path, rather than each
|
||||||
|
spreadsheet's path
|
||||||
""")
|
""")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'odf_paths',
|
'odf_paths',
|
||||||
metavar='ODF_PATH',
|
metavar='ODF_PATH',
|
||||||
type=Path,
|
type=Path,
|
||||||
nargs=argparse.ONE_OR_MORE,
|
nargs=argparse.ONE_OR_MORE,
|
||||||
help="""ODF file(s) to extract links from. Note that %(prog)s cannot
|
help="""ODF file(s) to extract links from
|
||||||
read from stdin because it needs to know document paths to resolve links.
|
|
||||||
""")
|
""")
|
||||||
return parser.parse_args(arglist)
|
args = parser.parse_args(arglist)
|
||||||
|
if args.relative_to is None:
|
||||||
|
if any(path == cliutil.STDSTREAM_PATH for path in args.odf_paths):
|
||||||
|
parser.error("--relative-to is required to read from stdin")
|
||||||
|
elif args.relative_to.is_dir() or not args.relative_to.exists():
|
||||||
|
args.relative_to /= 'PathStub.ods'
|
||||||
|
return args
|
||||||
|
|
||||||
def extract_links(odf_path: Path) -> Iterator[Path]:
|
def extract_links(odf_doc: odf.opendocument.OpenDocument, rel_path: Path) -> Iterator[Path]:
|
||||||
with odf_path.open('rb') as odf_file:
|
|
||||||
odf_doc = odf.opendocument.load(odf_file)
|
|
||||||
for a_elem in odf_doc.getElementsByType(odf.text.A):
|
for a_elem in odf_doc.getElementsByType(odf.text.A):
|
||||||
parts = urllib.parse.urlparse(a_elem.getAttribute('href') or '')
|
parts = urllib.parse.urlparse(a_elem.getAttribute('href') or '')
|
||||||
if (parts.scheme and parts.scheme != 'file') or not parts.path:
|
if (parts.scheme and parts.scheme != 'file') or not parts.path:
|
||||||
continue
|
continue
|
||||||
path = Path(urllib.parse.unquote(parts.path))
|
path = Path(urllib.parse.unquote(parts.path))
|
||||||
if not path.is_absolute():
|
if not path.is_absolute():
|
||||||
path = (odf_path / path).resolve()
|
path = (rel_path / path).resolve()
|
||||||
try:
|
try:
|
||||||
path.relative_to(odf_path)
|
path.relative_to(rel_path)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
@ -113,7 +123,9 @@ def main(arglist: Optional[Sequence[str]]=None,
|
||||||
links: Set[Path] = set()
|
links: Set[Path] = set()
|
||||||
for odf_path in args.odf_paths:
|
for odf_path in args.odf_paths:
|
||||||
try:
|
try:
|
||||||
links.update(extract_links(odf_path))
|
with cliutil.bytes_output(odf_path, sys.stdin, 'r') as odf_file:
|
||||||
|
odf_doc = odf.opendocument.load(odf_file)
|
||||||
|
links.update(extract_links(odf_doc, args.relative_to or odf_path))
|
||||||
except IOError as error:
|
except IOError as error:
|
||||||
logger.error("error reading %s: %s", odf_path, error.strerror)
|
logger.error("error reading %s: %s", odf_path, error.strerror)
|
||||||
returncode = os.EX_DATAERR
|
returncode = os.EX_DATAERR
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
@ -38,6 +39,14 @@ def expected_links(rel_path):
|
||||||
for path in INCLUDED_FILE_LINKS
|
for path in INCLUDED_FILE_LINKS
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def check_output(stdout, sep, rel_path):
    """Assert that the tool's output lists exactly the expected links.

    ``stdout`` is the buffer the tool wrote its results to, ``sep`` is the
    delimiter written between links, and ``rel_path`` is handed to
    ``expected_links()`` to build the collection the output is compared to.
    """
    fields = stdout.getvalue().split(sep)
    # Output that ends with the delimiter leaves one empty trailing field;
    # drop it so it is not counted as a link.
    if fields and len(fields[-1]) == 0:
        del fields[-1]
    wanted = expected_links(rel_path)
    # The length check runs before the set comparison -- presumably so that
    # duplicated links in the output are not hidden by the conversion to a set.
    assert len(fields) == len(wanted)
    assert set(fields) == wanted
|
||||||
|
|
||||||
@pytest.mark.parametrize('arglist,sep', [
|
@pytest.mark.parametrize('arglist,sep', [
|
||||||
(['-0'], '\0'),
|
(['-0'], '\0'),
|
||||||
(['-d', '\\v'], '\v'),
|
(['-d', '\\v'], '\v'),
|
||||||
|
@ -50,15 +59,33 @@ def test_extract_file_links(arglist, sep, caplog):
|
||||||
exitcode = extract_odf_links.main(arglist, stdout, stderr)
|
exitcode = extract_odf_links.main(arglist, stdout, stderr)
|
||||||
assert exitcode == 0
|
assert exitcode == 0
|
||||||
assert not stderr.getvalue()
|
assert not stderr.getvalue()
|
||||||
actual = stdout.getvalue().split(sep)
|
check_output(stdout, sep, SRC_PATH.parent)
|
||||||
if actual and not actual[-1]:
|
|
||||||
actual.pop()
|
|
||||||
expected = expected_links(SRC_PATH.parent)
|
|
||||||
assert len(actual) == len(expected)
|
|
||||||
assert set(actual) == expected
|
|
||||||
assert caplog.records
|
assert caplog.records
|
||||||
assert any(
|
assert any(
|
||||||
log.levelname == 'WARNING'
|
log.levelname == 'WARNING'
|
||||||
and log.message.endswith('/Bad Link.txt not found')
|
and log.message.endswith('/Bad Link.txt not found')
|
||||||
for log in caplog.records
|
for log in caplog.records
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('rel_path', [
    Path('/run'),
    Path('/tmp'),
])
def test_extract_relative_to(rel_path):
    """Check that a spreadsheet read from stdin resolves links against
    the directory given with ``--relative-to``.

    The fixture at ``SRC_PATH`` is supplied through ``sys.stdin`` (the ``-``
    argument), and the output is compared with the links expected for
    ``rel_path``.
    """
    # Spell the option out in full rather than relying on argparse's
    # unique-prefix matching, which would break if another option starting
    # with ``--relative`` were ever added.
    arglist = ['--relative-to', str(rel_path), '-0', '-']
    stdout = io.StringIO()
    stderr = io.StringIO()
    orig_stdin = sys.stdin
    # Open the fixture in a ``with`` block so the file handle is closed once
    # the tool has run, even if main() raises.
    with SRC_PATH.open('rb') as stdin_file:
        sys.stdin = stdin_file
        try:
            exitcode = extract_odf_links.main(arglist, stdout, stderr)
        finally:
            # Always put the real stdin back so later tests are unaffected.
            sys.stdin = orig_stdin
    assert exitcode == 0
    assert not stderr.getvalue()
    check_output(stdout, '\0', rel_path)
|
||||||
|
|
||||||
|
def test_reading_stdin_requires_relative_to():
    """Check that passing ``-`` without ``--relative-to`` exits with status 2."""
    with pytest.raises(SystemExit) as exit_info:
        extract_odf_links.main(['-'])
    # The first argument of SystemExit carries the exit status.
    exit_status = exit_info.value.args[0]
    assert exit_status == 2
|
||||||
|
|
Loading…
Reference in a new issue