small-hacks/verify-markdown-links.py

53 lines
1.6 KiB
Python
Raw Normal View History

#!/usr/bin/python3
# Copyright © 2021, Bradley M. Kuhn
# Also copyrighted by others as well (no notices shared), and it is
# Licensed CC-BY-SA-4.0 because I borrowed an example from: https://www.py4u.net/discuss/208440
import sys
import re
from pathlib import Path
# Inline link:                      [text](url)
INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
# Reference-style usage (numeric):  [text][1]
FOOTNOTE_LINK_TEXT_RE = re.compile(r'\[([^\]]+)\]\[(\d+)\]')
# Reference definition (numeric):   [1]: url
FOOTNOTE_LINK_URL_RE = re.compile(r'\[(\d+)\]:\s+(\S+)')


def find_md_links(md):
    """Return a list of ``(text, url)`` tuples for the links found in *md*.

    Inline links (``[text](url)``) come first, in document order, followed
    by numeric reference-style links (``[text][1]`` resolved through a
    ``[1]: url`` definition), also in document order.  Every occurrence is
    reported, even when the same link text is used more than once.

    A ``[text][N]`` usage with no matching ``[N]: url`` definition is
    skipped rather than raising ``KeyError``: the usage pattern also matches
    ordinary text such as ``matrix[i][0]``, which is not a link at all.
    """
    links = list(INLINE_LINK_RE.findall(md))
    # Map reference number -> URL; if a number is defined twice the last
    # definition wins.
    footnote_urls = dict(FOOTNOTE_LINK_URL_RE.findall(md))
    for text, number in FOOTNOTE_LINK_TEXT_RE.findall(md):
        url = footnote_urls.get(number)
        if url is not None:
            # Pair the URL with the visible link text (not the reference
            # number) so callers can label the link the way a reader sees it.
            links.append((text, url))
    return links
# Read the whole Markdown document from standard input and check each link.
# For every local link that exists, its path is printed on its own line;
# for every local link that does not, a "# Bad link ..." line is printed.
body_markdown = sys.stdin.read()
for (text, link) in find_md_links(body_markdown):
    # Links starting with mailto/http/ftp, or with "#" (an anchor), are not
    # verified at all; only the remaining links are treated as filesystem
    # paths.
    if re.match(r'^(mailto|http|ftp|#)', link, re.IGNORECASE):
        # FIXME: test other types of links
        continue
    err_msg = None
    path = Path(link)
    if path.is_file() or path.is_dir():
        print(path)
    else:
        err_msg = "local file by that name does not exist for relative file link"
    if err_msg:
        print("# Bad link of \"%s\" (labeled as \"%s\") has error: %s" % (link, text, err_msg))
# The exit status is 0 even when bad links were reported.  sys.exit() is used
# because the bare exit() helper is injected by the `site` module for
# interactive sessions and is not guaranteed to exist in every run.
sys.exit(0)
# NOTE: everything below is unreachable -- the script exits before this
# point, and standard input has already been read by then.  It is an
# experimental alternative that renders the Markdown to HTML and lists every
# <a> element.  The earlier version of this section failed because it passed
# an undefined name (`body`) to the parser.  In addition, the rendered HTML is
# a fragment that can have several top-level elements, which an XML parser
# rejects, so it is wrapped in a single <div> root here.
# NOTE(review): still untested; HTML that is not well-formed XML would need
# lxml.html instead of lxml.etree -- confirm before enabling.
import markdown
from lxml import etree
import sys
body_markdown = sys.stdin.read()
body = markdown.markdown(body_markdown)
print(body)
doc = etree.fromstring("<div>%s</div>" % body)
for link in doc.xpath('//a'):
    print(link.text, link.get('href'))