#!/usr/bin/python3
# Copyright © 2021, Bradley M. Kuhn
# Also copyrighted by others as well (no notices shared), and it is
# Licensed CC-BY-SA-4.0 because I borrowed an example from: https://www.py4u.net/discuss/208440
"""Report Markdown links that point at local files which do not exist.

Markdown is read from standard input.  Every inline link (``[text](url)``)
and every numbered footnote-style link (``[text][1]`` with a matching
``[1]: url`` definition) is collected.  Links that do not start with
``mailto``, ``http``, ``ftp`` or ``#`` are treated as relative file paths
and checked against the filesystem, relative to the current directory.
One line is printed per bad link; the exit status is always 0.
"""
import sys
import re
from pathlib import Path

INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
FOOTNOTE_LINK_TEXT_RE = re.compile(r'\[([^\]]+)\]\[(\d+)\]')
FOOTNOTE_LINK_URL_RE = re.compile(r'\[(\d+)\]:\s+(\S+)')
# Links with one of these prefixes are not local files and are not checked.
NON_FILE_LINK_RE = re.compile(r'^(mailto|http|ftp|#)', re.IGNORECASE)


def find_md_links(md):
    """Return a list of ``(text, url)`` pairs for the links found in *md*.

    Inline links come first, in document order, followed by footnote-style
    links resolved through their ``[N]: url`` definitions.  Only numeric
    footnote labels are recognised.  Footnote references whose definition
    is missing are left out here (see ``find_undefined_footnotes``).
    """
    links = list(INLINE_LINK_RE.findall(md))
    footnote_urls = dict(FOOTNOTE_LINK_URL_RE.findall(md))

    # Walk the references themselves rather than a dict keyed by link text,
    # so two references sharing the same text are both kept, and pair the
    # URL with the link text (not with the footnote number).
    for text, ref in FOOTNOTE_LINK_TEXT_RE.findall(md):
        if ref in footnote_urls:
            links.append((text, footnote_urls[ref]))

    return links


def find_undefined_footnotes(md):
    """Return ``(text, ref)`` pairs for footnote links with no definition."""
    defined = {ref for ref, _url in FOOTNOTE_LINK_URL_RE.findall(md)}
    return [
        (text, ref)
        for text, ref in FOOTNOTE_LINK_TEXT_RE.findall(md)
        if ref not in defined
    ]


def link_error(link):
    """Return an error message for *link*, or ``None`` if no problem is found.

    Only relative file links are checked.  A link such as ``doc.md#section``
    is accepted when ``doc.md`` exists, because the part after ``#`` names a
    location inside the file rather than part of the file name.
    """
    if NON_FILE_LINK_RE.match(link):
        return None
    candidates = {link, link.partition("#")[0]}
    if any(Path(candidate).is_file() for candidate in candidates):
        return None
    # FIXME: test other types of links
    return "local file by that name does not exist for relative file link"


def check_markdown(md):
    """Return the list of "Bad link" report lines for the Markdown text *md*."""
    template = "Bad link of \"%s\" (labeled as \"%s\") has error: %s"
    messages = []
    for text, link in find_md_links(md):
        error = link_error(link)
        if error:
            messages.append(template % (link, text, error))
    for text, ref in find_undefined_footnotes(md):
        messages.append(template % (
            "[%s]" % ref, text,
            "footnote reference has no matching definition"))
    return messages


def main():
    """Check the Markdown on standard input and print one line per bad link."""
    for message in check_markdown(sys.stdin.read()):
        print(message)
    # The exit status is deliberately 0 even when bad links are reported,
    # matching the script's historical behaviour.
    return 0


# NOTE: an earlier experiment rendered the Markdown to HTML with the
# third-party `markdown` package and walked the <a> elements with lxml.
# It sat after the script's exit call, so it never ran, and it was marked
# as not working; at the very least it parses the undefined name `body`
# instead of the rendered HTML.  Kept here, commented out, for reference:
#
#     import markdown
#     from lxml import etree
#     import sys
#
#     body_markdown = sys.stdin.read()
#     print(markdown.markdown(body_markdown))
#     doc = etree.fromstring(body)
#     for link in doc.xpath('//a'):
#         print(link.text, link.get('href'))


if __name__ == "__main__":
    sys.exit(main())