#!/usr/bin/python3
# Copyright © 2021, Bradley M. Kuhn
# Also copyrighted by others as well (no notices shared), and it is
# Licensed CC-BY-SA-4.0 because I borrowed an example from: https://www.py4u.net/discuss/208440
"""Report relative-file links in a Markdown document that point nowhere.

The Markdown text is read from standard input.  Every inline link
(``[text](target)``) and every numbered reference link (``[text][1]`` with a
matching ``[1]: target`` definition) is extracted; targets that do not start
with ``mailto``, ``http``, ``ftp`` or ``#`` are treated as local file paths
and reported on standard output when no such file exists.
"""

import sys
import re
from pathlib import Path

INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
FOOTNOTE_LINK_TEXT_RE = re.compile(r'\[([^\]]+)\]\[(\d+)\]')
FOOTNOTE_LINK_URL_RE = re.compile(r'\[(\d+)\]:\s+(\S+)')
# Targets matching this are not local files and are not checked (yet).
NON_FILE_LINK_RE = re.compile(r'^(mailto|http|ftp|#)', re.IGNORECASE)

MISSING_FILE_MSG = "local file by that name does not exist for relative file link"


def find_md_links(md):
    """Return the links found in the Markdown text *md*.

    The result is a list of ``(text, target)`` tuples: first every inline
    link in document order, then every numbered reference link in document
    order.  For reference links, *text* is the link label and *target* is
    taken from the matching ``[N]: target`` definition.

    A ``[text][N]`` occurrence with no ``[N]:`` definition is skipped: it is
    not a link in Markdown (it renders as literal text), and constructs such
    as ``a[i][0]`` in prose would otherwise be misread as links.
    """
    links = list(INLINE_LINK_RE.findall(md))
    footnote_urls = dict(FOOTNOTE_LINK_URL_RE.findall(md))
    # Iterate the raw matches rather than a dict keyed on the label, so that
    # the same label used with two different footnote numbers yields both.
    for text, number in FOOTNOTE_LINK_TEXT_RE.findall(md):
        url = footnote_urls.get(number)
        if url is not None:
            links.append((text, url))
    return links


def check_link(link):
    """Return an error message if *link* is bad, or ``None`` if it is fine.

    Only relative file links are checked: the target must name an existing
    regular file, resolved against the current working directory.
    """
    if NON_FILE_LINK_RE.match(link):
        # FIXME: test other types of links
        return None
    if not Path(link).is_file():
        return MISSING_FILE_MSG
    return None


def main():
    """Check the Markdown on stdin and print one line per bad link.

    Returns the process exit status, which is always 0: bad links are
    reported on stdout but do not make the script fail.
    """
    body_markdown = sys.stdin.read()
    for (text, link) in find_md_links(body_markdown):
        err_msg = check_link(link)
        if err_msg:
            print("Bad link of \"%s\" (labeled as \"%s\") has error: %s" % (link, text, err_msg))
    return 0


# Below doesn't work, didn't have time to find out why
#
# This alternative approach (render to HTML, then walk the <a> elements) used
# to sit after the exit call, so it never ran.  It is kept commented out for
# reference.  Two problems visible in it: stdin has already been consumed by
# the time it is read again, and `body` is never defined (the rendered HTML
# is printed but not stored).
#
#   import markdown
#   from lxml import etree
#   import sys
#   body_markdown = sys.stdin.read()
#   print(markdown.markdown(body_markdown))
#   doc = etree.fromstring(body)
#   for link in doc.xpath('//a'):
#       print(link.text, link.get('href'))


if __name__ == "__main__":
    sys.exit(main())