
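The goal of this script is to go through a Markdown file (read from standard input) and verify that all of its links work. Currently, only relative-path links to local files are tested; they are resolved against the current working directory, so run the script from the directory that contains the Markdown file.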
50 lines
1.5 KiB
Python
Executable file
50 lines
1.5 KiB
Python
Executable file
#!/usr/bin/python3
|
|
# Copyright © 2021, Bradley M. Kuhn
|
|
# Also copyrighted by others as well (no notices shared), and it is
|
|
# Licensed CC-BY-SA-4.0 because I borrowed an example from: https://www.py4u.net/discuss/208440
|
|
import sys
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Inline links: [text](target)
INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
# Reference-style link usage: [text][1]
FOOTNOTE_LINK_TEXT_RE = re.compile(r'\[([^\]]+)\]\[(\d+)\]')
# Reference-style link definition: [1]: target
FOOTNOTE_LINK_URL_RE = re.compile(r'\[(\d+)\]:\s+(\S+)')


def find_md_links(md):
    """Return a list of ``(text, target)`` tuples for the links in *md*.

    Inline links (``[text](target)``) come first, in document order,
    followed by numbered reference-style links (``[text][1]`` paired with a
    ``[1]: target`` definition), also in document order.

    Args:
        md: Markdown source as a single string.

    Returns:
        A list of ``(link text, link target)`` tuples.  Every occurrence is
        reported, so the same pair may appear more than once.

    Raises:
        KeyError: if a reference-style link uses a number that has no
            ``[n]: target`` definition in *md*.
    """
    links = list(INLINE_LINK_RE.findall(md))
    footnote_urls = dict(FOOTNOTE_LINK_URL_RE.findall(md))

    # Walk the (text, number) matches directly instead of through a dict
    # keyed by text: a dict would drop earlier references that reuse the
    # same label, leaving their targets unchecked.
    links.extend(
        (text, footnote_urls[number])
        for text, number in FOOTNOTE_LINK_TEXT_RE.findall(md)
    )

    return links
|
|
|
|
# Read the whole Markdown document from standard input.
body_markdown = sys.stdin.read()

for (text, link) in find_md_links(body_markdown):
    errMsg = None
    # Targets starting with mailto/http/ftp or '#' are not checked; anything
    # else is treated as a path to a local file.
    if not re.match(r'^(mailto|http|ftp|#)', link, re.IGNORECASE):
        # A "#fragment" suffix names a section inside the target rather than
        # being part of the file name, so drop it before the existence check.
        path = Path(link.split('#', 1)[0])
        if not path.is_file():
            errMsg = "local file by that name does not exist for relative file link"
    # FIXME: test other types of links
    if errMsg:
        print("Bad link of \"%s\" (labeled as \"%s\") has error: %s" % (link, text, errMsg))

# Stop here so the unfinished experimental code further down never runs.
# sys.exit() is used instead of the exit() helper, which is injected by the
# site module for interactive sessions and is not guaranteed to exist.
sys.exit(0)
|
|
# Experimental alternative that extracts links with a real Markdown parser
# instead of regular expressions.  It is unreachable as long as the script
# exits before this point.
# NOTE(review): standard input has already been read to the end earlier in the
# script, so the read below would return an empty string if this code were
# simply made reachable -- confirm how the input should be shared.

import markdown
from lxml import etree
import sys

body_markdown = sys.stdin.read()
# Keep the rendered HTML in ``body``; the parse step below needs it.
body = markdown.markdown(body_markdown)
print(body)
# Rendered Markdown is usually several sibling elements, but
# etree.fromstring() accepts exactly one root element, so wrap the output.
doc = etree.fromstring("<div>%s</div>" % body)
for link in doc.xpath('//a'):
    print(link.text, link.get('href'))
|