First draft of a script to verify that the links in a Markdown file work
The goal of this script is to go through a Markdown file and verify that all of its links work. Currently, only relative-path links to local files are tested; links starting with mailto, http, ftp, or # are skipped.
This commit is contained in:
parent
1c5b337d12
commit
21d222a49d
1 changed files with 50 additions and 0 deletions
50
verify-markdown-links.py
Executable file
50
verify-markdown-links.py
Executable file
|
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
# Copyright © 2021, Bradley M. Kuhn
|
||||||
|
# Also copyrighted by others as well (no notices shared), and it is
|
||||||
|
# Licensed CC-BY-SA-4.0 because I borrowed an example from: https://www.py4u.net/discuss/208440
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Inline link: [text](url) -- captures (text, url).
INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
# Numbered reference: [text][N] -- captures (text, N).
FOOTNOTE_LINK_TEXT_RE = re.compile(r'\[([^\]]+)\]\[(\d+)\]')
# Numbered definition: [N]: url -- captures (N, url).
FOOTNOTE_LINK_URL_RE = re.compile(r'\[(\d+)\]:\s+(\S+)')


def find_md_links(md):
    """Return a list of ``(text, url)`` tuples for the links found in *md*.

    Inline links (``[text](url)``) come first, in document order, followed
    by numbered reference links (``[text][N]``) resolved through their
    ``[N]: url`` definitions.  Every occurrence is reported, so the same
    text or URL may appear more than once.

    A ``[text][N]`` reference with no matching ``[N]: url`` definition is
    skipped: without a definition Markdown renders it as literal text, so
    there is no link to verify.
    """
    links = INLINE_LINK_RE.findall(md)
    footnote_urls = dict(FOOTNOTE_LINK_URL_RE.findall(md))

    # Walk the match list directly rather than through a dict, so that two
    # references sharing the same text do not overwrite one another.
    for text, number in FOOTNOTE_LINK_TEXT_RE.findall(md):
        url = footnote_urls.get(number)
        if url is not None:
            # Pair the URL with the visible link text, not the footnote number.
            links.append((text, url))

    return links
|
||||||
|
|
||||||
|
# Script body: read Markdown from stdin and report every link that points at
# a local file which does not exist.  Relative paths are resolved against the
# current working directory, so run this from the Markdown file's directory.

# Links starting with one of these are not local files and are not checked.
NON_LOCAL_LINK_RE = re.compile(r'^(mailto|http|ftp|#)', re.IGNORECASE)

body_markdown = sys.stdin.read()

for (text, link) in find_md_links(body_markdown):
    err_msg = None
    if not NON_LOCAL_LINK_RE.match(link):
        # Drop any "#fragment" so that "doc.md#section" is tested as "doc.md".
        local_path = Path(link.partition('#')[0])
        if not local_path.is_file():
            err_msg = "local file by that name does not exist for relative file link"
    # FIXME: test other types of links
    if err_msg:
        print('Bad link of "%s" (labeled as "%s") has error: %s' % (link, text, err_msg))

# Always exits 0, even when bad links were reported.  sys.exit() is used
# because the bare exit() helper comes from the site module and is not
# guaranteed to be available.
sys.exit(0)
|
||||||
|
# Experimental alternative: render the Markdown to HTML and list its <a>
# elements.  This section is unreachable while the script exits above.
#
# The first draft of it could not work for two reasons: `body` was never
# assigned (the rendered HTML was printed but not kept), and the rendered
# Markdown is an HTML fragment, which typically has several top-level
# elements and is therefore not a well-formed XML document for
# etree.fromstring().

import markdown
from lxml import etree
import sys

body_markdown = sys.stdin.read()
body = markdown.markdown(body_markdown)
print(body)

# Nothing to parse when the rendered output is empty.
if body.strip():
    # etree.HTML() uses lxml's lenient HTML parser and supplies the missing
    # <html>/<body> wrapper, so a multi-element fragment parses cleanly.
    doc = etree.HTML(body)
    for link in doc.xpath('//a'):
        print(link.text, link.get('href'))
|
||||||
Loading…
Add table
Reference in a new issue