2015-07-18 07:09:17 +00:00
|
|
|
from __future__ import unicode_literals
|
2012-07-18 23:20:51 +00:00
|
|
|
from html5lib import html5parser, sanitizer
|
|
|
|
|
|
|
|
import markdown
|
|
|
|
|
|
|
|
|
|
|
|
def parse(text):
    """Convert Markdown *text* to HTML and pass it through html5lib's sanitizer.

    The input is first rendered with the Markdown "extra" extension, then
    re-parsed as an HTML fragment using html5lib's sanitizing tokenizer.
    The top-level nodes of that fragment are serialized and concatenated.

    Returns the joined serialization of the fragment's child nodes.
    """
    # Step 1: Markdown -> HTML. Markdown's own safe_mode is switched off;
    # sanitizing is left to the html5lib pass below.
    rendered = markdown.markdown(text, extensions=["extra"], safe_mode=False)

    # Step 2: re-parse the rendered HTML with the sanitizing tokenizer.
    sanitizing_parser = html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
    fragment = sanitizing_parser.parseFragment(rendered)

    # Step 3: serialize each top-level node and glue the pieces together.
    return "".join(node.toxml() for node in fragment.childNodes)
|