From 0652471164ecad89ce35c06a025669631141a5ec Mon Sep 17 00:00:00 2001 From: Sachi King Date: Fri, 21 Apr 2017 10:34:48 +1000 Subject: [PATCH] Sanitize user input on markdown fields Rendering these fields unsanitized is an XSS vulnerability. This also blocks a number of Markdown elements that a user might attempt to use. The following are the allowed tags. ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'p', 'pre', 'strong', 'ul'] I believe this to be acceptable, as honestly, a speaker using H1 is going to stomp all over the page and make it harder for the reviewer to parse. UX wise, it's less than great. A user can do # title and be left with
&lt;h1&gt;title&lt;/h1&gt;
in the sanitized output. --- requirements/base.txt | 2 +- symposion/markdown_parser.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 5a955d0d..830c9b29 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -6,7 +6,7 @@ django-sitetree>=1.7.0 django-taggit==0.18.0 django-timezone-field>=2.0 easy-thumbnails==2.3 -html5lib==0.9999999 +bleach markdown==2.6.5 pytz==2015.7 django-ical==1.4 diff --git a/symposion/markdown_parser.py b/symposion/markdown_parser.py index b3eaa53c..d92a5020 100644 --- a/symposion/markdown_parser.py +++ b/symposion/markdown_parser.py @@ -1,17 +1,14 @@ from __future__ import unicode_literals +import bleach import markdown +tags = bleach.sanitizer.ALLOWED_TAGS[:] +tags.extend(['p', 'pre']) + + def parse(text): - - # First run through the Markdown parser - text = markdown.markdown(text, extensions=["extra"], safe_mode=False) - - # Sanitize using html5lib - # bits = [] - # parser = html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer) - # for token in parser.parseFragment(text).childNodes: - # bits.append(token.toxml()) - # return "".join(bits) + md = markdown.markdown(text, extensions=['extra']) + text = bleach.clean(md, tags=tags) return text