From 94c56bb468cb9ad85c9f92105a32449ae0b0e216 Mon Sep 17 00:00:00 2001 From: Ben Sturmfels Date: Wed, 13 Mar 2024 13:13:42 +1100 Subject: [PATCH] Rewrite the `index` view to avoid risk of path traversal I've simplified this view by removing the custom HTTP error handlers, Python 3.5 exception handling and adding documentation. --- conservancy/tests.py | 36 ++++++++++++++++++++ conservancy/urls.py | 2 -- conservancy/views.py | 78 +++++++++++++++++++------------------------- 3 files changed, 70 insertions(+), 46 deletions(-) create mode 100644 conservancy/tests.py diff --git a/conservancy/tests.py b/conservancy/tests.py new file mode 100644 index 00000000..43b46528 --- /dev/null +++ b/conservancy/tests.py @@ -0,0 +1,36 @@ +import datetime + +from django.http import Http404 +from django.test import RequestFactory, TestCase + +from . import views +from conservancy.fundgoal.models import FundraisingGoal + + +class ContentTest(TestCase): + def setUp(self): + self.factory = RequestFactory() + FundraisingGoal.objects.create( + fundraiser_code_name='cy2023-end-year-match', + fundraiser_goal_amount=0, + fundraiser_so_far_amount=0, + fundraiser_donation_count=0, + fundraiser_donation_count_disclose_threshold=0, + fundraiser_endtime=datetime.datetime(2000, 1, 1) + ) + + def test_about_page_served(self): + request = self.factory.get('/about/') + with self.assertTemplateUsed('about/index.html'): + response = views.index(request).render() + self.assertContains(response, 'Conservancy is a nonprofit organization') + + def test_annual_report_file_served(self): + request = self.factory.get('/docs/conservancy_annual-report_fy-2011.pdf') + response = views.index(request) + self.assertEqual(response.headers['Content-Type'], 'application/pdf') + + def test_path_traversal_404s(self): + request = self.factory.get('/about/../../settings.py') + with self.assertRaises(Http404): + views.index(request) diff --git a/conservancy/urls.py b/conservancy/urls.py index 1e9e7b71..dad74f22 100644 --- a/conservancy/urls.py +++ b/conservancy/urls.py @@ -40,8 +40,6 @@ urlpatterns = [ url(r'^news/', include('conservancy.news.urls')), url(r'^blog/', include('conservancy.blog.urls')), # formerly static templated things... (dirs with templates) - url(r'^error/(40[134]|500)(?:/index\.html|/|)$', static_views.handler), - url(r'^error', static_views.index), url(r'^about', static_views.index), url(r'^activities', static_views.index), url(r'^donate', static_views.index), diff --git a/conservancy/views.py b/conservancy/views.py index bcdd2a78..fe4de08d 100644 --- a/conservancy/views.py +++ b/conservancy/views.py @@ -1,61 +1,51 @@ import mimetypes -import os.path -from django.http import HttpResponse +from django.conf import settings +from django.http import Http404 +from django.http import FileResponse from django.template.response import TemplateResponse from .local_context_processors import fundgoal_lookup -# This component doesn't actually have anything to do with Django's -# "staticfiles" app. It's only that its templates happen to be located in the -# STATIC_ROOT directory. They probably shouldn't be there. -STATIC_ROOT = os.path.abspath(os.path.dirname(__file__)) -FILESYSTEM_ENCODING = 'utf-8' - -def handler(request, errorcode): - path = os.path.join('error', str(errorcode), 'index.html') - fullpath = os.path.join(STATIC_ROOT, path) - if not os.path.exists(fullpath): - return HttpResponse("Internal error: " + path, status=int(errorcode)) - else: - return TemplateResponse(request, path, status=int(errorcode)) - -def handler401(request): - return handler(request, 401) - -def handler403(request): - return handler(request, 403) - -def handler404(request): - return handler(request, 404) - -def handler500(request): - return handler(request, 500) - def index(request, *args, **kwargs): + """Faux CMS: bulk website content stored in templates and document files. + + Rationale: Many websites have a CMS and store the majority of their website + content in a relational database eg. WordPress or Wagtail. That's useful + because various people can easily be given access to edit the website. The + downside is that is application complexity - the management of who change + what, when it changed and what changed becomes an application concern. At + the other end of the spectrum, we have files that are checked into a Git + repository - we get the precise who/what/when out of the box with Git, but + require you to have some technical knowledge and appropriate access to + commit. Since you're committing to a code repository, this also opens up the + possibility to break things you couldn't break via a CMS. + + This view serves most of the textual pages and documents on + sfconservancy.org. It works a little like Apache serving mixed PHP/static + files - it looks at the URL and tries to find a matching file on the + filesystem. If it finds a template, it renders it via Django's template + infrastructure. If it finds a file but it's not a template, it will serve + the file as-is. + """ + # The name "static" has no connection to Django staticfiles. + base_path = settings.BASE_DIR / 'static' path = request.path.lstrip('/') if path.endswith('/'): path += 'index.html' - fullpath = os.path.join(STATIC_ROOT, 'static', path) - try: - # Junk URLs in production (Python 3.5) are causing UnicodeEncodeErrors - # here. Can't reproduce in development in Python 3.9 - only Python 2.7. - if not os.path.exists(fullpath): - return handler404(request) - except UnicodeEncodeError: - return handler404(request) - content_type, _ = mimetypes.guess_type(path) - if content_type != 'text/html': - return HttpResponse(open(fullpath, 'rb'), content_type) + full_path = (base_path / path).resolve() + safe_from_path_traversal = full_path.is_relative_to(base_path) + if not full_path.exists() or not safe_from_path_traversal: + raise Http404() + is_template = mimetypes.guess_type(full_path)[0] == 'text/html' + if not is_template: + return FileResponse(open(full_path, 'rb')) else: context = kwargs.copy() try: context['fundgoal'] = fundgoal_lookup(kwargs['fundraiser_sought']) except KeyError: pass + # Maybe this should open() the template file directly so that these + # don't have to be included in the global template TEMPLATES.DIRS? return TemplateResponse(request, path, context) - -def debug(request): - path = request.get_full_path() - path = path.lstrip('/') - return HttpResponse("Hello, static world: " + path)