Rewrite the index view to avoid risk of path traversal

I've simplified this view by removing the custom HTTP error handlers, Python 3.5
exception handling and adding documentation.
This commit is contained in:
Ben Sturmfels 2024-03-13 13:13:42 +11:00
parent e50baa3f96
commit 94c56bb468
Signed by: bsturmfels
GPG key ID: 023C05E2C9C068F0
3 changed files with 70 additions and 46 deletions

36
conservancy/tests.py Normal file
View file

@ -0,0 +1,36 @@
import datetime
from django.http import Http404
from django.test import RequestFactory, TestCase
from . import views
from conservancy.fundgoal.models import FundraisingGoal
class ContentTest(TestCase):
def setUp(self):
self.factory = RequestFactory()
FundraisingGoal.objects.create(
fundraiser_code_name='cy2023-end-year-match',
fundraiser_goal_amount=0,
fundraiser_so_far_amount=0,
fundraiser_donation_count=0,
fundraiser_donation_count_disclose_threshold=0,
fundraiser_endtime=datetime.datetime(2000, 1, 1)
)
def test_about_page_served(self):
request = self.factory.get('/about/')
with self.assertTemplateUsed('about/index.html'):
response = views.index(request).render()
self.assertContains(response, 'Conservancy is a nonprofit organization')
def test_annual_report_file_served(self):
request = self.factory.get('/docs/conservancy_annual-report_fy-2011.pdf')
response = views.index(request)
self.assertEqual(response.headers['Content-Type'], 'application/pdf')
def test_path_traversal_404s(self):
request = self.factory.get('/about/../../settings.py')
with self.assertRaises(Http404):
views.index(request)

View file

@ -40,8 +40,6 @@ urlpatterns = [
url(r'^news/', include('conservancy.news.urls')),
url(r'^blog/', include('conservancy.blog.urls')),
# formerly static templated things... (dirs with templates)
url(r'^error/(40[134]|500)(?:/index\.html|/|)$', static_views.handler),
url(r'^error', static_views.index),
url(r'^about', static_views.index),
url(r'^activities', static_views.index),
url(r'^donate', static_views.index),

View file

@ -1,61 +1,51 @@
import mimetypes
import os.path
from django.http import HttpResponse
from django.conf import settings
from django.http import Http404
from django.http import FileResponse
from django.template.response import TemplateResponse
from .local_context_processors import fundgoal_lookup
# This component doesn't actually have anything to do with Django's
# "staticfiles" app. It's only that its templates happen to be located in the
# STATIC_ROOT directory. They probably shouldn't be there.
STATIC_ROOT = os.path.abspath(os.path.dirname(__file__))
FILESYSTEM_ENCODING = 'utf-8'
def handler(request, errorcode):
path = os.path.join('error', str(errorcode), 'index.html')
fullpath = os.path.join(STATIC_ROOT, path)
if not os.path.exists(fullpath):
return HttpResponse("Internal error: " + path, status=int(errorcode))
else:
return TemplateResponse(request, path, status=int(errorcode))
def handler401(request):
return handler(request, 401)
def handler403(request):
return handler(request, 403)
def handler404(request):
return handler(request, 404)
def handler500(request):
return handler(request, 500)
def index(request, *args, **kwargs):
"""Faux CMS: bulk website content stored in templates and document files.
Rationale: Many websites have a CMS and store the majority of their website
content in a relational database eg. WordPress or Wagtail. That's useful
because various people can easily be given access to edit the website. The
downside is that is application complexity - the management of who change
what, when it changed and what changed becomes an application concern. At
the other end of the spectrum, we have files that are checked into a Git
repository - we get the precise who/what/when out of the box with Git, but
require you to have some technical knowledge and appropriate access to
commit. Since you're committing to a code repository, this also opens up the
possibility to break things you couldn't break via a CMS.
This view serves most of the textual pages and documents on
sfconservancy.org. It works a little like Apache serving mixed PHP/static
files - it looks at the URL and tries to find a matching file on the
filesystem. If it finds a template, it renders it via Django's template
infrastructure. If it finds a file but it's not a template, it will serve
the file as-is.
"""
# The name "static" has no connection to Django staticfiles.
base_path = settings.BASE_DIR / 'static'
path = request.path.lstrip('/')
if path.endswith('/'):
path += 'index.html'
fullpath = os.path.join(STATIC_ROOT, 'static', path)
try:
# Junk URLs in production (Python 3.5) are causing UnicodeEncodeErrors
# here. Can't reproduce in development in Python 3.9 - only Python 2.7.
if not os.path.exists(fullpath):
return handler404(request)
except UnicodeEncodeError:
return handler404(request)
content_type, _ = mimetypes.guess_type(path)
if content_type != 'text/html':
return HttpResponse(open(fullpath, 'rb'), content_type)
full_path = (base_path / path).resolve()
safe_from_path_traversal = full_path.is_relative_to(base_path)
if not full_path.exists() or not safe_from_path_traversal:
raise Http404()
is_template = mimetypes.guess_type(full_path)[0] == 'text/html'
if not is_template:
return FileResponse(open(full_path, 'rb'))
else:
context = kwargs.copy()
try:
context['fundgoal'] = fundgoal_lookup(kwargs['fundraiser_sought'])
except KeyError:
pass
# Maybe this should open() the template file directly so that these
# don't have to be included in the global template TEMPLATES.DIRS?
return TemplateResponse(request, path, context)
def debug(request):
path = request.get_full_path()
path = path.lstrip('/')
return HttpResponse("Hello, static world: " + path)