blog/news: Add Open Graph metadata to entry pages.
This helps other social media sites generate nice previews for these pages.
This commit is contained in:
parent
87961c6cee
commit
3b2ed8397d
7 changed files with 228 additions and 2 deletions
|
@ -1,5 +1,6 @@
|
|||
from django.db import models
|
||||
from django.conf import settings
|
||||
from conservancy import bsoup
|
||||
from conservancy.apps.staff.models import Person
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
@ -18,7 +19,7 @@ class EntryTag(models.Model):
|
|||
def get_absolute_url(self):
|
||||
return u"/blog/?tag=%s" % self.slug
|
||||
|
||||
class Entry(models.Model):
|
||||
class Entry(models.Model, bsoup.SoupModelMixin):
|
||||
"""Blog entry"""
|
||||
|
||||
headline = models.CharField(max_length=200)
|
||||
|
@ -38,6 +39,8 @@ class Entry(models.Model):
|
|||
ordering = ('-pub_date',)
|
||||
get_latest_by = 'pub_date'
|
||||
|
||||
SOUP_ATTRS = ['body']
|
||||
|
||||
def __unicode__(self):
|
||||
return self.headline
|
||||
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
from django.db import models
|
||||
from django.conf import settings
|
||||
from conservancy import bsoup
|
||||
from conservancy.apps.staff.models import Person
|
||||
from conservancy.apps.events.models import Event
|
||||
from django.contrib.sites.models import Site
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
class PressRelease(models.Model):
|
||||
class PressRelease(models.Model, bsoup.SoupModelMixin):
|
||||
"""News release model"""
|
||||
|
||||
headline = models.CharField(max_length=300)
|
||||
|
@ -24,6 +25,8 @@ class PressRelease(models.Model):
|
|||
ordering = ("-pub_date",)
|
||||
get_latest_by = "pub_date"
|
||||
|
||||
SOUP_ATTRS = ['summary', 'body']
|
||||
|
||||
def __unicode__(self):
|
||||
return self.headline
|
||||
|
||||
|
|
144
www/conservancy/bsoup.py
Normal file
144
www/conservancy/bsoup.py
Normal file
|
@ -0,0 +1,144 @@
|
|||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import io
|
||||
import re
|
||||
|
||||
import bs4
|
||||
import bs4.element
|
||||
|
||||
class BeautifulSoup(bs4.BeautifulSoup):
    """Extend bs4.BeautifulSoup with body-text and media-URL helpers."""

    # Tag name -> the attribute on that tag that holds an image URL.
    IMAGE_ATTRS = {
        'img': 'src',
        'video': 'poster',
    }
    # Tags that never contribute body text themselves (see _body_text).
    NON_BODY_TEXT_TAGS = frozenset([
        'img',
        'video',
    ])
    # A string "looks like the end of a sentence" when it ends with ., ? or !
    # optionally followed by whitespace and non-word characters.
    SENTENCE_END = re.compile(r'[.?!]\s*\W*\s*$')

    def __init__(self, src, parser='html5lib'):
        """Parse `src` using `parser` (html5lib unless the caller overrides it)."""
        # WARNING! 'lxml' looks like the better default for speed, but it
        # doesn't work in our web application: on Debian stretch, at least,
        # using lxml sends the web server WSGI application into an infinite
        # loop.
        super(BeautifulSoup, self).__init__(src, parser)

    def _body_text(self, root):
        """Yield the body-text strings found under `root`, in document order.

        Body text is every string under `root` except:

        * strings inside NON_BODY_TEXT_TAGS elements
        * strings inside *containers* of NON_BODY_TEXT_TAGS elements.  A
          container is an element whose first child is one of those tags.
          For example, in <div> <video …> … </div>, none of the div's
          strings count, because the div is treated as a <video> container
          and its strings are probably a caption, fallback text, or other
          non-body text.

        Whitespace-only strings are skipped until the first non-whitespace
        string directly under `root` has been yielded.
        """
        # NOTE(review): `seen_text` only tracks strings that are direct
        # children of `root`.  Text yielded from a nested tag does not set
        # it, so <p>text</p><img> at the same level also ends the walk of
        # `root` at the <img> -- confirm that is intended.
        seen_text = False
        for node in root.children:
            if isinstance(node, bs4.element.Tag):
                if node.name not in self.NON_BODY_TEXT_TAGS:
                    for text in self._body_text(node):
                        yield text
                elif not seen_text:
                    # `root` is a container for this tag: drop the rest of it.
                    return
            # An exact type check is deliberate: nodes without body text,
            # like Comments and CDATA, are subclasses of NavigableString,
            # so an isinstance() test would wrongly include them.
            elif type(node) is bs4.element.NavigableString:
                if not seen_text:
                    if node.isspace():
                        continue
                    seen_text = True
                yield node

    def body_text(self):
        """Return an iterator over the strings that make up the whole document's body text."""
        return self._body_text(self)

    def some_body_text(self, char_target=300):
        """Return an iterator over a leading excerpt of the document's body text.

        Yields the same strings as body_text, but stops right after
        yielding a string that looks like the end of a sentence, provided
        more than `char_target` characters have been yielded in total.
        """
        # Only the *end* of each yielded string is inspected, never its
        # interior, so this can overshoot `char_target` by a lot.  Something
        # smarter can be implemented later if needed.
        total = 0
        for text in self.body_text():
            yield text
            total += len(text)
            if total <= char_target:
                continue
            if self.SENTENCE_END.search(text):
                return

    @staticmethod
    def is_video_source(elem):
        """Return True if `elem` is a <source> element directly inside a <video>."""
        try:
            if elem.name != 'source':
                return False
            return elem.parent.name == 'video'
        except AttributeError:
            # `elem` (or its parent) has no `name`, so it can't be a match.
            return False

    def iter_attr(self, tag, attr_name, **kwargs):
        """Yield the `attr_name` value of every `tag` match that has that attribute.

        `tag` and any extra keyword arguments are handed to find_all as
        search criteria.
        """
        # Filtering on attr_name=True restricts the search to elements that
        # define the attribute, so the lookup below cannot fail.
        kwargs[attr_name] = True
        for match in self.find_all(tag, **kwargs):
            yield match[attr_name]

    def iter_image_urls(self):
        """Return an iterator of source URL strings of all images in this document.

        Images include <img> tags and <video> poster attributes.
        """
        url_attrs = self.IMAGE_ATTRS
        for elem in self.find_all(list(url_attrs)):
            try:
                url = elem[url_attrs[elem.name]]
            except KeyError:
                # The element lacks its URL attribute (e.g. a <video> with
                # no poster): nothing to report for it.
                continue
            yield url

    def iter_video_urls(self):
        """Return an iterator of source URL strings of all videos in this document."""
        return self.iter_attr(self.is_video_source, 'src')
|
||||
|
||||
|
||||
class SoupModelMixin:
    """Mixin for models to parse HTML with BeautifulSoup.

    Classes that use this mixin must define `SOUP_ATTRS`, a list of strings
    that name attributes with HTML in them.  After that, all the public
    methods are usable.  The attributes are concatenated in `SOUP_ATTRS`
    order and parsed as a single document; attributes whose value is None
    are skipped.
    """

    SOUP_ATTRS = []

    def _iter_soup_html(self):
        """Yield the value of each `SOUP_ATTRS` attribute that is not None."""
        for attr_name in self.SOUP_ATTRS:
            fragment = getattr(self, attr_name)
            # An unset (None) field has no HTML to contribute; writing it
            # to the buffer would raise TypeError and break the page.
            if fragment is not None:
                yield fragment

    def _get_soup(self):
        """Return the parsed document, building and caching it on first use.

        The result is stored on the instance as `_soup`, so the HTML is
        parsed at most once per instance.
        """
        try:
            return self._soup
        except AttributeError:
            html = io.StringIO()
            for fragment in self._iter_soup_html():
                html.write(fragment)
            # Rewind so the parser reads from the start of the buffer.
            html.seek(0)
            self._soup = BeautifulSoup(html)
            return self._soup

    def get_description(self):
        """Return a string with a brief excerpt of body text from the HTML."""
        return u''.join(self._get_soup().some_body_text())

    def get_image_urls(self):
        """Return an iterator of source URL strings of all images in the HTML.

        Images include <img> tags and <video> poster attributes.
        """
        return self._get_soup().iter_image_urls()

    def get_video_urls(self):
        """Return an iterator of source URL strings of all videos in the HTML."""
        return self._get_soup().iter_video_urls()
|
|
@ -1,5 +1,11 @@
|
|||
{% extends "base_blog.html" %}
|
||||
|
||||
{% block head %}
|
||||
{% include "opengraph_partial.html" with url=object.get_absolute_url title=object.headline description=object.get_description %}
|
||||
{% include "opengraph_urllist_partial.html" with property='image' urls=object.get_image_urls fallback='/img/conservancy-logo.png' %}
|
||||
{% include "opengraph_urllist_partial.html" with property='video' urls=object.get_video_urls %}
|
||||
{% endblock %}
|
||||
|
||||
{% block subtitle %}{{ object.headline|striptags|safe }} - Conservancy Blog - {% endblock %}
|
||||
|
||||
{% block content %}
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
{% extends "base_news.html" %}
|
||||
|
||||
{% block head %}
|
||||
{% include "opengraph_partial.html" with url=object.get_absolute_url title=object.headline description=object.get_description %}
|
||||
{% include "opengraph_urllist_partial.html" with property='image' urls=object.get_image_urls fallback='/img/conservancy-logo.png' %}
|
||||
{% include "opengraph_urllist_partial.html" with property='video' urls=object.get_video_urls %}
|
||||
{% endblock %}
|
||||
|
||||
{% block subtitle %}{{ object.headline|striptags|safe }} - {% endblock %}
|
||||
|
||||
{% block content %}
|
||||
|
|
38
www/conservancy/templates/opengraph_partial.html
Normal file
38
www/conservancy/templates/opengraph_partial.html
Normal file
|
@ -0,0 +1,38 @@
|
|||
{% comment %}

Include this partial in a head section to include basic Open Graph metadata.
Pass a variable `NAME` to give a value for the `og:NAME` property.

These properties are only listed if you give a value for them:

* url: A URL string that includes at least an absolute path.  It is passed
  through the `fill_url` filter with the context variable `host_url`, so a
  default scheme and host can be filled in if needed.
* title: A string.  Tags are stripped, then the rest is assumed HTML-safe
  and is output without escaping.
  NOTE(review): a title holding a literal double quote or ampersand would
  break the attribute.  Confirm that headlines are stored entity-escaped.
* description: A plain-text string.  Tags are stripped, then the rest is
  HTML-escaped by normal template autoescaping.  Descriptions built from
  parsed body text contain literal quotes, ampersands and angle brackets,
  so they must not be marked safe.

These properties are always included.  You can override them but you
normally shouldn't need to:

* type: Default "website".
* locale: Default "en_US".
* site_name: Default "Software Freedom Conservancy"

{% endcomment %}

<meta property="og:type" content="{{ type|default:"website" }}">
<meta property="og:locale" content="{{ locale|default:"en_US" }}">
<meta property="og:site_name" content="{{ site_name|default:"Software Freedom Conservancy" }}">

{% if url %}
{% load fill_url %}
<meta property="og:url" content="{{ url|fill_url:host_url }}">
{% endif %}

{% if title %}
<meta property="og:title" content="{{ title|striptags|safe }}">
{% endif %}

{% if description %}
<meta property="og:description" content="{{ description|striptags }}">
{% endif %}
|
26
www/conservancy/templates/opengraph_urllist_partial.html
Normal file
26
www/conservancy/templates/opengraph_urllist_partial.html
Normal file
|
@ -0,0 +1,26 @@
|
|||
{% comment %}
|
||||
|
||||
Include this partial in a head section to include a series of URLs for a
|
||||
given property, like og:image or og:video.
|
||||
|
||||
|
||||
You must pass the following variables:
|
||||
|
||||
* property: A string with the name of the property, like 'image' or 'video'.
|
||||
* urls: A sequence of URL strings. Each should include at least an absolute
|
||||
path. This partial will fill in a scheme and host if needed.
|
||||
|
||||
You may also pass:
|
||||
|
||||
* fallback: A URL string, following the same rules as in `urls`. This URL
|
||||
will be used if `urls` is empty.
|
||||
|
||||
One meta tag is output per URL. If `urls` is empty and no `fallback` is
given, nothing is output.

This partial also reads `host_url` from the template context and passes it
as the argument to the `fill_url` filter for every URL.
NOTE(review): presumably a context processor supplies `host_url`; confirm it
is set wherever this partial is included.
{% endcomment %}
|
||||
|
||||
{% load fill_url %}
|
||||
{% for url in urls %}
|
||||
<meta property="og:{{ property }}" content="{{ url|fill_url:host_url }}">
|
||||
{% empty %}
|
||||
{% if fallback %}
|
||||
<meta property="og:{{ property }}" content="{{ fallback|fill_url:host_url }}">
|
||||
{% endif %}
|
||||
{% endfor %}
|
Loading…
Reference in a new issue