Blogs/news only include a single OG image/video.
For now, this gives us more assurance that other sites will choose the preview we want. You can control the selection by adding a data-ogpreview attribute to image, video, and source elements. data-ogpreview=0 excludes the element from the preview. Positive numbers set the preview priority; the element with the lowest positive value is chosen first.
This commit is contained in:
		
							parent
							
								
									5c75decd30
								
							
						
					
					
						commit
						703df9c8e9
					
				
					 3 changed files with 60 additions and 21 deletions
				
			
		|  | @ -1,6 +1,7 @@ | |||
| # -*- encoding: utf-8 -*- | ||||
| 
 | ||||
| import io | ||||
| import itertools | ||||
| import re | ||||
| 
 | ||||
| import bs4 | ||||
|  | @ -86,25 +87,22 @@ class BeautifulSoup(bs4.BeautifulSoup): | |||
|         except AttributeError: | ||||
|             return False | ||||
| 
 | ||||
|     def iter_attr(self, tag, attr_name, **kwargs): | ||||
|         kwargs[attr_name] = True | ||||
|         for elem in self.find_all(tag, **kwargs): | ||||
|             yield elem[attr_name] | ||||
|     def iter_images(self): | ||||
|         """Return an iterator of all image elements in this document. | ||||
| 
 | ||||
|     def iter_image_urls(self): | ||||
|         """Return an iterator of source URL strings of all images in this document. | ||||
| 
 | ||||
|         Images include <img> tags and <video> poster attributes. | ||||
|         Images include <img> and <video> with a poster attribute. | ||||
|         """ | ||||
|         for elem in self.find_all(list(self.IMAGE_ATTRS.keys())): | ||||
|             try: | ||||
|                 yield elem[self.IMAGE_ATTRS[elem.name]] | ||||
|                 elem[self.IMAGE_ATTRS[elem.name]] | ||||
|             except KeyError: | ||||
|                 pass | ||||
|             else: | ||||
|                 yield elem | ||||
| 
 | ||||
|     def iter_video_urls(self): | ||||
|         """Return an iterator of source URL strings of all videos in this document.""" | ||||
|         return self.iter_attr(self.is_video_source, 'src') | ||||
|     def iter_videos(self): | ||||
|         """Return an iterator of all video source elements in this document.""" | ||||
|         return self.find_all(self.is_video_source, src=True) | ||||
| 
 | ||||
| 
 | ||||
| class SoupModelMixin: | ||||
|  | @ -115,6 +113,7 @@ class SoupModelMixin: | |||
|     are usable. | ||||
|     """ | ||||
| 
 | ||||
|     OG_PREVIEW_ATTR = 'data-ogpreview' | ||||
|     SOUP_ATTRS = [] | ||||
| 
 | ||||
|     def _get_soup(self): | ||||
|  | @ -128,17 +127,57 @@ class SoupModelMixin: | |||
|             self._soup = BeautifulSoup(html) | ||||
|             return self._soup | ||||
| 
 | ||||
|     def _elem_key(self, attr_name=OG_PREVIEW_ATTR, getvalue=int, fallback=999999): | ||||
|         def elem_sort_key(elem): | ||||
|             try: | ||||
|                 sort_key = getvalue(elem[attr_name]) | ||||
|             except (KeyError, ValueError): | ||||
|                 sort_key = fallback | ||||
|             elem[attr_name] = sort_key | ||||
|             return sort_key | ||||
|         return elem_sort_key | ||||
| 
 | ||||
|     def _elem_pred(self, attr_name=OG_PREVIEW_ATTR, test=lambda n: n > 0): | ||||
|         def elem_pred(elem): | ||||
|             return test(elem[attr_name]) | ||||
|         return elem_pred | ||||
| 
 | ||||
|     def _sort_and_slice_elems(self, elem_seq, elem_key, pred, *slice_args): | ||||
|         seq = itertools.ifilter(pred, sorted(elem_seq, key=elem_key)) | ||||
|         if slice_args: | ||||
|             return itertools.islice(seq, *slice_args) | ||||
|         else: | ||||
|             return seq | ||||
| 
 | ||||
|     def get_description(self): | ||||
|         """Return a string with a brief excerpt of body text from the HTML.""" | ||||
|         return u''.join(self._get_soup().some_body_text()) | ||||
| 
 | ||||
|     def get_image_urls(self): | ||||
|     def get_image_urls(self, *slice_args): | ||||
|         """Return an iterator of source URL strings of all images in the HTML. | ||||
| 
 | ||||
|         Images include <img> tags and <video> poster attributes. | ||||
|         Images include <img> sources and <video> poster attributes. | ||||
|         """ | ||||
|         return self._get_soup().iter_image_urls() | ||||
|         for elem in self._sort_and_slice_elems( | ||||
|                 self._get_soup().iter_images(), | ||||
|                 self._elem_key(), | ||||
|                 self._elem_pred(), | ||||
|                 *slice_args | ||||
|         ): | ||||
|             yield elem[BeautifulSoup.IMAGE_ATTRS[elem.name]] | ||||
| 
 | ||||
|     def get_video_urls(self): | ||||
|     def get_one_image_url(self): | ||||
|         return self.get_image_urls(1) | ||||
| 
 | ||||
|     def get_video_urls(self, *slice_args): | ||||
|         """Return an iterator of source URL strings of all videos in the HTML.""" | ||||
|         return self._get_soup().iter_video_urls() | ||||
|         for elem in self._sort_and_slice_elems( | ||||
|                 self._get_soup().iter_videos(), | ||||
|                 self._elem_key(), | ||||
|                 self._elem_pred(), | ||||
|                 *slice_args | ||||
|         ): | ||||
|             yield elem['src'] | ||||
| 
 | ||||
|     def get_one_video_url(self): | ||||
|         return self.get_video_urls(1) | ||||
|  |  | |||
|  | @ -2,8 +2,8 @@ | |||
| 
 | ||||
| {% block head %} | ||||
| {% include "opengraph_partial.html" with url=object.get_absolute_url title=object.headline description=object.get_description %} | ||||
| {% include "opengraph_urllist_partial.html" with property='image' urls=object.get_image_urls fallback='/img/conservancy-logo.png' %} | ||||
| {% include "opengraph_urllist_partial.html" with property='video' urls=object.get_video_urls %} | ||||
| {% include "opengraph_urllist_partial.html" with property='image' urls=object.get_one_image_url fallback='/img/conservancy-logo.png' %} | ||||
| {% include "opengraph_urllist_partial.html" with property='video' urls=object.get_one_video_url %} | ||||
| {% endblock %} | ||||
| 
 | ||||
| {% block subtitle %}{{ object.headline|striptags|safe }} - Conservancy Blog - {% endblock %} | ||||
|  |  | |||
|  | @ -2,8 +2,8 @@ | |||
| 
 | ||||
| {% block head %} | ||||
| {% include "opengraph_partial.html" with url=object.get_absolute_url title=object.headline description=object.get_description %} | ||||
| {% include "opengraph_urllist_partial.html" with property='image' urls=object.get_image_urls fallback='/img/conservancy-logo.png' %} | ||||
| {% include "opengraph_urllist_partial.html" with property='video' urls=object.get_video_urls %} | ||||
| {% include "opengraph_urllist_partial.html" with property='image' urls=object.get_one_image_url fallback='/img/conservancy-logo.png' %} | ||||
| {% include "opengraph_urllist_partial.html" with property='video' urls=object.get_one_video_url %} | ||||
| {% endblock %} | ||||
| 
 | ||||
| {% block subtitle %}{{ object.headline|striptags|safe }} - {% endblock %} | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Brett Smith
						Brett Smith