Sanitizes html input

2020-02-14 21:45:13 -08:00 · 2020-02-14 21:45:13 -08:00 · 6df9700604
commit 6df9700604
parent f414884e00
4 changed files with 59 additions and 1 deletions
--- a/fedireads/incoming.py
+++ b/fedireads/incoming.py
@ -13,6 +13,7 @@ from uuid import uuid4
 from fedireads import models
 from fedireads.remote_user import get_or_create_remote_user
 from fedireads.openlibrary import get_or_create_book
 from fedireads.sanitize_html import InputHtmlParser
 from fedireads.settings import DOMAIN
@ -321,6 +322,9 @@ def create_review(user, activity):
        return HttpResponseNotFound('Book \'%s\' not found' % possible_book)
    content = activity['object'].get('content')
    parser = InputHtmlParser()
    parser.feed(content)
    content = parser.get_output()
    review_title = activity['object'].get('name', 'Untitled')
    rating = activity['object'].get('rating', 0)
--- a/fedireads/sanitize_html.py
+++ b/fedireads/sanitize_html.py
@ -0,0 +1,51 @@
 ''' html parser that strips non-whitelisted markup from user-supplied text '''
 from html import escape
 from html.parser import HTMLParser
 class InputHtmlParser(HTMLParser):
    ''' Removes any html that isn't whitelisted from a block

    Whitelisted tags are kept with their attributes removed; every other
    tag is replaced by a single space. Text is html-escaped on the way out,
    so the result of get_output() can be rendered without further escaping.
    If the markup is unbalanced (a close tag that doesn't match the most
    recent open tag, or a tag still open at the end of the input), all tags
    are dropped and only the escaped text is returned.

    Usage: create an instance, feed() it the input, then call get_output().
    '''
    def __init__(self):
        # character references are decoded by the parser and re-escaped in
        # handle_data, so entity-encoded markup can't slip through as tags
        super().__init__(convert_charrefs=True)
        self.whitelist = ['p', 'b', 'i', 'pre']
        # whitelisted tags that have been opened but not yet closed
        self.tag_stack = []
        # list of (kind, text) tuples, where kind is 'tag' or 'data'
        self.output = []
        # if the html appears invalid, we just won't allow any at all
        self.allow_html = True
    def handle_starttag(self, tag, attrs):
        ''' keep a whitelisted open tag (without attributes), else a space '''
        if self.allow_html and tag in self.whitelist:
            self.output.append(('tag', '<%s>' % tag))
            self.tag_stack.append(tag)
        else:
            self.output.append(('data', ' '))
    def handle_endtag(self, tag):
        ''' keep a whitelisted close tag only if it matches the open tag '''
        if not self.allow_html or tag not in self.whitelist:
            self.output.append(('data', ' '))
            return
        if not self.tag_stack or self.tag_stack[-1] != tag:
            # the end tag doesn't match the most recent start tag
            self.allow_html = False
            self.output.append(('data', ' '))
            return
        self.tag_stack.pop()
        self.output.append(('tag', '</%s>' % tag))
    def handle_data(self, data):
        ''' keep text content, escaped so it can never be read as markup '''
        # the parser hands us decoded text ("&lt;" arrives as "<"), so it
        # has to be escaped again before it goes back into html output
        self.output.append(('data', escape(data, quote=False)))
    def get_output(self):
        ''' convert the output from a list of tuples to a string '''
        # flush text the parser is still buffering (e.g. a trailing "AT&T",
        # which it holds back in case a character reference is incomplete)
        self.close()
        # a tag that was never closed makes the html invalid as well
        if not self.allow_html or self.tag_stack:
            return ''.join(v for (k, v) in self.output if k == 'data')
        return ''.join(v for (k, v) in self.output)
--- a/fedireads/static/format.css
+++ b/fedireads/static/format.css
@ -4,6 +4,9 @@
    padding: 0;
    line-height: 1.3em;
    font-family: sans-serif;
 }
 /* render h1-h4 headings at normal font weight */
 h1, h2, h3, h4 {
    font-weight: normal;
 }
--- a/fedireads/templates/feed.html
+++ b/fedireads/templates/feed.html
@ -80,7 +80,7 @@
                <h3>{{ activity.name }}</h3>
                <p>{{ activity.rating | stars }}</p>
-                <p>{{ activity.review_content }}</p>
+                <p>{{ activity.review_content | safe }}</p>
            </div>
            <div class="interaction"><button>⭐️ Like</button></div>
        {% elif activity.activity_type == 'Follow' %}