Merge pull request #1915 from bookwyrm-social/url-validation

Adds some simple url validation
This commit is contained in:
Mouse Reeve 2022-02-04 08:45:43 -08:00 committed by GitHub
commit e80a4c16f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 2 deletions

View file

@ -1,7 +1,9 @@
""" functionality outline for a book data connector """
from abc import ABC, abstractmethod
import imghdr
import ipaddress
import logging
from urllib.parse import urlparse
from django.core.files.base import ContentFile
from django.db import transaction
@ -250,6 +252,8 @@ def dict_from_mappings(data, mappings):
def get_data(url, params=None, timeout=10):
"""wrapper for request.get"""
# check if the url is blocked
raise_not_valid_url(url)
if models.FederatedServer.is_blocked(url):
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
@ -282,6 +286,7 @@ def get_data(url, params=None, timeout=10):
def get_image(url, timeout=10):
"""wrapper for requesting an image"""
raise_not_valid_url(url)
try:
resp = requests.get(
url,
@ -306,6 +311,20 @@ def get_image(url, timeout=10):
return image_content, extension
def raise_not_valid_url(url):
"""do some basic reality checks on the url"""
parsed = urlparse(url)
if not parsed.scheme in ["http", "https"]:
raise ConnectorException("Invalid scheme: ", url)
try:
ipaddress.ip_address(parsed.netloc)
raise ConnectorException("Provided url is an IP address: ", url)
except ValueError:
# it's not an IP address, which is good
pass
class Mapping:
"""associate a local database field with a field in an external dataset"""

View file

@ -4,8 +4,8 @@ from django.test import TestCase
import responses
from bookwyrm import models
from bookwyrm.connectors import abstract_connector
from bookwyrm.connectors.abstract_connector import Mapping
from bookwyrm.connectors import abstract_connector, ConnectorException
from bookwyrm.connectors.abstract_connector import Mapping, get_data
from bookwyrm.settings import DOMAIN
@ -163,3 +163,11 @@ class AbstractConnector(TestCase):
author.refresh_from_db()
self.assertEqual(author.name, "Test")
self.assertEqual(author.isni, "hi")
def test_get_data_invalid_url(self):
"""load json data from an arbitrary url"""
with self.assertRaises(ConnectorException):
get_data("file://hello.com/image/jpg")
with self.assertRaises(ConnectorException):
get_data("http://127.0.0.1/image/jpg")