Adds some simple url validation

This commit is contained in:
Mouse Reeve 2022-02-03 15:11:01 -08:00
parent ebc3f14f22
commit 3e635f497e
2 changed files with 29 additions and 2 deletions

View file

@ -1,7 +1,9 @@
""" functionality outline for a book data connector """ """ functionality outline for a book data connector """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import imghdr import imghdr
import ipaddress
import logging import logging
from urllib.parse import urlparse
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.db import transaction from django.db import transaction
@ -250,6 +252,8 @@ def dict_from_mappings(data, mappings):
def get_data(url, params=None, timeout=10): def get_data(url, params=None, timeout=10):
"""wrapper for request.get""" """wrapper for request.get"""
# check if the url is blocked # check if the url is blocked
raise_not_valid_url(url)
if models.FederatedServer.is_blocked(url): if models.FederatedServer.is_blocked(url):
raise ConnectorException(f"Attempting to load data from blocked url: {url}") raise ConnectorException(f"Attempting to load data from blocked url: {url}")
@ -282,6 +286,7 @@ def get_data(url, params=None, timeout=10):
def get_image(url, timeout=10): def get_image(url, timeout=10):
"""wrapper for requesting an image""" """wrapper for requesting an image"""
raise_not_valid_url(url)
try: try:
resp = requests.get( resp = requests.get(
url, url,
@ -306,6 +311,20 @@ def get_image(url, timeout=10):
return image_content, extension return image_content, extension
def raise_not_valid_url(url):
    """do some basic reality checks on the url

    Two checks are made on the parsed url:

    * the scheme must be ``http`` or ``https``
    * the host must not be a literal IP address (IPv4 or IPv6)

    The host is read from ``hostname`` rather than ``netloc`` so that a port
    (``127.0.0.1:8080``), credentials (``user@127.0.0.1``) or IPv6 brackets
    (``[::1]``) cannot hide an IP address from the check.

    :param url: the url that is about to be requested
    :raises ConnectorException: if the url fails either check
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ConnectorException(f"Invalid scheme: {url}")

    try:
        # hostname is None when the url has no host part; fall back to ""
        # which ip_address rejects with ValueError like any other non-IP
        ipaddress.ip_address(parsed.hostname or "")
    except ValueError:
        # it's not an IP address, which is good
        return

    raise ConnectorException(f"Provided url is an IP address: {url}")
class Mapping: class Mapping:
"""associate a local database field with a field in an external dataset""" """associate a local database field with a field in an external dataset"""

View file

@ -4,8 +4,8 @@ from django.test import TestCase
import responses import responses
from bookwyrm import models from bookwyrm import models
from bookwyrm.connectors import abstract_connector from bookwyrm.connectors import abstract_connector, ConnectorException
from bookwyrm.connectors.abstract_connector import Mapping from bookwyrm.connectors.abstract_connector import Mapping, get_data
from bookwyrm.settings import DOMAIN from bookwyrm.settings import DOMAIN
@ -163,3 +163,11 @@ class AbstractConnector(TestCase):
author.refresh_from_db() author.refresh_from_db()
self.assertEqual(author.name, "Test") self.assertEqual(author.name, "Test")
self.assertEqual(author.isni, "hi") self.assertEqual(author.isni, "hi")
def test_get_data_invalid_url(self):
    """get_data refuses a non-http(s) scheme and an IP address host"""
    rejected_urls = (
        "file://hello.com/image/jpg",
        "http://127.0.0.1/image/jpg",
    )
    # each url must be turned away with a ConnectorException
    for rejected_url in rejected_urls:
        with self.assertRaises(ConnectorException):
            get_data(rejected_url)