mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-22 17:41:08 +00:00
Merge pull request #1915 from bookwyrm-social/url-validation
Adds some simple url validation
This commit is contained in:
commit
e80a4c16f0
2 changed files with 29 additions and 2 deletions
|
@ -1,7 +1,9 @@
|
|||
""" functionality outline for a book data connector """
|
||||
from abc import ABC, abstractmethod
|
||||
import imghdr
|
||||
import ipaddress
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.core.files.base import ContentFile
|
||||
from django.db import transaction
|
||||
|
@ -250,6 +252,8 @@ def dict_from_mappings(data, mappings):
|
|||
def get_data(url, params=None, timeout=10):
|
||||
"""wrapper for request.get"""
|
||||
# check if the url is blocked
|
||||
raise_not_valid_url(url)
|
||||
|
||||
if models.FederatedServer.is_blocked(url):
|
||||
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
|
||||
|
||||
|
@ -282,6 +286,7 @@ def get_data(url, params=None, timeout=10):
|
|||
|
||||
def get_image(url, timeout=10):
|
||||
"""wrapper for requesting an image"""
|
||||
raise_not_valid_url(url)
|
||||
try:
|
||||
resp = requests.get(
|
||||
url,
|
||||
|
@ -306,6 +311,20 @@ def get_image(url, timeout=10):
|
|||
return image_content, extension
|
||||
|
||||
|
||||
def raise_not_valid_url(url):
    """Do some basic reality checks on the url.

    Raises:
        ConnectorException: if the scheme is not http or https, or if the
            host part of the url is a literal IP address.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ConnectorException(f"Invalid scheme: {url}")

    # netloc still carries any "user:pass@" prefix, ":port" suffix and IPv6
    # brackets, and each of those makes ip_address() reject a real IP
    # literal. hostname is the bare host with all of that stripped.
    host = parsed.hostname or ""
    try:
        ipaddress.ip_address(host)
    except ValueError:
        # it's not an IP address, which is good
        return
    raise ConnectorException(f"Provided url is an IP address: {url}")
|
||||
|
||||
|
||||
class Mapping:
|
||||
"""associate a local database field with a field in an external dataset"""
|
||||
|
||||
|
|
|
@ -4,8 +4,8 @@ from django.test import TestCase
|
|||
import responses
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.connectors import abstract_connector
|
||||
from bookwyrm.connectors.abstract_connector import Mapping
|
||||
from bookwyrm.connectors import abstract_connector, ConnectorException
|
||||
from bookwyrm.connectors.abstract_connector import Mapping, get_data
|
||||
from bookwyrm.settings import DOMAIN
|
||||
|
||||
|
||||
|
@ -163,3 +163,11 @@ class AbstractConnector(TestCase):
|
|||
author.refresh_from_db()
|
||||
self.assertEqual(author.name, "Test")
|
||||
self.assertEqual(author.isni, "hi")
|
||||
|
||||
def test_get_data_invalid_url(self):
    """get_data refuses a non-http(s) scheme and a bare IP address host"""
    rejected_urls = (
        "file://hello.com/image/jpg",
        "http://127.0.0.1/image/jpg",
    )
    for rejected_url in rejected_urls:
        with self.assertRaises(ConnectorException):
            get_data(rejected_url)
|
||||
|
|
Loading…
Reference in a new issue