forked from mirrors/bookwyrm
Adds some simple url validation
This commit is contained in:
parent
ebc3f14f22
commit
3e635f497e
2 changed files with 29 additions and 2 deletions
|
@ -1,7 +1,9 @@
|
||||||
""" functionality outline for a book data connector """
|
""" functionality outline for a book data connector """
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
import imghdr
|
import imghdr
|
||||||
|
import ipaddress
|
||||||
import logging
|
import logging
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from django.core.files.base import ContentFile
|
from django.core.files.base import ContentFile
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
@ -250,6 +252,8 @@ def dict_from_mappings(data, mappings):
|
||||||
def get_data(url, params=None, timeout=10):
|
def get_data(url, params=None, timeout=10):
|
||||||
"""wrapper for request.get"""
|
"""wrapper for request.get"""
|
||||||
# check if the url is blocked
|
# check if the url is blocked
|
||||||
|
raise_not_valid_url(url)
|
||||||
|
|
||||||
if models.FederatedServer.is_blocked(url):
|
if models.FederatedServer.is_blocked(url):
|
||||||
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
|
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
|
||||||
|
|
||||||
|
@ -282,6 +286,7 @@ def get_data(url, params=None, timeout=10):
|
||||||
|
|
||||||
def get_image(url, timeout=10):
|
def get_image(url, timeout=10):
|
||||||
"""wrapper for requesting an image"""
|
"""wrapper for requesting an image"""
|
||||||
|
raise_not_valid_url(url)
|
||||||
try:
|
try:
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
url,
|
url,
|
||||||
|
@ -306,6 +311,20 @@ def get_image(url, timeout=10):
|
||||||
return image_content, extension
|
return image_content, extension
|
||||||
|
|
||||||
|
|
||||||
|
def raise_not_valid_url(url):
    """do some basic reality checks on the url

    Raises ConnectorException when the url's scheme is anything other than
    http/https, or when its host is a literal IPv4/IPv6 address. Returns
    None when the url passes both checks.

    Only IP *literals* are rejected here; a hostname is accepted without
    being resolved.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ConnectorException("Invalid scheme: ", url)

    # `netloc` still contains any "user:pass@" prefix, ":port" suffix and the
    # square brackets around an IPv6 literal, so "127.0.0.1:8080" or "[::1]"
    # would never parse as an address. `hostname` is the bare host.
    host = parsed.hostname or ""

    try:
        ipaddress.ip_address(host)
    except ValueError:
        # it's not an IP address, which is good
        return

    raise ConnectorException("Provided url is an IP address: ", url)
|
||||||
|
|
||||||
|
|
||||||
class Mapping:
|
class Mapping:
|
||||||
"""associate a local database field with a field in an external dataset"""
|
"""associate a local database field with a field in an external dataset"""
|
||||||
|
|
||||||
|
|
|
@ -4,8 +4,8 @@ from django.test import TestCase
|
||||||
import responses
|
import responses
|
||||||
|
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
from bookwyrm.connectors import abstract_connector
|
from bookwyrm.connectors import abstract_connector, ConnectorException
|
||||||
from bookwyrm.connectors.abstract_connector import Mapping
|
from bookwyrm.connectors.abstract_connector import Mapping, get_data
|
||||||
from bookwyrm.settings import DOMAIN
|
from bookwyrm.settings import DOMAIN
|
||||||
|
|
||||||
|
|
||||||
|
@ -163,3 +163,11 @@ class AbstractConnector(TestCase):
|
||||||
author.refresh_from_db()
|
author.refresh_from_db()
|
||||||
self.assertEqual(author.name, "Test")
|
self.assertEqual(author.name, "Test")
|
||||||
self.assertEqual(author.isni, "hi")
|
self.assertEqual(author.isni, "hi")
|
||||||
|
|
||||||
|
def test_get_data_invalid_url(self):
    """get_data raises for a non-http(s) scheme and for an IP address host"""
    rejected_urls = (
        "file://hello.com/image/jpg",
        "http://127.0.0.1/image/jpg",
    )
    for bad_url in rejected_urls:
        with self.assertRaises(ConnectorException):
            get_data(bad_url)
|
||||||
|
|
Loading…
Reference in a new issue