moviewyrm/bookwyrm/connectors/connector_manager.py

142 lines
4.6 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" interface with whatever connectors the app has """
2021-01-02 16:14:28 +00:00
import importlib
2021-04-07 15:09:47 +00:00
import logging
2021-03-01 20:09:21 +00:00
import re
from urllib.parse import urlparse
from django.dispatch import receiver
from django.db.models import signals
from requests import HTTPError
from bookwyrm import models
2021-01-02 16:14:28 +00:00
from bookwyrm.tasks import app
2021-04-07 15:09:47 +00:00
logger = logging.getLogger(__name__)
2020-03-07 20:22:28 +00:00
2021-01-02 16:14:28 +00:00
class ConnectorException(HTTPError):
    """Raised when a connector is unable to carry out the requested operation.

    Subclasses ``HTTPError``, so handlers written for ``HTTPError`` also
    catch this exception.
    """
2020-10-29 22:29:23 +00:00
def search(query, min_confidence=0.1):
    """find books based on arbitrary keywords

    Every connector returned by ``get_connectors()`` is queried in turn. When
    the query looks like an ISBN and the connector has an ISBN search url, the
    ISBN search is tried first; the generic keyword search is the fallback.

    :param query: the raw search string typed by the user
    :param min_confidence: passed through to each connector's ``search``
    :return: a list of ``{"connector": ..., "results": [...]}`` dicts, one per
        connector that answered; an empty list when ``query`` is empty
    """
    if not query:
        return []
    results = []

    # Have we got an ISBN? Drop every non-alphanumeric character (and the
    # underscore) and see if what is left has the length of an ISBN.
    isbn = re.sub(r"[\W_]", "", query)
    maybe_isbn = len(isbn) in [10, 13]  # ISBN10 or ISBN13

    def dedup_slug(result):
        """ key used to decide whether two results look like the same book """
        return "%s/%s/%s" % (result.title, result.author, result.year)

    # slugs of every result kept so far, across all connectors
    result_index = set()
    for connector in get_connectors():
        result_set = None
        if maybe_isbn and connector.isbn_search_url:
            # Search on ISBN
            try:
                result_set = connector.isbn_search(isbn)
            except Exception as err:  # pylint: disable=broad-except
                # a failing ISBN lookup must not take the search page down,
                # and we can still try the regular search below
                logger.exception(err)

        # if no isbn search or results, we fallback to generic search
        if not result_set:
            try:
                result_set = connector.search(query, min_confidence=min_confidence)
            except Exception as err:  # pylint: disable=broad-except
                # we don't want *any* error to crash the whole search page
                logger.exception(err)
                continue

        # if the search results look the same as ones an earlier connector
        # already returned, ignore them
        result_set = [r for r in result_set if dedup_slug(r) not in result_index]
        # remember what this connector contributed for the next ones
        result_index.update(dedup_slug(r) for r in result_set)
        results.append(
            {
                "connector": connector,
                "results": result_set,
            }
        )

    return results
2021-01-31 19:11:26 +00:00
def local_search(query, min_confidence=0.1, raw=False):
    """Run a keyword search against the local connector only.

    Looks up the connector record flagged ``local=True``, instantiates it and
    returns whatever its ``search`` method returns for the given arguments.
    """
    local_info = models.Connector.objects.get(local=True)
    local_connector = load_connector(local_info)
    return local_connector.search(query, min_confidence=min_confidence, raw=raw)
2020-05-04 17:15:41 +00:00
2021-03-01 20:09:21 +00:00
def isbn_local_search(query, raw=False):
    """Run an ISBN search against the local connector only.

    Looks up the connector record flagged ``local=True``, instantiates it and
    returns whatever its ``isbn_search`` method returns for the given arguments.
    """
    local_info = models.Connector.objects.get(local=True)
    local_connector = load_connector(local_info)
    return local_connector.isbn_search(query, raw=raw)
2020-10-29 22:29:23 +00:00
def first_search_result(query, min_confidence=0.1):
    """search until you find a result that fits

    Connectors are tried in the order ``get_connectors()`` yields them. A
    connector that fails with an HTTP error is logged and skipped so the
    remaining connectors still get a chance to answer.

    :param query: the search string
    :param min_confidence: passed through to each connector's ``search``
    :return: the first item of the first non-empty result list, or ``None``
        when no connector returned anything
    """
    for connector in get_connectors():
        try:
            result = connector.search(query, min_confidence=min_confidence)
        except HTTPError as err:
            # ConnectorException subclasses HTTPError, so both land here;
            # one unreachable connector should not abort the whole lookup
            logger.exception(err)
            continue
        if result:
            return result[0]
    return None
def get_connectors():
    """Lazily yield an instantiated connector for every connector record.

    Records are read ordered by their ``priority`` field and each one is
    turned into a connector object via ``load_connector``.
    """
    ordered_records = models.Connector.objects.order_by("priority").all()
    yield from map(load_connector, ordered_records)
2021-01-02 16:14:28 +00:00
def get_or_create_connector(remote_id):
    """get the connector related to the object's server

    The network location (host) of ``remote_id`` identifies the connector. If
    no connector record exists for it yet, one is created that points at the
    conventional bookwyrm urls on that host.

    :param remote_id: url of an object on the server in question
    :return: an instantiated connector for that server
    :raises ValueError: when ``remote_id`` has no network location
    """
    identifier = urlparse(remote_id).netloc
    if not identifier:
        raise ValueError("Invalid remote id")

    # get_or_create does the lookup and the insert in a single call instead of
    # a hand-written get / except DoesNotExist / create sequence.
    # NOTE(review): its handling of concurrent inserts relies on `identifier`
    # being unique on the model -- confirm against models.Connector.
    connector_info, _ = models.Connector.objects.get_or_create(
        identifier=identifier,
        defaults={
            "connector_file": "bookwyrm_connector",
            "base_url": "https://%s" % identifier,
            "books_url": "https://%s/book" % identifier,
            "covers_url": "https://%s/images/covers" % identifier,
            "search_url": "https://%s/search?q=" % identifier,
            "priority": 2,
        },
    )
    return load_connector(connector_info)
@app.task
def load_more_data(connector_id, book_id):
    """Background task: ask a connector to expand the data held for a book.

    Both arguments are database ids; the connector record and the book are
    looked up here before ``expand_book_data`` is called on the connector.
    """
    connector = load_connector(models.Connector.objects.get(id=connector_id))
    target_book = models.Book.objects.select_subclasses().get(id=book_id)
    connector.expand_book_data(target_book)
def load_connector(connector_info):
    """Build the connector object described by a connector record.

    The record's ``connector_file`` names the module under
    ``bookwyrm.connectors`` to import; that module's ``Connector`` class is
    instantiated with the record's ``identifier``.
    """
    module_path = "bookwyrm.connectors.%s" % connector_info.connector_file
    connector_module = importlib.import_module(module_path)
    return connector_module.Connector(connector_info.identifier)
@receiver(signals.post_save, sender="bookwyrm.FederatedServer")
# pylint: disable=unused-argument
def create_connector(sender, instance, created, *args, **kwargs):
    """Signal handler: make sure a saved bookwyrm server has a connector.

    Runs after a ``FederatedServer`` is saved and does nothing unless the
    server's ``application_type`` is ``"bookwyrm"``.
    """
    if instance.application_type != "bookwyrm":
        return
    server_url = "https://{:s}".format(instance.server_name)
    get_or_create_connector(server_url)