Merge pull request #2778 from ranok/upstream_pr

Move the search request logic into the AbstractConnector
commit cbb027c56c
Mouse Reeve authored 2023-04-25 16:20:24 -07:00 · committed by GitHub
2 changed files with 40 additions and 38 deletions
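
In effect, the module-level helper in connector_manager.py becomes a method on AbstractMinimalConnector, so each connector carries its own request logic and the search fan-out no longer needs to pass the connector object around. A sketch of the signature change, with all names taken from the diff below:

    # before: free function in bookwyrm/connectors/connector_manager.py
    async def get_results(session, url, min_confidence, query, connector): ...

    # after: method on AbstractMinimalConnector in bookwyrm/connectors/abstract_connector.py
    async def get_results(self, session, url, min_confidence, query): ...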

bookwyrm/connectors/abstract_connector.py

@@ -4,13 +4,16 @@ from urllib.parse import quote_plus
 import imghdr
 import logging
 import re
+import asyncio
+import requests
+from requests.exceptions import RequestException
+import aiohttp
 
 from django.core.files.base import ContentFile
 from django.db import transaction
-import requests
-from requests.exceptions import RequestException
 
 from bookwyrm import activitypub, models, settings
+from bookwyrm.settings import USER_AGENT
 from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
 from .format_mappings import format_mappings
@@ -57,6 +60,39 @@ class AbstractMinimalConnector(ABC):
             return list(self.parse_isbn_search_data(data))[:10]
         return list(self.parse_search_data(data, min_confidence))[:10]
 
+    async def get_results(self, session, url, min_confidence, query):
+        """try this specific connector"""
+        # pylint: disable=line-too-long
+        headers = {
+            "Accept": (
+                'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
+            ),
+            "User-Agent": USER_AGENT,
+        }
+        params = {"min_confidence": min_confidence}
+        try:
+            async with session.get(url, headers=headers, params=params) as response:
+                if not response.ok:
+                    logger.info("Unable to connect to %s: %s", url, response.reason)
+                    return
+                try:
+                    raw_data = await response.json()
+                except aiohttp.client_exceptions.ContentTypeError as err:
+                    logger.exception(err)
+                    return
+                return {
+                    "connector": self,
+                    "results": self.process_search_response(
+                        query, raw_data, min_confidence
+                    ),
+                }
+        except asyncio.TimeoutError:
+            logger.info("Connection timed out for url: %s", url)
+        except aiohttp.ClientError as err:
+            logger.info(err)
+
     @abstractmethod
     def get_or_create_book(self, remote_id):
         """pull up a book record by whatever means possible"""

bookwyrm/connectors/connector_manager.py

@@ -12,7 +12,7 @@ from django.db.models import signals
 from requests import HTTPError
 
 from bookwyrm import book_search, models
-from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT
+from bookwyrm.settings import SEARCH_TIMEOUT
 from bookwyrm.tasks import app, LOW
 
 logger = logging.getLogger(__name__)
@@ -22,40 +22,6 @@ class ConnectorException(HTTPError):
     """when the connector can't do what was asked"""
 
 
-async def get_results(session, url, min_confidence, query, connector):
-    """try this specific connector"""
-    # pylint: disable=line-too-long
-    headers = {
-        "Accept": (
-            'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
-        ),
-        "User-Agent": USER_AGENT,
-    }
-    params = {"min_confidence": min_confidence}
-    try:
-        async with session.get(url, headers=headers, params=params) as response:
-            if not response.ok:
-                logger.info("Unable to connect to %s: %s", url, response.reason)
-                return
-            try:
-                raw_data = await response.json()
-            except aiohttp.client_exceptions.ContentTypeError as err:
-                logger.exception(err)
-                return
-            return {
-                "connector": connector,
-                "results": connector.process_search_response(
-                    query, raw_data, min_confidence
-                ),
-            }
-    except asyncio.TimeoutError:
-        logger.info("Connection timed out for url: %s", url)
-    except aiohttp.ClientError as err:
-        logger.info(err)
-
-
 async def async_connector_search(query, items, min_confidence):
     """Try a number of requests simultaneously"""
     timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
@@ -64,7 +30,7 @@ async def async_connector_search(query, items, min_confidence):
         for url, connector in items:
             tasks.append(
                 asyncio.ensure_future(
-                    get_results(session, url, min_confidence, query, connector)
+                    connector.get_results(session, url, min_confidence, query)
                 )
            )
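
The hunk above shows only the call-site change inside the task-building loop. A sketch of the full fan-out, with the gather-and-filter step assumed (it does not appear in this diff):

    import asyncio
    import aiohttp

    SEARCH_TIMEOUT = 8  # assumed stand-in for bookwyrm.settings.SEARCH_TIMEOUT

    async def async_connector_search(query, items, min_confidence):
        # `items` is an iterable of (url, connector) pairs, as in the loop above
        timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            tasks = [
                asyncio.ensure_future(
                    connector.get_results(session, url, min_confidence, query)
                )
                for url, connector in items
            ]
            # each task resolves to a result dict or None (timeout or
            # connection error), so drop the empty entries
            results = await asyncio.gather(*tasks)
            return [result for result in results if result]

Sharing one ClientSession across all connectors keeps the per-request overhead low, while the single ClientTimeout bounds the whole search rather than each request individually.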