Merge pull request #2139 from bookwyrm-social/search-refactor

Search refactor
2022-05-31 10:22:17 -07:00 · 2022-05-31 10:22:17 -07:00 · 355e7039f0
commit 355e7039f0
parent 7905be7de2 c3b35760a2
18 changed files with 214 additions and 367 deletions
--- a/bookwyrm/book_search.py
+++ b/bookwyrm/book_search.py
@ -148,8 +148,8 @@ class SearchResult:

    def __repr__(self):
        # pylint: disable=consider-using-f-string
-        return "<SearchResult key={!r} title={!r} author={!r}>".format(
-            self.key, self.title, self.author
+        return "<SearchResult key={!r} title={!r} author={!r} confidence={!r}>".format(
+            self.key, self.title, self.author, self.confidence
        )

    def json(self):
--- a/bookwyrm/connectors/abstract_connector.py
+++ b/bookwyrm/connectors/abstract_connector.py
@ -1,9 +1,8 @@
 """ functionality outline for a book data connector """
 from abc import ABC, abstractmethod
 import imghdr
-import ipaddress
 import logging
-from urllib.parse import urlparse
+import re

 from django.core.files.base import ContentFile
 from django.db import transaction
@ -11,7 +10,7 @@ import requests
 from requests.exceptions import RequestException

 from bookwyrm import activitypub, models, settings
-from .connector_manager import load_more_data, ConnectorException
+from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
 from .format_mappings import format_mappings


@ -39,62 +38,34 @@ class AbstractMinimalConnector(ABC):
        for field in self_fields:
            setattr(self, field, getattr(info, field))

-    def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT):
-        """free text search"""
-        params = {}
-        if min_confidence:
-            params["min_confidence"] = min_confidence
+    def get_search_url(self, query):
+        """format the query url"""
+        # Check if the query resembles an ISBN
+        if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
+            return f"{self.isbn_search_url}{query}"

-        data = self.get_search_data(
-            f"{self.search_url}{query}",
-            params=params,
-            timeout=timeout,
-        )
-        results = []
+        # NOTE: previously, we tried searching isbn and if that produces no results,
+        # searched as free text. This, instead, only searches isbn if it's isbn-y
+        return f"{self.search_url}{query}"

-        for doc in self.parse_search_data(data)[:10]:
-            results.append(self.format_search_result(doc))
-        return results
-
-    def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
-        """isbn search"""
-        params = {}
-        data = self.get_search_data(
-            f"{self.isbn_search_url}{query}",
-            params=params,
-            timeout=timeout,
-        )
-        results = []
-
-        # this shouldn't be returning mutliple results, but just in case
-        for doc in self.parse_isbn_search_data(data)[:10]:
-            results.append(self.format_isbn_search_result(doc))
-        return results
-
-    def get_search_data(self, remote_id, **kwargs):  # pylint: disable=no-self-use
-        """this allows connectors to override the default behavior"""
-        return get_data(remote_id, **kwargs)
+    def process_search_response(self, query, data, min_confidence):
+        """Format the search results based on the formt of the query"""
+        if maybe_isbn(query):
+            return list(self.parse_isbn_search_data(data))[:10]
+        return list(self.parse_search_data(data, min_confidence))[:10]

    @abstractmethod
    def get_or_create_book(self, remote_id):
        """pull up a book record by whatever means possible"""

    @abstractmethod
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
        """turn the result json from a search into a list"""

-    @abstractmethod
-    def format_search_result(self, search_result):
-        """create a SearchResult obj from json"""
-
    @abstractmethod
    def parse_isbn_search_data(self, data):
        """turn the result json from a search into a list"""

-    @abstractmethod
-    def format_isbn_search_result(self, search_result):
-        """create a SearchResult obj from json"""
-

 class AbstractConnector(AbstractMinimalConnector):
    """generic book data connector"""
@ -254,9 +225,6 @@ def get_data(url, params=None, timeout=10):
    # check if the url is blocked
    raise_not_valid_url(url)

-    if models.FederatedServer.is_blocked(url):
-        raise ConnectorException(f"Attempting to load data from blocked url: {url}")
-
    try:
        resp = requests.get(
            url,
@ -311,20 +279,6 @@ def get_image(url, timeout=10):
    return image_content, extension


-def raise_not_valid_url(url):
-    """do some basic reality checks on the url"""
-    parsed = urlparse(url)
-    if not parsed.scheme in ["http", "https"]:
-        raise ConnectorException("Invalid scheme: ", url)
-
-    try:
-        ipaddress.ip_address(parsed.netloc)
-        raise ConnectorException("Provided url is an IP address: ", url)
-    except ValueError:
-        # it's not an IP address, which is good
-        pass
-
-
 class Mapping:
    """associate a local database field with a field in an external dataset"""

@ -366,3 +320,9 @@ def unique_physical_format(format_text):
        # try a direct match, so saving this would be redundant
        return None
    return format_text
+
+
+def maybe_isbn(query):
+    """check if a query looks like an isbn"""
+    isbn = re.sub(r"[\W_]", "", query)  # removes filler characters
+    return len(isbn) in [10, 13]  # ISBN10 or ISBN13
--- a/bookwyrm/connectors/bookwyrm_connector.py
+++ b/bookwyrm/connectors/bookwyrm_connector.py
@ -10,15 +10,12 @@ class Connector(AbstractMinimalConnector):
    def get_or_create_book(self, remote_id):
        return activitypub.resolve_remote_id(remote_id, model=models.Edition)

-    def parse_search_data(self, data):
-        return data
-
-    def format_search_result(self, search_result):
-        search_result["connector"] = self
-        return SearchResult(**search_result)
+    def parse_search_data(self, data, min_confidence):
+        for search_result in data:
+            search_result["connector"] = self
+            yield SearchResult(**search_result)

    def parse_isbn_search_data(self, data):
-        return data
-
-    def format_isbn_search_result(self, search_result):
-        return self.format_search_result(search_result)
+        for search_result in data:
+            search_result["connector"] = self
+            yield SearchResult(**search_result)
--- a/bookwyrm/connectors/connector_manager.py
+++ b/bookwyrm/connectors/connector_manager.py
@ -1,17 +1,18 @@
 """ interface with whatever connectors the app has """
-from datetime import datetime
+import asyncio
 import importlib
+import ipaddress
 import logging
-import re
 from urllib.parse import urlparse

+import aiohttp
 from django.dispatch import receiver
 from django.db.models import signals

 from requests import HTTPError

 from bookwyrm import book_search, models
-from bookwyrm.settings import SEARCH_TIMEOUT
+from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT
 from bookwyrm.tasks import app

 logger = logging.getLogger(__name__)
@ -21,53 +22,85 @@ class ConnectorException(HTTPError):
    """when the connector can't do what was asked"""


+async def get_results(session, url, min_confidence, query, connector):
+    """try this specific connector"""
+    # pylint: disable=line-too-long
+    headers = {
+        "Accept": (
+            'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
+        ),
+        "User-Agent": USER_AGENT,
+    }
+    params = {"min_confidence": min_confidence}
+    try:
+        async with session.get(url, headers=headers, params=params) as response:
+            if not response.ok:
+                logger.info("Unable to connect to %s: %s", url, response.reason)
+                return
+
+            try:
+                raw_data = await response.json()
+            except aiohttp.client_exceptions.ContentTypeError as err:
+                logger.exception(err)
+                return
+
+            return {
+                "connector": connector,
+                "results": connector.process_search_response(
+                    query, raw_data, min_confidence
+                ),
+            }
+    except asyncio.TimeoutError:
+        logger.info("Connection timed out for url: %s", url)
+    except aiohttp.ClientError as err:
+        logger.exception(err)
+
+
+async def async_connector_search(query, items, min_confidence):
+    """Try a number of requests simultaneously"""
+    timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        tasks = []
+        for url, connector in items:
+            tasks.append(
+                asyncio.ensure_future(
+                    get_results(session, url, min_confidence, query, connector)
+                )
+            )
+
+        results = await asyncio.gather(*tasks)
+        return results
+
+
 def search(query, min_confidence=0.1, return_first=False):
    """find books based on arbitary keywords"""
    if not query:
        return []
    results = []

-    # Have we got a ISBN ?
-    isbn = re.sub(r"[\W_]", "", query)
-    maybe_isbn = len(isbn) in [10, 13]  # ISBN10 or ISBN13
-
-    start_time = datetime.now()
+    items = []
    for connector in get_connectors():
-        result_set = None
-        if maybe_isbn and connector.isbn_search_url and connector.isbn_search_url != "":
-            # Search on ISBN
-            try:
-                result_set = connector.isbn_search(isbn)
-            except Exception as err:  # pylint: disable=broad-except
-                logger.info(err)
-                # if this fails, we can still try regular search
+        # get the search url from the connector before sending
+        url = connector.get_search_url(query)
+        try:
+            raise_not_valid_url(url)
+        except ConnectorException:
+            # if this URL is invalid we should skip it and move on
+            logger.info("Request denied to blocked domain: %s", url)
+            continue
+        items.append((url, connector))

-        # if no isbn search results, we fallback to generic search
-        if not result_set:
-            try:
-                result_set = connector.search(query, min_confidence=min_confidence)
-            except Exception as err:  # pylint: disable=broad-except
-                # we don't want *any* error to crash the whole search page
-                logger.info(err)
-                continue
-
-        if return_first and result_set:
-            # if we found anything, return it
-            return result_set[0]
-
-        if result_set:
-            results.append(
-                {
-                    "connector": connector,
-                    "results": result_set,
-                }
-            )
-        if (datetime.now() - start_time).seconds >= SEARCH_TIMEOUT:
-            break
+    # load as many results as we can
+    results = asyncio.run(async_connector_search(query, items, min_confidence))
+    results = [r for r in results if r]

    if return_first:
-        return None
+        # find the best result from all the responses and return that
+        all_results = [r for con in results for r in con["results"]]
+        all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True)
+        return all_results[0] if all_results else None

+    # failed requests will return None, so filter those out
    return results


@ -133,3 +166,20 @@ def create_connector(sender, instance, created, *args, **kwargs):
    """create a connector to an external bookwyrm server"""
    if instance.application_type == "bookwyrm":
        get_or_create_connector(f"https://{instance.server_name}")
+
+
+def raise_not_valid_url(url):
+    """do some basic reality checks on the url"""
+    parsed = urlparse(url)
+    if not parsed.scheme in ["http", "https"]:
+        raise ConnectorException("Invalid scheme: ", url)
+
+    try:
+        ipaddress.ip_address(parsed.netloc)
+        raise ConnectorException("Provided url is an IP address: ", url)
+    except ValueError:
+        # it's not an IP address, which is good
+        pass
+
+    if models.FederatedServer.is_blocked(url):
+        raise ConnectorException(f"Attempting to load data from blocked url: {url}")
--- a/bookwyrm/connectors/inventaire.py
+++ b/bookwyrm/connectors/inventaire.py
@ -77,53 +77,42 @@ class Connector(AbstractConnector):
            **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
        }

-    def search(self, query, min_confidence=None):  # pylint: disable=arguments-differ
-        """overrides default search function with confidence ranking"""
-        results = super().search(query)
-        if min_confidence:
-            # filter the search results after the fact
-            return [r for r in results if r.confidence >= min_confidence]
-        return results
-
-    def parse_search_data(self, data):
-        return data.get("results")
-
-    def format_search_result(self, search_result):
-        images = search_result.get("image")
-        cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
-        # a deeply messy translation of inventaire's scores
-        confidence = float(search_result.get("_score", 0.1))
-        confidence = 0.1 if confidence < 150 else 0.999
-        return SearchResult(
-            title=search_result.get("label"),
-            key=self.get_remote_id(search_result.get("uri")),
-            author=search_result.get("description"),
-            view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
-            cover=cover,
-            confidence=confidence,
-            connector=self,
-        )
+    def parse_search_data(self, data, min_confidence):
+        for search_result in data.get("results", []):
+            images = search_result.get("image")
+            cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
+            # a deeply messy translation of inventaire's scores
+            confidence = float(search_result.get("_score", 0.1))
+            confidence = 0.1 if confidence < 150 else 0.999
+            if confidence < min_confidence:
+                continue
+            yield SearchResult(
+                title=search_result.get("label"),
+                key=self.get_remote_id(search_result.get("uri")),
+                author=search_result.get("description"),
+                view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
+                cover=cover,
+                confidence=confidence,
+                connector=self,
+            )

    def parse_isbn_search_data(self, data):
        """got some daaaata"""
        results = data.get("entities")
        if not results:
-            return []
-        return list(results.values())
-
-    def format_isbn_search_result(self, search_result):
-        """totally different format than a regular search result"""
-        title = search_result.get("claims", {}).get("wdt:P1476", [])
-        if not title:
-            return None
-        return SearchResult(
-            title=title[0],
-            key=self.get_remote_id(search_result.get("uri")),
-            author=search_result.get("description"),
-            view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
-            cover=self.get_cover_url(search_result.get("image")),
-            connector=self,
-        )
+            return
+        for search_result in list(results.values()):
+            title = search_result.get("claims", {}).get("wdt:P1476", [])
+            if not title:
+                continue
+            yield SearchResult(
+                title=title[0],
+                key=self.get_remote_id(search_result.get("uri")),
+                author=search_result.get("description"),
+                view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
+                cover=self.get_cover_url(search_result.get("image")),
+                connector=self,
+            )

    def is_work_data(self, data):
        return data.get("type") == "work"
--- a/bookwyrm/connectors/openlibrary.py
+++ b/bookwyrm/connectors/openlibrary.py
@ -152,39 +152,35 @@ class Connector(AbstractConnector):
        image_name = f"{cover_id}-{size}.jpg"
        return f"{self.covers_url}/b/id/{image_name}"

-    def parse_search_data(self, data):
-        return data.get("docs")
-
-    def format_search_result(self, search_result):
-        # build the remote id from the openlibrary key
-        key = self.books_url + search_result["key"]
-        author = search_result.get("author_name") or ["Unknown"]
-        cover_blob = search_result.get("cover_i")
-        cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None
-        return SearchResult(
-            title=search_result.get("title"),
-            key=key,
-            author=", ".join(author),
-            connector=self,
-            year=search_result.get("first_publish_year"),
-            cover=cover,
-        )
+    def parse_search_data(self, data, min_confidence):
+        for search_result in data.get("docs"):
+            # build the remote id from the openlibrary key
+            key = self.books_url + search_result["key"]
+            author = search_result.get("author_name") or ["Unknown"]
+            cover_blob = search_result.get("cover_i")
+            cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None
+            yield SearchResult(
+                title=search_result.get("title"),
+                key=key,
+                author=", ".join(author),
+                connector=self,
+                year=search_result.get("first_publish_year"),
+                cover=cover,
+            )

    def parse_isbn_search_data(self, data):
-        return list(data.values())
-
-    def format_isbn_search_result(self, search_result):
-        # build the remote id from the openlibrary key
-        key = self.books_url + search_result["key"]
-        authors = search_result.get("authors") or [{"name": "Unknown"}]
-        author_names = [author.get("name") for author in authors]
-        return SearchResult(
-            title=search_result.get("title"),
-            key=key,
-            author=", ".join(author_names),
-            connector=self,
-            year=search_result.get("publish_date"),
-        )
+        for search_result in list(data.values()):
+            # build the remote id from the openlibrary key
+            key = self.books_url + search_result["key"]
+            authors = search_result.get("authors") or [{"name": "Unknown"}]
+            author_names = [author.get("name") for author in authors]
+            yield SearchResult(
+                title=search_result.get("title"),
+                key=key,
+                author=", ".join(author_names),
+                connector=self,
+                year=search_result.get("publish_date"),
+            )

    def load_edition_data(self, olkey):
        """query openlibrary for editions of a work"""
--- a/bookwyrm/management/commands/initdb.py
+++ b/bookwyrm/management/commands/initdb.py
@ -89,7 +89,7 @@ def init_connectors():
        covers_url="https://inventaire.io",
        search_url="https://inventaire.io/api/search?types=works&types=works&search=",
        isbn_search_url="https://inventaire.io/api/entities?action=by-uris&uris=isbn%3A",
-        priority=3,
+        priority=1,
    )

    models.Connector.objects.create(
@ -101,7 +101,7 @@ def init_connectors():
        covers_url="https://covers.openlibrary.org",
        search_url="https://openlibrary.org/search?q=",
        isbn_search_url="https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:",
-        priority=3,
+        priority=1,
    )


--- a/bookwyrm/settings.py
+++ b/bookwyrm/settings.py
@ -212,7 +212,7 @@ STREAMS = [

 # Search configuration
 # total time in seconds that the instance will spend searching connectors
-SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 15))
+SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 8))
 # timeout for a query to an individual connector
 QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5))

--- a/bookwyrm/templates/search/book.html
+++ b/bookwyrm/templates/search/book.html
@ -36,7 +36,7 @@
    {% if result_set.results %}
    <section class="mb-5">
        {% if not result_set.connector.local %}
-        <details class="details-panel box" {% if forloop.first %}open{% endif %}>
+        <details class="details-panel box" open>
        {% endif %}
            {% if not result_set.connector.local %}
            <summary class="is-flex is-align-items-center is-flex-wrap-wrap is-gap-2">
--- a/bookwyrm/tests/connectors/test_abstract_connector.py
+++ b/bookwyrm/tests/connectors/test_abstract_connector.py
@ -42,15 +42,9 @@ class AbstractConnector(TestCase):

            generated_remote_link_field = "openlibrary_link"

-            def format_search_result(self, search_result):
-                return search_result
-
-            def parse_search_data(self, data):
+            def parse_search_data(self, data, min_confidence):
                return data

-            def format_isbn_search_result(self, search_result):
-                return search_result
-
            def parse_isbn_search_data(self, data):
                return data

--- a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py
+++ b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py
@ -1,6 +1,5 @@
 """ testing book data connectors """
 from django.test import TestCase
-import responses

 from bookwyrm import models
 from bookwyrm.connectors import abstract_connector
@ -25,18 +24,12 @@ class AbstractConnector(TestCase):
        class TestConnector(abstract_connector.AbstractMinimalConnector):
            """nothing added here"""

-            def format_search_result(self, search_result):
-                return search_result
-
            def get_or_create_book(self, remote_id):
                pass

-            def parse_search_data(self, data):
+            def parse_search_data(self, data, min_confidence):
                return data

-            def format_isbn_search_result(self, search_result):
-                return search_result
-
            def parse_isbn_search_data(self, data):
                return data

@ -54,45 +47,6 @@ class AbstractConnector(TestCase):
        self.assertIsNone(connector.name)
        self.assertEqual(connector.identifier, "example.com")

-    @responses.activate
-    def test_search(self):
-        """makes an http request to the outside service"""
-        responses.add(
-            responses.GET,
-            "https://example.com/search?q=a%20book%20title",
-            json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
-            status=200,
-        )
-        results = self.test_connector.search("a book title")
-        self.assertEqual(len(results), 10)
-        self.assertEqual(results[0], "a")
-        self.assertEqual(results[1], "b")
-        self.assertEqual(results[2], "c")
-
-    @responses.activate
-    def test_search_min_confidence(self):
-        """makes an http request to the outside service"""
-        responses.add(
-            responses.GET,
-            "https://example.com/search?q=a%20book%20title&min_confidence=1",
-            json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
-            status=200,
-        )
-        results = self.test_connector.search("a book title", min_confidence=1)
-        self.assertEqual(len(results), 10)
-
-    @responses.activate
-    def test_isbn_search(self):
-        """makes an http request to the outside service"""
-        responses.add(
-            responses.GET,
-            "https://example.com/isbn?q=123456",
-            json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
-            status=200,
-        )
-        results = self.test_connector.isbn_search("123456")
-        self.assertEqual(len(results), 10)
-
    def test_create_mapping(self):
        """maps remote fields for book data to bookwyrm activitypub fields"""
        mapping = Mapping("isbn")
--- a/bookwyrm/tests/connectors/test_bookwyrm_connector.py
+++ b/bookwyrm/tests/connectors/test_bookwyrm_connector.py
@ -30,14 +30,11 @@ class BookWyrmConnector(TestCase):
        result = self.connector.get_or_create_book(book.remote_id)
        self.assertEqual(book, result)

-    def test_format_search_result(self):
+    def test_parse_search_data(self):
        """create a SearchResult object from search response json"""
        datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json")
        search_data = json.loads(datafile.read_bytes())
-        results = self.connector.parse_search_data(search_data)
-        self.assertIsInstance(results, list)
-
-        result = self.connector.format_search_result(results[0])
+        result = list(self.connector.parse_search_data(search_data, 0))[0]
        self.assertIsInstance(result, SearchResult)
        self.assertEqual(result.title, "Jonathan Strange and Mr Norrell")
        self.assertEqual(result.key, "https://example.com/book/122")
@ -45,10 +42,9 @@ class BookWyrmConnector(TestCase):
        self.assertEqual(result.year, 2017)
        self.assertEqual(result.connector, self.connector)

-    def test_format_isbn_search_result(self):
+    def test_parse_isbn_search_data(self):
        """just gotta attach the connector"""
        datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json")
        search_data = json.loads(datafile.read_bytes())
-        results = self.connector.parse_isbn_search_data(search_data)
-        result = self.connector.format_isbn_search_result(results[0])
+        result = list(self.connector.parse_isbn_search_data(search_data))[0]
        self.assertEqual(result.connector, self.connector)
--- a/bookwyrm/tests/connectors/test_connector_manager.py
+++ b/bookwyrm/tests/connectors/test_connector_manager.py
@ -49,39 +49,11 @@ class ConnectorManager(TestCase):
        self.assertEqual(len(connectors), 1)
        self.assertIsInstance(connectors[0], BookWyrmConnector)

-    @responses.activate
-    def test_search_plaintext(self):
-        """search all connectors"""
-        responses.add(
-            responses.GET,
-            "http://fake.ciom/search/Example?min_confidence=0.1",
-            json=[{"title": "Hello", "key": "https://www.example.com/search/1"}],
-        )
-        results = connector_manager.search("Example")
-        self.assertEqual(len(results), 1)
-        self.assertEqual(len(results[0]["results"]), 1)
-        self.assertEqual(results[0]["connector"].identifier, "test_connector_remote")
-        self.assertEqual(results[0]["results"][0].title, "Hello")
-
    def test_search_empty_query(self):
        """don't panic on empty queries"""
        results = connector_manager.search("")
        self.assertEqual(results, [])

-    @responses.activate
-    def test_search_isbn(self):
-        """special handling if a query resembles an isbn"""
-        responses.add(
-            responses.GET,
-            "http://fake.ciom/isbn/0000000000",
-            json=[{"title": "Hello", "key": "https://www.example.com/search/1"}],
-        )
-        results = connector_manager.search("0000000000")
-        self.assertEqual(len(results), 1)
-        self.assertEqual(len(results[0]["results"]), 1)
-        self.assertEqual(results[0]["connector"].identifier, "test_connector_remote")
-        self.assertEqual(results[0]["results"][0].title, "Hello")
-
    def test_first_search_result(self):
        """only get one search result"""
        result = connector_manager.first_search_result("Example")
--- a/bookwyrm/tests/connectors/test_inventaire_connector.py
+++ b/bookwyrm/tests/connectors/test_inventaire_connector.py
@ -66,38 +66,14 @@ class Inventaire(TestCase):
        with self.assertRaises(ConnectorException):
            self.connector.get_book_data("https://test.url/ok")

-    @responses.activate
-    def test_search(self):
-        """min confidence filtering"""
-        responses.add(
-            responses.GET,
-            "https://inventaire.io/search?q=hi",
-            json={
-                "results": [
-                    {
-                        "_score": 200,
-                        "label": "hello",
-                    },
-                    {
-                        "_score": 100,
-                        "label": "hi",
-                    },
-                ],
-            },
-        )
-        results = self.connector.search("hi", min_confidence=0.5)
-        self.assertEqual(len(results), 1)
-        self.assertEqual(results[0].title, "hello")
-
-    def test_format_search_result(self):
+    def test_parse_search_data(self):
        """json to search result objs"""
        search_file = pathlib.Path(__file__).parent.joinpath(
            "../data/inventaire_search.json"
        )
        search_results = json.loads(search_file.read_bytes())

-        results = self.connector.parse_search_data(search_results)
-        formatted = self.connector.format_search_result(results[0])
+        formatted = list(self.connector.parse_search_data(search_results, 0))[0]

        self.assertEqual(formatted.title, "The Stories of Vladimir Nabokov")
        self.assertEqual(
@ -178,15 +154,14 @@ class Inventaire(TestCase):
        result = self.connector.resolve_keys(keys)
        self.assertEqual(result, ["epistolary novel", "crime novel"])

-    def test_isbn_search(self):
+    def test_pase_isbn_search_data(self):
        """another search type"""
        search_file = pathlib.Path(__file__).parent.joinpath(
            "../data/inventaire_isbn_search.json"
        )
        search_results = json.loads(search_file.read_bytes())

-        results = self.connector.parse_isbn_search_data(search_results)
-        formatted = self.connector.format_isbn_search_result(results[0])
+        formatted = list(self.connector.parse_isbn_search_data(search_results))[0]

        self.assertEqual(formatted.title, "L'homme aux cercles bleus")
        self.assertEqual(
@ -198,25 +173,12 @@ class Inventaire(TestCase):
            "https://covers.inventaire.io/img/entities/12345",
        )

-    def test_isbn_search_empty(self):
+    def test_parse_isbn_search_data_empty(self):
        """another search type"""
        search_results = {}
-        results = self.connector.parse_isbn_search_data(search_results)
+        results = list(self.connector.parse_isbn_search_data(search_results))
        self.assertEqual(results, [])

-    def test_isbn_search_no_title(self):
-        """another search type"""
-        search_file = pathlib.Path(__file__).parent.joinpath(
-            "../data/inventaire_isbn_search.json"
-        )
-        search_results = json.loads(search_file.read_bytes())
-        search_results["entities"]["isbn:9782290349229"]["claims"]["wdt:P1476"] = None
-
-        result = self.connector.format_isbn_search_result(
-            search_results.get("entities")
-        )
-        self.assertIsNone(result)
-
    def test_is_work_data(self):
        """is it a work"""
        work_file = pathlib.Path(__file__).parent.joinpath(
--- a/bookwyrm/tests/connectors/test_openlibrary_connector.py
+++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py
@ -122,21 +122,11 @@ class Openlibrary(TestCase):
        self.assertEqual(result, "https://covers.openlibrary.org/b/id/image-L.jpg")

    def test_parse_search_result(self):
-        """extract the results from the search json response"""
-        datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
-        search_data = json.loads(datafile.read_bytes())
-        result = self.connector.parse_search_data(search_data)
-        self.assertIsInstance(result, list)
-        self.assertEqual(len(result), 2)
-
-    def test_format_search_result(self):
        """translate json from openlibrary into SearchResult"""
        datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
        search_data = json.loads(datafile.read_bytes())
-        results = self.connector.parse_search_data(search_data)
-        self.assertIsInstance(results, list)
+        result = list(self.connector.parse_search_data(search_data, 0))[0]

-        result = self.connector.format_search_result(results[0])
        self.assertIsInstance(result, SearchResult)
        self.assertEqual(result.title, "This Is How You Lose the Time War")
        self.assertEqual(result.key, "https://openlibrary.org/works/OL20639540W")
@ -148,18 +138,10 @@ class Openlibrary(TestCase):
        """extract the results from the search json response"""
        datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json")
        search_data = json.loads(datafile.read_bytes())
-        result = self.connector.parse_isbn_search_data(search_data)
-        self.assertIsInstance(result, list)
+        result = list(self.connector.parse_isbn_search_data(search_data))
        self.assertEqual(len(result), 1)

-    def test_format_isbn_search_result(self):
-        """translate json from openlibrary into SearchResult"""
-        datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json")
-        search_data = json.loads(datafile.read_bytes())
-        results = self.connector.parse_isbn_search_data(search_data)
-        self.assertIsInstance(results, list)
-
-        result = self.connector.format_isbn_search_result(results[0])
+        result = result[0]
        self.assertIsInstance(result, SearchResult)
        self.assertEqual(result.title, "Les ombres errantes")
        self.assertEqual(result.key, "https://openlibrary.org/books/OL16262504M")
--- a/bookwyrm/tests/test_book_search.py
+++ b/bookwyrm/tests/test_book_search.py
@ -102,18 +102,12 @@ class BookSearch(TestCase):
        class TestConnector(AbstractMinimalConnector):
            """nothing added here"""

-            def format_search_result(self, search_result):
-                return search_result
-
            def get_or_create_book(self, remote_id):
                pass

-            def parse_search_data(self, data):
+            def parse_search_data(self, data, min_confidence):
                return data

-            def format_isbn_search_result(self, search_result):
-                return search_result
-
            def parse_isbn_search_data(self, data):
                return data

--- a/bookwyrm/tests/views/test_search.py
+++ b/bookwyrm/tests/views/test_search.py
@ -1,6 +1,5 @@
 """ test for app action functionality """
 import json
-import pathlib
 from unittest.mock import patch

 from django.contrib.auth.models import AnonymousUser
@ -8,9 +7,9 @@ from django.http import JsonResponse
 from django.template.response import TemplateResponse
 from django.test import TestCase
 from django.test.client import RequestFactory
-import responses

 from bookwyrm import models, views
+from bookwyrm.book_search import SearchResult
 from bookwyrm.settings import DOMAIN
 from bookwyrm.tests.validate_html import validate_html

@ -65,12 +64,11 @@ class Views(TestCase):
        self.assertIsInstance(response, TemplateResponse)
        validate_html(response.render())

-    @responses.activate
    def test_search_books(self):
        """searches remote connectors"""
        view = views.Search.as_view()

-        models.Connector.objects.create(
+        connector = models.Connector.objects.create(
            identifier="example.com",
            connector_file="openlibrary",
            base_url="https://example.com",
@ -78,26 +76,24 @@ class Views(TestCase):
            covers_url="https://example.com/covers",
            search_url="https://example.com/search?q=",
        )
-        datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
-        search_data = json.loads(datafile.read_bytes())
-        responses.add(
-            responses.GET, "https://example.com/search?q=Test%20Book", json=search_data
-        )
+        mock_result = SearchResult(title="Mock Book", connector=connector, key="hello")

        request = self.factory.get("", {"q": "Test Book", "remote": True})
        request.user = self.local_user
        with patch("bookwyrm.views.search.is_api_request") as is_api:
            is_api.return_value = False
-            response = view(request)
+            with patch("bookwyrm.connectors.connector_manager.search") as remote_search:
+                remote_search.return_value = [
+                    {"results": [mock_result], "connector": connector}
+                ]
+                response = view(request)
+
        self.assertIsInstance(response, TemplateResponse)
        validate_html(response.render())
        connector_results = response.context_data["results"]
        self.assertEqual(len(connector_results), 2)
        self.assertEqual(connector_results[0]["results"][0].title, "Test Book")
-        self.assertEqual(
-            connector_results[1]["results"][0].title,
-            "This Is How You Lose the Time War",
-        )
+        self.assertEqual(connector_results[1]["results"][0].title, "Mock Book")

        # don't search remote
        request = self.factory.get("", {"q": "Test Book", "remote": True})
@ -106,7 +102,11 @@ class Views(TestCase):
        request.user = anonymous_user
        with patch("bookwyrm.views.search.is_api_request") as is_api:
            is_api.return_value = False
-            response = view(request)
+            with patch("bookwyrm.connectors.connector_manager.search") as remote_search:
+                remote_search.return_value = [
+                    {"results": [mock_result], "connector": connector}
+                ]
+                response = view(request)
        self.assertIsInstance(response, TemplateResponse)
        validate_html(response.render())
        connector_results = response.context_data["results"]
--- a/requirements.txt
+++ b/requirements.txt
@ -1,3 +1,4 @@
+aiohttp==3.8.1
 celery==5.2.2
 colorthief==0.2.1
 Django==3.2.13