Simplify identifier searches

This commit removes code that deduplicated search results for identifier
searches. If it was the case that multiple books have the same
identifier, in theory this would produce better search results, but in
practice this doesn't happen very much, is probably worth seeing when
it does, and worsens the performance of identifier search overall.
This commit is contained in:
Mouse Reeve 2022-11-15 19:39:57 -08:00
parent 807f8c02b2
commit eb61033626

View file

@ -4,7 +4,7 @@ from functools import reduce
import operator import operator
from django.contrib.postgres.search import SearchRank, SearchQuery from django.contrib.postgres.search import SearchRank, SearchQuery
from django.db.models import OuterRef, Subquery, F, Q from django.db.models import F, Q
from bookwyrm import models from bookwyrm import models
from bookwyrm import connectors from bookwyrm import connectors
@ -17,8 +17,13 @@ def search(query, min_confidence=0, filters=None, return_first=False):
filters = filters or [] filters = filters or []
if not query: if not query:
return [] return []
results = None
# first, try searching unqiue identifiers # first, try searching unqiue identifiers
# unique identifiers never have spaces, title/author usually do
if not " " in query:
results = search_identifiers(query, *filters, return_first=return_first) results = search_identifiers(query, *filters, return_first=return_first)
# if there were no identifier results...
if not results: if not results:
# then try searching title/author # then try searching title/author
results = search_title_author( results = search_title_author(
@ -35,24 +40,10 @@ def isbn_search(query):
# If the ISBN has only 9 characters, prepend missing zero # If the ISBN has only 9 characters, prepend missing zero
query = query.strip().upper().rjust(10, "0") query = query.strip().upper().rjust(10, "0")
filters = [{f: query} for f in ["isbn_10", "isbn_13"]] filters = [{f: query} for f in ["isbn_10", "isbn_13"]]
results = models.Edition.objects.filter( return models.Edition.objects.filter(
reduce(operator.or_, (Q(**f) for f in filters)) reduce(operator.or_, (Q(**f) for f in filters))
).distinct() ).distinct()
# when there are multiple editions of the same work, pick the default.
# it would be odd for this to happen.
default_editions = models.Edition.objects.filter(
parent_work=OuterRef("parent_work")
).order_by("-edition_rank")
results = (
results.annotate(default_id=Subquery(default_editions.values("id")[:1])).filter(
default_id=F("id")
)
or results
)
return results
def format_search_result(search_result): def format_search_result(search_result):
"""convert a book object into a search result object""" """convert a book object into a search result object"""
@ -88,22 +79,7 @@ def search_identifiers(query, *filters, return_first=False):
results = models.Edition.objects.filter( results = models.Edition.objects.filter(
*filters, reduce(operator.or_, (Q(**f) for f in or_filters)) *filters, reduce(operator.or_, (Q(**f) for f in or_filters))
).distinct() ).distinct()
if results.count() <= 1:
if return_first:
return results.first()
return results
# when there are multiple editions of the same work, pick the default.
# it would be odd for this to happen.
default_editions = models.Edition.objects.filter(
parent_work=OuterRef("parent_work")
).order_by("-edition_rank")
results = (
results.annotate(default_id=Subquery(default_editions.values("id")[:1])).filter(
default_id=F("id")
)
or results
)
if return_first: if return_first:
return results.first() return results.first()
return results return results