Return best matching edition instead of default in search

This commit is contained in:
Mouse Reeve 2021-01-02 15:15:25 -08:00
parent afa1921968
commit a2e8cf1993
2 changed files with 29 additions and 20 deletions

View file

@ -1,6 +1,6 @@
''' using a bookwyrm instance as a source of book data '''
from django.contrib.postgres.search import SearchRank, SearchVector
from django.db.models import F
from django.db.models import Count, F
from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult
@ -12,16 +12,7 @@ class Connector(AbstractConnector):
''' search your local database '''
vector = SearchVector('title', weight='A') +\
SearchVector('subtitle', weight='B') +\
SearchVector('authors__name', weight='C') +\
SearchVector('isbn_13', weight='A') +\
SearchVector('isbn_10', weight='A') +\
SearchVector('openlibrary_key', weight='C') +\
SearchVector('goodreads_key', weight='C') +\
SearchVector('asin', weight='C') +\
SearchVector('oclc_number', weight='C') +\
SearchVector('remote_id', weight='C') +\
SearchVector('description', weight='D') +\
SearchVector('series', weight='D')
SearchVector('authors__name', weight='C')
results = models.Edition.objects.annotate(
search=vector
@ -31,15 +22,26 @@ class Connector(AbstractConnector):
rank__gt=min_confidence
).order_by('-rank')
# remove non-default editions, if possible
results = results.filter(parent_work__default_edition__id=F('id')) \
or results
# when there are multiple editions of the same work, pick the closest
editions_of_work = results.values(
'parent_work'
).annotate(
Count('parent_work')
).values_list('parent_work')
search_results = []
for book in set(results[:10]):
search_results.append(
self.format_search_result(book)
)
for work_id in set(editions_of_work):
editions = results.filter(parent_work=work_id)
default = editions.filter(parent_work__default_edition=F('id'))
default_rank = default.first().rank if default.exists() else 0
# if multiple books have the top rank, pick the default edition
if default_rank == editions.first().rank:
selected = default.first()
else:
selected = editions.first()
search_results.append(self.format_search_result(selected))
if len(search_results) >= 10:
break
return search_results

View file

@ -76,10 +76,10 @@ class SelfConnector(TestCase):
self.assertEqual(results[2].title, 'Edition of Example Work')
def test_search_default_filter(self):
def test_search_multiple_editions(self):
''' it should get rid of duplicate editions for the same work '''
work = models.Work.objects.create(title='Work Title')
models.Edition.objects.create(
edition_1 = models.Edition.objects.create(
title='Edition 1 Title', parent_work=work)
edition_2 = models.Edition.objects.create(
title='Edition 2 Title', parent_work=work)
@ -88,10 +88,17 @@ class SelfConnector(TestCase):
work.default_edition = edition_2
work.save()
# pick the best edition
results = self.connector.search('Edition 1 Title')
self.assertEqual(len(results), 1)
self.assertEqual(results[0].key, edition_1.remote_id)
# pick the default edition when no match is best
results = self.connector.search('Edition Title')
self.assertEqual(len(results), 1)
self.assertEqual(results[0].key, edition_2.remote_id)
# only matches one edition, so no deduplication takes place
results = self.connector.search('Fish')
self.assertEqual(len(results), 1)
self.assertEqual(results[0].key, edition_3.remote_id)