From a2e8cf1993b81a074c1776e1e3105aa323458833 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 2 Jan 2021 15:15:25 -0800 Subject: [PATCH] Return best matching edition instead of default in search --- bookwyrm/connectors/self_connector.py | 38 ++++++++++--------- .../tests/connectors/test_self_connector.py | 11 +++++- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 921f517f..957adafa 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -1,6 +1,6 @@ ''' using a bookwyrm instance as a source of book data ''' from django.contrib.postgres.search import SearchRank, SearchVector -from django.db.models import F +from django.db.models import Count, F from bookwyrm import models from .abstract_connector import AbstractConnector, SearchResult @@ -12,16 +12,7 @@ class Connector(AbstractConnector): ''' search your local database ''' vector = SearchVector('title', weight='A') +\ SearchVector('subtitle', weight='B') +\ - SearchVector('authors__name', weight='C') +\ - SearchVector('isbn_13', weight='A') +\ - SearchVector('isbn_10', weight='A') +\ - SearchVector('openlibrary_key', weight='C') +\ - SearchVector('goodreads_key', weight='C') +\ - SearchVector('asin', weight='C') +\ - SearchVector('oclc_number', weight='C') +\ - SearchVector('remote_id', weight='C') +\ - SearchVector('description', weight='D') +\ - SearchVector('series', weight='D') + SearchVector('authors__name', weight='C') results = models.Edition.objects.annotate( search=vector @@ -31,15 +22,26 @@ class Connector(AbstractConnector): rank__gt=min_confidence ).order_by('-rank') - # remove non-default editions, if possible - results = results.filter(parent_work__default_edition__id=F('id')) \ - or results + # when there are multiple editions of the same work, pick the closest + editions_of_work = results.values( + 'parent_work' + ).annotate( + Count('parent_work') + 
).values_list('parent_work') search_results = [] - for book in set(results[:10]): - search_results.append( - self.format_search_result(book) - ) + for work_id in set(editions_of_work): + editions = results.filter(parent_work=work_id) + default = editions.filter(parent_work__default_edition=F('id')) + default_rank = default.first().rank if default.exists() else 0 + # if multiple books have the top rank, pick the default edition + if default_rank == editions.first().rank: + selected = default.first() + else: + selected = editions.first() + search_results.append(self.format_search_result(selected)) + if len(search_results) >= 10: + break return search_results diff --git a/bookwyrm/tests/connectors/test_self_connector.py b/bookwyrm/tests/connectors/test_self_connector.py index 1cc5983c..a28ca12a 100644 --- a/bookwyrm/tests/connectors/test_self_connector.py +++ b/bookwyrm/tests/connectors/test_self_connector.py @@ -76,10 +76,10 @@ class SelfConnector(TestCase): self.assertEqual(results[2].title, 'Edition of Example Work') - def test_search_default_filter(self): + def test_search_multiple_editions(self): ''' it should get rid of duplicate editions for the same work ''' work = models.Work.objects.create(title='Work Title') - models.Edition.objects.create( + edition_1 = models.Edition.objects.create( title='Edition 1 Title', parent_work=work) edition_2 = models.Edition.objects.create( title='Edition 2 Title', parent_work=work) @@ -88,10 +88,17 @@ class SelfConnector(TestCase): work.default_edition = edition_2 work.save() + # pick the best edition + results = self.connector.search('Edition 1 Title') + self.assertEqual(len(results), 1) + self.assertEqual(results[0].key, edition_1.remote_id) + + # pick the default edition when no match is best results = self.connector.search('Edition Title') self.assertEqual(len(results), 1) self.assertEqual(results[0].key, edition_2.remote_id) + # only matches one edition, so no deduplication takes place results = 
self.connector.search('Fish') self.assertEqual(len(results), 1) self.assertEqual(results[0].key, edition_3.remote_id)