From a413c879637e0210c12bbb98d6d41d580441dbaa Mon Sep 17 00:00:00 2001
From: Mouse Reeve <mousereeve@riseup.net>
Date: Sat, 2 Jan 2021 15:48:59 -0800
Subject: [PATCH] Separate search for unique identifiers out from text search

---
 bookwyrm/connectors/self_connector.py | 91 ++++++++++++++++++---------
 1 file changed, 60 insertions(+), 31 deletions(-)

diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py
index 957adafad..04fea7350 100644
--- a/bookwyrm/connectors/self_connector.py
+++ b/bookwyrm/connectors/self_connector.py
@@ -1,6 +1,9 @@
 ''' using a bookwyrm instance as a source of book data '''
+from functools import reduce
+import operator
+
 from django.contrib.postgres.search import SearchRank, SearchVector
-from django.db.models import Count, F
+from django.db.models import Count, F, Q
 
 from bookwyrm import models
 from .abstract_connector import AbstractConnector, SearchResult
@@ -10,36 +13,14 @@ class Connector(AbstractConnector):
     ''' instantiate a connector  '''
     def search(self, query, min_confidence=0.1):
         ''' search your local database '''
-        vector = SearchVector('title', weight='A') +\
-            SearchVector('subtitle', weight='B') +\
-            SearchVector('authors__name', weight='C')
-
-        results = models.Edition.objects.annotate(
-            search=vector
-        ).annotate(
-            rank=SearchRank(vector, query)
-        ).filter(
-            rank__gt=min_confidence
-        ).order_by('-rank')
-
-        # when there are multiple editions of the same work, pick the closest
-        editions_of_work = results.values(
-            'parent_work'
-        ).annotate(
-            Count('parent_work')
-        ).values_list('parent_work')
-
+        # first, try searching unique identifiers
+        results = search_identifiers(query)
+        if not results:
+            # then try searching title/author
+            results = search_title_author(query, min_confidence)
         search_results = []
-        for work_id in set(editions_of_work):
-            editions = results.filter(parent_work=work_id)
-            default = editions.filter(parent_work__default_edition=F('id'))
-            default_rank = default.first().rank if default.exists() else 0
-            # if mutliple books have the top rank, pick the default edition
-            if default_rank == editions.first().rank:
-                selected = default.first()
-            else:
-                selected = editions.first()
-            search_results.append(self.format_search_result(selected))
+        for result in results:
+            search_results.append(self.format_search_result(result))
             if len(search_results) >= 10:
                 break
         return search_results
@@ -53,7 +34,8 @@ class Connector(AbstractConnector):
             year=search_result.published_date.year if \
                     search_result.published_date else None,
             connector=self,
-            confidence=search_result.rank,
+            confidence=search_result.rank if \
+                    hasattr(search_result, 'rank') else 1,
         )
 
 
@@ -75,3 +57,50 @@ class Connector(AbstractConnector):
 
     def expand_book_data(self, book):
         pass
+
+
+def search_identifiers(query):
+    ''' look the query up in the dedupe fields of the model (remote_id, isbn) '''
+    lookups = (
+        Q(**{field.name: query})
+        for field in models.Edition._meta.get_fields()
+        if getattr(field, 'deduplication_field', False)
+    )
+    matches = models.Edition.objects.filter(
+        reduce(operator.or_, lookups)).distinct()
+    # prefer editions that are their work's default; else return every match
+    defaults = matches.filter(parent_work__default_edition__id=F('id'))
+    return defaults or matches
+
+
+def search_title_author(query, min_confidence):
+    ''' full-text search over title, subtitle and author names.
+    yields one edition per matching work, each annotated with `rank` '''
+    vector = SearchVector('title', weight='A') +\
+        SearchVector('subtitle', weight='B') +\
+        SearchVector('authors__name', weight='C')
+
+    results = models.Edition.objects.annotate(
+        search=vector
+    ).annotate(
+        rank=SearchRank(vector, query)
+    ).filter(
+        rank__gt=min_confidence
+    ).order_by('-rank')
+
+    # when there are multiple editions of the same work, pick the closest
+    editions_of_work = results.values(
+        'parent_work'
+    ).annotate(
+        Count('parent_work')
+    ).values_list('parent_work')
+
+    for work_id in set(editions_of_work):
+        editions = results.filter(parent_work=work_id)
+        default = editions.filter(parent_work__default_edition=F('id'))
+        default_rank = default.first().rank if default.exists() else 0
+        # if multiple books have the top rank, pick the default edition
+        if default_rank == editions.first().rank:
+            yield default.first()
+        else:
+            yield editions.first()