From 7ce0890a41d98a15fc79ac16cbbef0aab0eed22e Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 29 Oct 2020 15:29:23 -0700 Subject: [PATCH] Stop assuming every book is Hamlet --- bookwyrm/books_manager.py | 12 ++++++------ bookwyrm/connectors/abstract_connector.py | 19 ++++++++++--------- bookwyrm/connectors/openlibrary.py | 8 ++++---- bookwyrm/connectors/self_connector.py | 13 +++++++------ bookwyrm/models/import_job.py | 8 ++++++-- bookwyrm/templates/search_results.html | 1 + bookwyrm/tests/models/test_import_model.py | 8 +++----- 7 files changed, 37 insertions(+), 32 deletions(-) diff --git a/bookwyrm/books_manager.py b/bookwyrm/books_manager.py index bfc543de..37a31766 100644 --- a/bookwyrm/books_manager.py +++ b/bookwyrm/books_manager.py @@ -64,14 +64,14 @@ def load_more_data(book_id): connector.expand_book_data(book) -def search(query): +def search(query, min_confidence=0.1): ''' find books based on arbitary keywords ''' results = [] dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year) result_index = set() for connector in get_connectors(): try: - result_set = connector.search(query) + result_set = connector.search(query, min_confidence=min_confidence) except HTTPError: continue @@ -87,16 +87,16 @@ def search(query): return results -def local_search(query): +def local_search(query, min_confidence=0.1): ''' only look at local search results ''' connector = load_connector(models.Connector.objects.get(local=True)) - return connector.search(query) + return connector.search(query, min_confidence=min_confidence) -def first_search_result(query): +def first_search_result(query, min_confidence=0.1): ''' search until you find a result that fits ''' for connector in get_connectors(): - result = connector.search(query) + result = connector.search(query, min_confidence=min_confidence) if result: return result[0] return None diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 72dda4e9..a34eb301 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,8 +1,8 @@ ''' functionality outline for a book data connector ''' from abc import ABC, abstractmethod +from dataclasses import dataclass from dateutil import parser import pytz -from urllib3.exceptions import ProtocolError import requests from requests import HTTPError @@ -52,7 +52,7 @@ class AbstractConnector(ABC): return True - def search(self, query): + def search(self, query, min_confidence=None): ''' free text search ''' resp = requests.get( '%s%s' % (self.search_url, query), @@ -160,7 +160,7 @@ class AbstractConnector(ABC): author_text = [] for author in self.get_authors_from_data(data): book.authors.add(author) - author_text += author.display_name + author_text.append(author.display_name) book.author_text = ', '.join(author_text) book.save() @@ -298,7 +298,7 @@ def get_data(url): 'Accept': 'application/json; charset=utf-8', }, ) - except ProtocolError: + except ConnectionError: raise ConnectorException() if not resp.ok: resp.raise_for_status() @@ -306,13 +306,14 @@ def get_data(url): return data +@dataclass class SearchResult: ''' standardized search result object ''' - def __init__(self, title, key, author, year): - self.title = title - self.key = key - self.author = author - self.year = year + title: str + key: str + author: str + year: str + confidence: int = 1 def __repr__(self): return "".format( diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index d70ab3e2..0ae3ce35 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -129,10 +129,10 @@ class Connector(AbstractConnector): key = self.books_url + search_result['key'] author = search_result.get('author_name') or ['Unknown'] return SearchResult( - search_result.get('title'), - key, - ', '.join(author), - search_result.get('first_publish_year'), + title=search_result.get('title'), + key=key, + author=', '.join(author), + year=search_result.get('first_publish_year'), ) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 2711bb1a..0e77ecf6 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult class Connector(AbstractConnector): ''' instantiate a connector ''' - def search(self, query): + def search(self, query, min_confidence=0.1): ''' right now you can't search bookwyrm sorry, but when that gets implemented it will totally rule ''' vector = SearchVector('title', weight='A') +\ @@ -28,7 +28,7 @@ class Connector(AbstractConnector): ).annotate( rank=SearchRank(vector, query) ).filter( - rank__gt=0 + rank__gt=min_confidence ).order_by('-rank') results = results.filter(default=True) or results @@ -42,11 +42,12 @@ class Connector(AbstractConnector): def format_search_result(self, search_result): return SearchResult( - search_result.title, - search_result.local_id, - search_result.author_text, - search_result.published_date.year if \ + title=search_result.title, + key=search_result.local_id, + author=search_result.author_text, + year=search_result.published_date.year if \ search_result.published_date else None, + confidence=search_result.rank, ) diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index bd63ea79..240e0694 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -63,7 +63,9 @@ class ImportItem(models.Model): def get_book_from_isbn(self): ''' search by isbn ''' - search_result = books_manager.first_search_result(self.isbn) + search_result = books_manager.first_search_result( + self.isbn, min_confidence=0.5 + ) if search_result: try: # don't crash the import when the connector fails @@ -79,7 +81,9 @@ class ImportItem(models.Model): self.data['Title'], self.data['Author'] ) - search_result = books_manager.first_search_result(search_term) + search_result = books_manager.first_search_result( + search_term, min_confidence=0.5 + ) if search_result: try: return books_manager.get_or_create_book(search_result.key) diff --git a/bookwyrm/templates/search_results.html b/bookwyrm/templates/search_results.html index bd5096fe..489386cd 100644 --- a/bookwyrm/templates/search_results.html +++ b/bookwyrm/templates/search_results.html @@ -14,6 +14,7 @@ {% for result in result_set.results %}
+ {{ result.confidence }}
{% csrf_token %} diff --git a/bookwyrm/tests/models/test_import_model.py b/bookwyrm/tests/models/test_import_model.py index 5e488199..1d5aaa72 100644 --- a/bookwyrm/tests/models/test_import_model.py +++ b/bookwyrm/tests/models/test_import_model.py @@ -24,7 +24,7 @@ class ImportJob(TestCase): 'Number of Pages': 416, 'Year Published': 2019, 'Original Publication Year': 2019, - 'Date Read': '2019/04/09', + 'Date Read': '2019/04/12', 'Date Added': '2019/04/09', 'Bookshelves': '', 'Bookshelves with positions': '', @@ -97,11 +97,9 @@ class ImportJob(TestCase): self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date) def test_read_reads(self): - expected = [models.ReadThrough( - finish_date=datetime.datetime(2019, 4, 9, 0, 0))] actual = models.ImportItem.objects.get(index=2) - self.assertEqual(actual.reads[0].start_date, expected[0].start_date) - self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date) + self.assertEqual(actual.reads[0].start_date, datetime.datetime(2019, 4, 9, 0, 0)) + self.assertEqual(actual.reads[0].finish_date, datetime.datetime(2019, 4, 12, 0, 0)) def test_unread_reads(self): expected = []