mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-24 00:50:35 +00:00
Stop assuming every book is Hamlet
This commit is contained in:
parent
a46d7f5dc7
commit
7ce0890a41
7 changed files with 37 additions and 32 deletions
|
@ -64,14 +64,14 @@ def load_more_data(book_id):
|
|||
connector.expand_book_data(book)
|
||||
|
||||
|
||||
def search(query):
|
||||
def search(query, min_confidence=0.1):
|
||||
''' find books based on arbitary keywords '''
|
||||
results = []
|
||||
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
|
||||
result_index = set()
|
||||
for connector in get_connectors():
|
||||
try:
|
||||
result_set = connector.search(query)
|
||||
result_set = connector.search(query, min_confidence=min_confidence)
|
||||
except HTTPError:
|
||||
continue
|
||||
|
||||
|
@ -87,16 +87,16 @@ def search(query):
|
|||
return results
|
||||
|
||||
|
||||
def local_search(query):
|
||||
def local_search(query, min_confidence=0.1):
|
||||
''' only look at local search results '''
|
||||
connector = load_connector(models.Connector.objects.get(local=True))
|
||||
return connector.search(query)
|
||||
return connector.search(query, min_confidence=min_confidence)
|
||||
|
||||
|
||||
def first_search_result(query):
|
||||
def first_search_result(query, min_confidence=0.1):
|
||||
''' search until you find a result that fits '''
|
||||
for connector in get_connectors():
|
||||
result = connector.search(query)
|
||||
result = connector.search(query, min_confidence=min_confidence)
|
||||
if result:
|
||||
return result[0]
|
||||
return None
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
''' functionality outline for a book data connector '''
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from dateutil import parser
|
||||
import pytz
|
||||
from urllib3.exceptions import ProtocolError
|
||||
import requests
|
||||
from requests import HTTPError
|
||||
|
||||
|
@ -52,7 +52,7 @@ class AbstractConnector(ABC):
|
|||
return True
|
||||
|
||||
|
||||
def search(self, query):
|
||||
def search(self, query, min_confidence=None):
|
||||
''' free text search '''
|
||||
resp = requests.get(
|
||||
'%s%s' % (self.search_url, query),
|
||||
|
@ -160,7 +160,7 @@ class AbstractConnector(ABC):
|
|||
author_text = []
|
||||
for author in self.get_authors_from_data(data):
|
||||
book.authors.add(author)
|
||||
author_text += author.display_name
|
||||
author_text.append(author.display_name)
|
||||
book.author_text = ', '.join(author_text)
|
||||
book.save()
|
||||
|
||||
|
@ -298,7 +298,7 @@ def get_data(url):
|
|||
'Accept': 'application/json; charset=utf-8',
|
||||
},
|
||||
)
|
||||
except ProtocolError:
|
||||
except ConnectionError:
|
||||
raise ConnectorException()
|
||||
if not resp.ok:
|
||||
resp.raise_for_status()
|
||||
|
@ -306,13 +306,14 @@ def get_data(url):
|
|||
return data
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
''' standardized search result object '''
|
||||
def __init__(self, title, key, author, year):
|
||||
self.title = title
|
||||
self.key = key
|
||||
self.author = author
|
||||
self.year = year
|
||||
title: str
|
||||
key: str
|
||||
author: str
|
||||
year: str
|
||||
confidence: int = 1
|
||||
|
||||
def __repr__(self):
|
||||
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
||||
|
|
|
@ -129,10 +129,10 @@ class Connector(AbstractConnector):
|
|||
key = self.books_url + search_result['key']
|
||||
author = search_result.get('author_name') or ['Unknown']
|
||||
return SearchResult(
|
||||
search_result.get('title'),
|
||||
key,
|
||||
', '.join(author),
|
||||
search_result.get('first_publish_year'),
|
||||
title=search_result.get('title'),
|
||||
key=key,
|
||||
author=', '.join(author),
|
||||
year=search_result.get('first_publish_year'),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult
|
|||
|
||||
class Connector(AbstractConnector):
|
||||
''' instantiate a connector '''
|
||||
def search(self, query):
|
||||
def search(self, query, min_confidence=0.1):
|
||||
''' right now you can't search bookwyrm sorry, but when
|
||||
that gets implemented it will totally rule '''
|
||||
vector = SearchVector('title', weight='A') +\
|
||||
|
@ -28,7 +28,7 @@ class Connector(AbstractConnector):
|
|||
).annotate(
|
||||
rank=SearchRank(vector, query)
|
||||
).filter(
|
||||
rank__gt=0
|
||||
rank__gt=min_confidence
|
||||
).order_by('-rank')
|
||||
results = results.filter(default=True) or results
|
||||
|
||||
|
@ -42,11 +42,12 @@ class Connector(AbstractConnector):
|
|||
|
||||
def format_search_result(self, search_result):
|
||||
return SearchResult(
|
||||
search_result.title,
|
||||
search_result.local_id,
|
||||
search_result.author_text,
|
||||
search_result.published_date.year if \
|
||||
title=search_result.title,
|
||||
key=search_result.local_id,
|
||||
author=search_result.author_text,
|
||||
year=search_result.published_date.year if \
|
||||
search_result.published_date else None,
|
||||
confidence=search_result.rank,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -63,7 +63,9 @@ class ImportItem(models.Model):
|
|||
|
||||
def get_book_from_isbn(self):
|
||||
''' search by isbn '''
|
||||
search_result = books_manager.first_search_result(self.isbn)
|
||||
search_result = books_manager.first_search_result(
|
||||
self.isbn, min_confidence=0.5
|
||||
)
|
||||
if search_result:
|
||||
try:
|
||||
# don't crash the import when the connector fails
|
||||
|
@ -79,7 +81,9 @@ class ImportItem(models.Model):
|
|||
self.data['Title'],
|
||||
self.data['Author']
|
||||
)
|
||||
search_result = books_manager.first_search_result(search_term)
|
||||
search_result = books_manager.first_search_result(
|
||||
search_term, min_confidence=0.5
|
||||
)
|
||||
if search_result:
|
||||
try:
|
||||
return books_manager.get_or_create_book(search_result.key)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
{% for result in result_set.results %}
|
||||
<div>
|
||||
{{ result.confidence }}
|
||||
<form action="/resolve_book" method="POST">
|
||||
{% csrf_token %}
|
||||
<input type="hidden" name="remote_id" value="{{ result.key }}">
|
||||
|
|
|
@ -24,7 +24,7 @@ class ImportJob(TestCase):
|
|||
'Number of Pages': 416,
|
||||
'Year Published': 2019,
|
||||
'Original Publication Year': 2019,
|
||||
'Date Read': '2019/04/09',
|
||||
'Date Read': '2019/04/12',
|
||||
'Date Added': '2019/04/09',
|
||||
'Bookshelves': '',
|
||||
'Bookshelves with positions': '',
|
||||
|
@ -97,11 +97,9 @@ class ImportJob(TestCase):
|
|||
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
||||
|
||||
def test_read_reads(self):
|
||||
expected = [models.ReadThrough(
|
||||
finish_date=datetime.datetime(2019, 4, 9, 0, 0))]
|
||||
actual = models.ImportItem.objects.get(index=2)
|
||||
self.assertEqual(actual.reads[0].start_date, expected[0].start_date)
|
||||
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
||||
self.assertEqual(actual.reads[0].start_date, datetime.datetime(2019, 4, 9, 0, 0))
|
||||
self.assertEqual(actual.reads[0].finish_date, datetime.datetime(2019, 4, 12, 0, 0))
|
||||
|
||||
def test_unread_reads(self):
|
||||
expected = []
|
||||
|
|
Loading…
Reference in a new issue