mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-01-11 17:55:37 +00:00
Stop assuming every book is Hamlet
This commit is contained in:
parent
a46d7f5dc7
commit
7ce0890a41
7 changed files with 37 additions and 32 deletions
|
@ -64,14 +64,14 @@ def load_more_data(book_id):
|
||||||
connector.expand_book_data(book)
|
connector.expand_book_data(book)
|
||||||
|
|
||||||
|
|
||||||
def search(query):
|
def search(query, min_confidence=0.1):
|
||||||
''' find books based on arbitrary keywords '''
|
''' find books based on arbitrary keywords '''
|
||||||
results = []
|
results = []
|
||||||
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
|
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
|
||||||
result_index = set()
|
result_index = set()
|
||||||
for connector in get_connectors():
|
for connector in get_connectors():
|
||||||
try:
|
try:
|
||||||
result_set = connector.search(query)
|
result_set = connector.search(query, min_confidence=min_confidence)
|
||||||
except HTTPError:
|
except HTTPError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -87,16 +87,16 @@ def search(query):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def local_search(query):
|
def local_search(query, min_confidence=0.1):
|
||||||
''' only look at local search results '''
|
''' only look at local search results '''
|
||||||
connector = load_connector(models.Connector.objects.get(local=True))
|
connector = load_connector(models.Connector.objects.get(local=True))
|
||||||
return connector.search(query)
|
return connector.search(query, min_confidence=min_confidence)
|
||||||
|
|
||||||
|
|
||||||
def first_search_result(query):
|
def first_search_result(query, min_confidence=0.1):
|
||||||
''' search until you find a result that fits '''
|
''' search until you find a result that fits '''
|
||||||
for connector in get_connectors():
|
for connector in get_connectors():
|
||||||
result = connector.search(query)
|
result = connector.search(query, min_confidence=min_confidence)
|
||||||
if result:
|
if result:
|
||||||
return result[0]
|
return result[0]
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
''' functionality outline for a book data connector '''
|
''' functionality outline for a book data connector '''
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
import pytz
|
import pytz
|
||||||
from urllib3.exceptions import ProtocolError
|
|
||||||
import requests
|
import requests
|
||||||
from requests import HTTPError
|
from requests import HTTPError
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ class AbstractConnector(ABC):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def search(self, query):
|
def search(self, query, min_confidence=None):
|
||||||
''' free text search '''
|
''' free text search '''
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
'%s%s' % (self.search_url, query),
|
'%s%s' % (self.search_url, query),
|
||||||
|
@ -160,7 +160,7 @@ class AbstractConnector(ABC):
|
||||||
author_text = []
|
author_text = []
|
||||||
for author in self.get_authors_from_data(data):
|
for author in self.get_authors_from_data(data):
|
||||||
book.authors.add(author)
|
book.authors.add(author)
|
||||||
author_text += author.display_name
|
author_text.append(author.display_name)
|
||||||
book.author_text = ', '.join(author_text)
|
book.author_text = ', '.join(author_text)
|
||||||
book.save()
|
book.save()
|
||||||
|
|
||||||
|
@ -298,7 +298,7 @@ def get_data(url):
|
||||||
'Accept': 'application/json; charset=utf-8',
|
'Accept': 'application/json; charset=utf-8',
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
except ProtocolError:
|
except ConnectionError:
|
||||||
raise ConnectorException()
|
raise ConnectorException()
|
||||||
if not resp.ok:
|
if not resp.ok:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
@ -306,13 +306,14 @@ def get_data(url):
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
class SearchResult:
|
class SearchResult:
|
||||||
''' standardized search result object '''
|
''' standardized search result object '''
|
||||||
def __init__(self, title, key, author, year):
|
title: str
|
||||||
self.title = title
|
key: str
|
||||||
self.key = key
|
author: str
|
||||||
self.author = author
|
year: str
|
||||||
self.year = year
|
confidence: int = 1
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
||||||
|
|
|
@ -129,10 +129,10 @@ class Connector(AbstractConnector):
|
||||||
key = self.books_url + search_result['key']
|
key = self.books_url + search_result['key']
|
||||||
author = search_result.get('author_name') or ['Unknown']
|
author = search_result.get('author_name') or ['Unknown']
|
||||||
return SearchResult(
|
return SearchResult(
|
||||||
search_result.get('title'),
|
title=search_result.get('title'),
|
||||||
key,
|
key=key,
|
||||||
', '.join(author),
|
author=', '.join(author),
|
||||||
search_result.get('first_publish_year'),
|
year=search_result.get('first_publish_year'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult
|
||||||
|
|
||||||
class Connector(AbstractConnector):
|
class Connector(AbstractConnector):
|
||||||
''' instantiate a connector '''
|
''' instantiate a connector '''
|
||||||
def search(self, query):
|
def search(self, query, min_confidence=0.1):
|
||||||
''' right now you can't search bookwyrm sorry, but when
|
''' right now you can't search bookwyrm sorry, but when
|
||||||
that gets implemented it will totally rule '''
|
that gets implemented it will totally rule '''
|
||||||
vector = SearchVector('title', weight='A') +\
|
vector = SearchVector('title', weight='A') +\
|
||||||
|
@ -28,7 +28,7 @@ class Connector(AbstractConnector):
|
||||||
).annotate(
|
).annotate(
|
||||||
rank=SearchRank(vector, query)
|
rank=SearchRank(vector, query)
|
||||||
).filter(
|
).filter(
|
||||||
rank__gt=0
|
rank__gt=min_confidence
|
||||||
).order_by('-rank')
|
).order_by('-rank')
|
||||||
results = results.filter(default=True) or results
|
results = results.filter(default=True) or results
|
||||||
|
|
||||||
|
@ -42,11 +42,12 @@ class Connector(AbstractConnector):
|
||||||
|
|
||||||
def format_search_result(self, search_result):
|
def format_search_result(self, search_result):
|
||||||
return SearchResult(
|
return SearchResult(
|
||||||
search_result.title,
|
title=search_result.title,
|
||||||
search_result.local_id,
|
key=search_result.local_id,
|
||||||
search_result.author_text,
|
author=search_result.author_text,
|
||||||
search_result.published_date.year if \
|
year=search_result.published_date.year if \
|
||||||
search_result.published_date else None,
|
search_result.published_date else None,
|
||||||
|
confidence=search_result.rank,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -63,7 +63,9 @@ class ImportItem(models.Model):
|
||||||
|
|
||||||
def get_book_from_isbn(self):
|
def get_book_from_isbn(self):
|
||||||
''' search by isbn '''
|
''' search by isbn '''
|
||||||
search_result = books_manager.first_search_result(self.isbn)
|
search_result = books_manager.first_search_result(
|
||||||
|
self.isbn, min_confidence=0.5
|
||||||
|
)
|
||||||
if search_result:
|
if search_result:
|
||||||
try:
|
try:
|
||||||
# don't crash the import when the connector fails
|
# don't crash the import when the connector fails
|
||||||
|
@ -79,7 +81,9 @@ class ImportItem(models.Model):
|
||||||
self.data['Title'],
|
self.data['Title'],
|
||||||
self.data['Author']
|
self.data['Author']
|
||||||
)
|
)
|
||||||
search_result = books_manager.first_search_result(search_term)
|
search_result = books_manager.first_search_result(
|
||||||
|
search_term, min_confidence=0.5
|
||||||
|
)
|
||||||
if search_result:
|
if search_result:
|
||||||
try:
|
try:
|
||||||
return books_manager.get_or_create_book(search_result.key)
|
return books_manager.get_or_create_book(search_result.key)
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
|
|
||||||
{% for result in result_set.results %}
|
{% for result in result_set.results %}
|
||||||
<div>
|
<div>
|
||||||
|
{{ result.confidence }}
|
||||||
<form action="/resolve_book" method="POST">
|
<form action="/resolve_book" method="POST">
|
||||||
{% csrf_token %}
|
{% csrf_token %}
|
||||||
<input type="hidden" name="remote_id" value="{{ result.key }}">
|
<input type="hidden" name="remote_id" value="{{ result.key }}">
|
||||||
|
|
|
@ -24,7 +24,7 @@ class ImportJob(TestCase):
|
||||||
'Number of Pages': 416,
|
'Number of Pages': 416,
|
||||||
'Year Published': 2019,
|
'Year Published': 2019,
|
||||||
'Original Publication Year': 2019,
|
'Original Publication Year': 2019,
|
||||||
'Date Read': '2019/04/09',
|
'Date Read': '2019/04/12',
|
||||||
'Date Added': '2019/04/09',
|
'Date Added': '2019/04/09',
|
||||||
'Bookshelves': '',
|
'Bookshelves': '',
|
||||||
'Bookshelves with positions': '',
|
'Bookshelves with positions': '',
|
||||||
|
@ -97,11 +97,9 @@ class ImportJob(TestCase):
|
||||||
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
||||||
|
|
||||||
def test_read_reads(self):
|
def test_read_reads(self):
|
||||||
expected = [models.ReadThrough(
|
|
||||||
finish_date=datetime.datetime(2019, 4, 9, 0, 0))]
|
|
||||||
actual = models.ImportItem.objects.get(index=2)
|
actual = models.ImportItem.objects.get(index=2)
|
||||||
self.assertEqual(actual.reads[0].start_date, expected[0].start_date)
|
self.assertEqual(actual.reads[0].start_date, datetime.datetime(2019, 4, 9, 0, 0))
|
||||||
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
self.assertEqual(actual.reads[0].finish_date, datetime.datetime(2019, 4, 12, 0, 0))
|
||||||
|
|
||||||
def test_unread_reads(self):
|
def test_unread_reads(self):
|
||||||
expected = []
|
expected = []
|
||||||
|
|
Loading…
Reference in a new issue