2020-03-07 20:22:28 +00:00
|
|
|
''' select and call a connector for whatever book task needs doing '''
|
2020-03-27 22:25:08 +00:00
|
|
|
import importlib
|
2020-05-04 19:36:55 +00:00
|
|
|
from urllib.parse import urlparse
|
2020-03-27 22:25:08 +00:00
|
|
|
|
2020-09-17 20:02:52 +00:00
|
|
|
from requests import HTTPError
|
|
|
|
|
2020-09-21 15:10:37 +00:00
|
|
|
from bookwyrm import models
|
|
|
|
from bookwyrm.tasks import app
|
2020-03-27 22:25:08 +00:00
|
|
|
|
2020-03-07 20:22:28 +00:00
|
|
|
|
2020-05-11 01:08:18 +00:00
|
|
|
def get_edition(book_id):
    ''' fetch a book by its database id and hand back an edition

    The record is looked up through ``select_subclasses()``. When the
    record is a ``models.Work`` its ``default_edition`` is returned;
    any other record is returned unchanged.
    '''
    record = models.Book.objects.select_subclasses().get(id=book_id)
    if not isinstance(record, models.Work):
        return record
    return record.default_edition
|
|
|
|
|
|
|
|
|
2020-05-11 00:40:22 +00:00
|
|
|
def get_or_create_book(remote_id):
    ''' return the book for a remote id, fetching it when it is not stored

    A book whose ``remote_id`` matches is returned straight from the
    database. Otherwise the connector for the remote id's host is asked for
    the book, and if it produces one, ``load_more_data`` is queued for it
    before it is returned.
    '''
    lookup = models.Book.objects.select_subclasses().filter(remote_id=remote_id)
    known = lookup.first()
    if known:
        return known

    remote = get_or_create_connector(remote_id)

    # raises ConnectorException
    fetched = remote.get_or_create_book(remote_id)
    if not fetched:
        # nothing came back, so there is nothing to expand in the background
        return fetched

    load_more_data.delay(fetched.id)
    return fetched
|
|
|
|
|
|
|
|
|
2020-05-04 19:36:55 +00:00
|
|
|
def get_or_create_connector(remote_id):
    ''' get the connector for the server that hosts a remote id

    The host part (netloc) of ``remote_id`` is used as the connector
    identifier. When no connector record with that identifier exists, one is
    created with ``bookwyrm_connector`` as its connector file, https urls
    built from the host, and priority 2. The record is then turned into a
    connector instance with ``load_connector``.

    Lookup and creation go through ``get_or_create`` instead of a separate
    get followed by create, which closes the gap between the check and the
    insert when two callers meet the same unknown host at once.
    NOTE(review): that guarantee relies on the database enforcing uniqueness
    on ``identifier``; the model is not visible here, confirm.

    raises ValueError when ``remote_id`` has no host part
    '''
    identifier = urlparse(remote_id).netloc
    if not identifier:
        raise ValueError('Invalid remote id')

    # `defaults` is only applied when a new record has to be created
    connector_info, _ = models.Connector.objects.get_or_create(
        identifier=identifier,
        defaults={
            'connector_file': 'bookwyrm_connector',
            'base_url': 'https://%s' % identifier,
            'books_url': 'https://%s/book' % identifier,
            'covers_url': 'https://%s/images/covers' % identifier,
            'search_url': 'https://%s/search?q=' % identifier,
            'priority': 2,
        },
    )

    return load_connector(connector_info)
|
|
|
|
|
|
|
|
|
2020-04-02 05:11:31 +00:00
|
|
|
@app.task
def load_more_data(book_id):
    ''' background task: let a book's connector expand its data

    Re-reads the book by id (via ``select_subclasses()``), loads the
    connector recorded on it and calls ``expand_book_data`` with the book.
    The original note gives the motivation: getting all 10,000 editions of
    LoTR is work that should happen in the background.
    '''
    record = models.Book.objects.select_subclasses().get(id=book_id)
    load_connector(record.connector).expand_book_data(record)
|
|
|
|
|
2020-03-27 22:25:08 +00:00
|
|
|
|
2020-10-29 22:29:23 +00:00
|
|
|
def search(query, min_confidence=0.1):
    ''' find books based on arbitrary keywords

    Every connector from ``get_connectors()`` is queried in turn. A
    connector whose search raises ``HTTPError`` is skipped. A result is
    dropped when an earlier connector already returned one with the same
    title/author/year.

    Returns a list with one dict per connector that answered, of the form
    ``{'connector': <connector>, 'results': [<result>, ...]}``.
    '''
    def slug_for(item):
        # key used to recognise the same book coming from different sources
        return '%s/%s/%s' % (item.title, item.author, item.year)

    seen_slugs = set()
    grouped = []
    for connector in get_connectors():
        try:
            found = connector.search(query, min_confidence=min_confidence)
        except HTTPError:
            continue

        # filter against earlier connectors only; the slugs of this batch
        # are recorded after the filter has run
        fresh = [item for item in found if slug_for(item) not in seen_slugs]
        seen_slugs.update(slug_for(item) for item in fresh)

        grouped.append({'connector': connector, 'results': fresh})

    return grouped
|
2020-03-27 22:25:08 +00:00
|
|
|
|
2020-05-03 19:59:06 +00:00
|
|
|
|
2020-10-29 22:29:23 +00:00
|
|
|
def local_search(query, min_confidence=0.1):
    ''' search using only the connector flagged as local

    Loads the single connector record with ``local=True`` and returns
    whatever its ``search`` gives back for the query.
    '''
    local_info = models.Connector.objects.get(local=True)
    local_connector = load_connector(local_info)
    return local_connector.search(query, min_confidence=min_confidence)
|
2020-05-04 17:15:41 +00:00
|
|
|
|
|
|
|
|
2020-10-29 22:29:23 +00:00
|
|
|
def first_search_result(query, min_confidence=0.1):
    ''' search until you find a result that fits

    Connectors are tried in the order ``get_connectors()`` yields them.
    The first result of the first connector that returns anything is
    returned; ``None`` is returned when no connector has a match.

    A connector whose search raises ``HTTPError`` is skipped, the same way
    ``search()`` treats it, so one unreachable source does not stop the
    remaining connectors from being tried.
    '''
    for connector in get_connectors():
        try:
            matches = connector.search(query, min_confidence=min_confidence)
        except HTTPError:
            # this source failed; fall through to the next connector
            continue
        if matches:
            return matches[0]
    return None
|
|
|
|
|
|
|
|
|
2020-05-04 01:56:29 +00:00
|
|
|
def update_book(book, data=None):
    ''' refresh a book from the source it came from

    Loads the connector recorded on ``book`` and passes the book, plus the
    optional ``data``, to that connector's ``update_book``. Nothing is
    returned.
    '''
    load_connector(book.connector).update_book(book, data=data)
|
2020-03-28 23:30:54 +00:00
|
|
|
|
2020-03-27 21:14:28 +00:00
|
|
|
|
2020-05-03 19:59:06 +00:00
|
|
|
def get_connectors():
    ''' lazily yield a connector instance for every connector record

    Records are read ordered by their ``priority`` field and each one is
    passed through ``load_connector`` as it is consumed.
    '''
    by_priority = models.Connector.objects.order_by('priority').all()
    yield from map(load_connector, by_priority)
|
2020-04-29 17:57:20 +00:00
|
|
|
|
|
|
|
|
2020-05-03 19:59:06 +00:00
|
|
|
def load_connector(connector_info):
    ''' build the connector object described by a connector record

    Imports ``bookwyrm.connectors.<connector_file>`` and instantiates that
    module's ``Connector`` class with the record's ``identifier``.
    '''
    module_path = 'bookwyrm.connectors.%s' % connector_info.connector_file
    module = importlib.import_module(module_path)
    return module.Connector(connector_info.identifier)
|