moviewyrm/fedireads/books_manager.py

158 lines
4.6 KiB
Python
Raw Normal View History

2020-03-07 20:22:28 +00:00
''' select and call a connector for whatever book task needs doing '''
from requests import HTTPError
import importlib
from urllib.parse import urlparse
2020-05-10 19:56:59 +00:00
from fedireads import models, settings
2020-04-02 05:11:31 +00:00
from fedireads.tasks import app
2020-03-07 20:22:28 +00:00
2020-05-04 00:53:14 +00:00
def get_or_create_book(value, key='id', connector_id=None):
    ''' find a book locally, or fetch it through a connector

    `value` is first matched against the `key` field of the local books.
    A local match that is not an Edition is swapped for its
    `default_edition`. When `key` is 'remote_id' the value is also tried
    through get_by_absolute_id. Failing all of that, the book is requested
    from the connector with id `connector_id`, or, when no id is given,
    from the connector derived from `value` itself.
    '''
    lookup = {key: value}
    local_match = models.Book.objects.select_subclasses().filter(
        **lookup
    ).first()
    if local_match:
        if isinstance(local_match, models.Edition):
            return local_match
        return local_match.default_edition

    if key == 'remote_id':
        by_remote_id = get_by_absolute_id(value, models.Book)
        if by_remote_id:
            return by_remote_id

    if not connector_id:
        connector = get_or_create_connector(value)
    else:
        connector = load_connector(
            models.Connector.objects.get(id=connector_id)
        )

    fetched = connector.get_or_create_book(value)
    # filling in the rest of the record is left to the load_more_data task
    load_more_data.delay(fetched.id)
    return fetched
def get_or_create_connector(remote_id):
    ''' load the connector for the server a remote id points at

    The network location of `remote_id` identifies the connector. An
    unknown server gets a new connector record with default urls.
    Raises ValueError when `remote_id` has no network location.
    '''
    identifier = urlparse(remote_id).netloc
    if not identifier:
        raise ValueError('Invalid remote id')

    connector_table = models.Connector.objects
    try:
        connector_info = connector_table.get(identifier=identifier)
    except models.Connector.DoesNotExist:
        # never seen this server before: register it with default settings
        defaults = {
            'connector_file': 'fedireads_connector',
            'base_url': 'https://%s' % identifier,
            'books_url': 'https://%s/book' % identifier,
            'covers_url': 'https://%s/images/covers' % identifier,
            'search_url': 'https://%s/search?q=' % identifier,
            'key_name': 'remote_id',
            'priority': 3,
        }
        connector_info = connector_table.create(
            identifier=identifier, **defaults
        )
    return load_connector(connector_info)
def get_by_absolute_id(absolute_id, model):
    ''' generalized function to get from a model with a remote_id field

    Looks for an object whose `remote_id` equals `absolute_id`. If there
    is none and the url is on this instance (settings.DOMAIN), the last
    path segment is tried as a local database id, and the match is only
    accepted when its own `absolute_id` equals the requested one.

    Returns the matching object, or None when `absolute_id` is empty or
    nothing matches.
    '''
    if not absolute_id:
        return None

    # an object stored with exactly this remote id wins outright
    try:
        return model.objects.get(remote_id=absolute_id)
    except model.DoesNotExist:
        pass

    # only urls on this instance's own domain can refer to a local id
    if urlparse(absolute_id).netloc != settings.DOMAIN:
        return None

    # try finding a local object whose id is the last path segment
    local_id = absolute_id.split('/')[-1]
    if hasattr(model.objects, 'select_subclasses'):
        candidates = model.objects.select_subclasses()
    else:
        candidates = model.objects
    try:
        possible_match = candidates.get(id=local_id)
    except (model.DoesNotExist, ValueError):
        # ValueError: the segment is empty or not usable as an id (e.g. a
        # url with a trailing slash or a slug), so it cannot match
        return None

    # make sure it's not actually a remote object with an id that
    # clashes with a local id
    if possible_match.absolute_id == absolute_id:
        return possible_match
    return None
2020-04-02 05:11:31 +00:00
@app.task
def load_more_data(book_id):
    ''' task: have a book's connector expand its data

    Meant to run in the background, since a popular book can have a
    very large number of editions to pull in.
    '''
    book_records = models.Book.objects.select_subclasses()
    target = book_records.get(id=book_id)
    load_connector(target.connector).expand_book_data(target)
2020-05-03 22:26:47 +00:00
def search(query):
    ''' find books based on arbitrary keywords

    Runs `query` against every connector returned by get_connectors().
    A connector whose request fails is skipped rather than failing the
    whole search. A result is dropped when an earlier connector already
    returned one with the same title/author/year.

    Returns a list of {'connector': ..., 'results': [...]} dicts, one
    per connector that answered.
    '''
    # imported locally so this function carries its own dependency;
    # HTTPError is a subclass of RequestException, so it is still caught
    from requests.exceptions import RequestException

    def dedup_slug(result):
        ''' key used to recognise the same book across connectors '''
        return '%s/%s/%s' % (result.title, result.author, result.year)

    results = []
    result_index = set()
    for connector in get_connectors():
        try:
            result_set = connector.search(query)
        except RequestException:
            # assumes connectors surface failures from `requests`; an
            # unreachable or slow server should not break the others
            continue

        # only earlier connectors' results are filtered out here:
        # repeats within this one result set are kept
        result_set = [
            r for r in result_set if dedup_slug(r) not in result_index
        ]
        # `|=` is an in-place set union
        result_index |= {dedup_slug(r) for r in result_set}
        results.append({
            'connector': connector,
            'results': result_set,
        })
    return results
2020-05-04 17:15:41 +00:00
def local_search(query):
    ''' search using only the connector flagged as local '''
    local_info = models.Connector.objects.get(local=True)
    local_connector = load_connector(local_info)
    return local_connector.search(query)
2020-05-03 22:26:47 +00:00
def first_search_result(query):
    ''' search until you find a result that fits

    Asks each connector from get_connectors() in turn and returns the
    first item of the first non-empty result. A connector whose request
    fails is skipped, as in search(). Returns None when no connector
    produces a result.
    '''
    # imported locally so this function carries its own dependency;
    # HTTPError is a subclass of RequestException
    from requests.exceptions import RequestException

    for connector in get_connectors():
        try:
            result = connector.search(query)
        except RequestException:
            # assumes connectors surface failures from `requests`; one
            # broken source should not hide results from the others
            continue
        if result:
            return result[0]
    return None
2020-05-04 01:56:29 +00:00
def update_book(book, data=None):
    ''' have the book's own connector update it

    `data` is passed through to the connector's update_book unchanged.
    '''
    source_connector = load_connector(book.connector)
    source_connector.update_book(book, data=data)
2020-03-28 23:30:54 +00:00
def get_connectors():
    ''' instantiate every known connector, ordered by priority '''
    by_priority = models.Connector.objects.order_by('priority').all()
    loaded = []
    for connector_info in by_priority:
        loaded.append(load_connector(connector_info))
    return loaded
2020-04-29 17:57:20 +00:00
def load_connector(connector_info):
    ''' build a connector object from its database record

    Imports the module named by `connector_info.connector_file` from
    fedireads.connectors and instantiates its `Connector` class with the
    record's identifier.
    '''
    module_path = 'fedireads.connectors.%s' % connector_info.connector_file
    connector_module = importlib.import_module(module_path)
    connector_class = connector_module.Connector
    return connector_class(connector_info.identifier)