Split expand book data task into per-edition tasks

Loading every edition in one task takes ages, and produces a large task
that clogs up the queue. This will create more, smaller tasks that will
finish more quickly.
This commit is contained in:
Mouse Reeve 2022-05-31 12:41:57 -07:00
parent 355e7039f0
commit d149e57494
3 changed files with 22 additions and 9 deletions

View file

@ -152,6 +152,15 @@ def load_more_data(connector_id, book_id):
connector.expand_book_data(book) connector.expand_book_data(book)
@app.task(queue="low_priority")
def create_edition_task(connector_id, work_id, data):
    """Create one edition of a work as its own background task.

    Queued on "low_priority" so that a work with a very large number of
    editions is handled as many short tasks rather than one long one.

    connector_id and work_id are database ids; both records are looked up
    inside the task. data is passed through untouched to the connector's
    create_edition_from_data, so what it contains depends on the connector.
    """
    # resolve the stored connector record and instantiate its connector class
    active_connector = load_connector(
        models.Connector.objects.get(id=connector_id)
    )
    # fetch the parent work the new edition will be attached to
    parent_work = models.Work.objects.select_subclasses().get(id=work_id)
    active_connector.create_edition_from_data(parent_work, data)
def load_connector(connector_info): def load_connector(connector_info):
"""instantiate the connector class""" """instantiate the connector class"""
connector = importlib.import_module( connector = importlib.import_module(

View file

@ -5,7 +5,7 @@ from bookwyrm import models
from bookwyrm.book_search import SearchResult from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import AbstractConnector, Mapping
from .abstract_connector import get_data from .abstract_connector import get_data
from .connector_manager import ConnectorException from .connector_manager import ConnectorException, create_edition_task
class Connector(AbstractConnector): class Connector(AbstractConnector):
@ -156,12 +156,16 @@ class Connector(AbstractConnector):
for edition_uri in edition_options.get("uris"): for edition_uri in edition_options.get("uris"):
remote_id = self.get_remote_id(edition_uri) remote_id = self.get_remote_id(edition_uri)
try: create_edition_task.delay(self.connector.id, work.id, remote_id)
data = self.get_book_data(remote_id)
except ConnectorException: def create_edition_from_data(self, work, edition_data, instance=None):
# who, indeed, knows """pass in the url as data and then call the version in abstract connector"""
continue try:
self.create_edition_from_data(work, data) data = self.get_book_data(edition_data)
except ConnectorException:
# who, indeed, knows
return
super().create_edition_from_data(work, data, instance=instance)
def get_cover_url(self, cover_blob, *_): def get_cover_url(self, cover_blob, *_):
"""format the relative cover url into an absolute one: """format the relative cover url into an absolute one:

View file

@ -5,7 +5,7 @@ from bookwyrm import models
from bookwyrm.book_search import SearchResult from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import AbstractConnector, Mapping
from .abstract_connector import get_data, infer_physical_format, unique_physical_format from .abstract_connector import get_data, infer_physical_format, unique_physical_format
from .connector_manager import ConnectorException from .connector_manager import ConnectorException, create_edition_task
from .openlibrary_languages import languages from .openlibrary_languages import languages
@ -204,7 +204,7 @@ class Connector(AbstractConnector):
# does this edition have ANY interesting data? # does this edition have ANY interesting data?
if ignore_edition(edition_data): if ignore_edition(edition_data):
continue continue
self.create_edition_from_data(work, edition_data) create_edition_task.delay(self.connector.id, work.id, edition_data)
def ignore_edition(edition_data): def ignore_edition(edition_data):