diff --git a/fedireads/books_manager.py b/fedireads/books_manager.py index d105da75e..2b20fe0c2 100644 --- a/fedireads/books_manager.py +++ b/fedireads/books_manager.py @@ -4,7 +4,7 @@ from requests import HTTPError import importlib from urllib.parse import urlparse -from fedireads import models +from fedireads import models, settings from fedireads.tasks import app @@ -69,6 +69,10 @@ def get_by_absolute_id(absolute_id, model): except model.DoesNotExist: pass + url = urlparse(absolute_id) + if url.netloc != settings.DOMAIN: + return None + # try finding a local status with that id local_id = absolute_id.split('/')[-1] try: diff --git a/fedireads/connectors/abstract_connector.py b/fedireads/connectors/abstract_connector.py index 83729bb3f..1068fdeb3 100644 --- a/fedireads/connectors/abstract_connector.py +++ b/fedireads/connectors/abstract_connector.py @@ -4,6 +4,8 @@ from dateutil import parser import pytz import requests +from django.db import transaction + from fedireads import models @@ -16,18 +18,26 @@ class AbstractConnector(ABC): self.connector = info self.book_mappings = {} + self.key_mappings = { + 'isbn_13': ('isbn_13', None), + 'isbn_10': ('isbn_10', None), + 'oclc_numbers': ('oclc_number', None), + 'lccn': ('lccn', None), + } - self.base_url = info.base_url - self.books_url = info.books_url - self.covers_url = info.covers_url - self.search_url = info.search_url - self.key_name = info.key_name - self.max_query_count = info.max_query_count - self.name = info.name - self.local = info.local - self.id = info.id - self.identifier = info.identifier - + fields = [ + 'base_url', + 'books_url', + 'covers_url', + 'search_url', + 'key_name', + 'max_query_count', + 'name', + 'identifier', + 'local' + ] + for field in fields: + setattr(self, field, getattr(info, field)) def is_available(self): ''' check if you're allowed to use this connector ''' @@ -55,38 +65,94 @@ class AbstractConnector(ABC): return results - def create_book(self, key, data, model): - ''' create a work or edition from data ''' - # we really would rather use an existing book than make a new one - match = match_from_mappings(data, self.key_mappings) - if match: - if not isinstance(match, model): - if type(match).__name__ == 'Edition': - return match.parent_work - else: - return match.default_edition - return match + def get_or_create_book(self, remote_id): + ''' pull up a book record by whatever means possible ''' + # try to load the book + book = models.Book.objects.select_subclasses().filter( + remote_id=remote_id + ).first() + if book: + if isinstance(book, models.Work): + return book.default_edition + return book - kwargs = { - self.key_name: key, - 'title': data['title'], - 'connector': self.connector - } - book = model.objects.create(**kwargs) + # no book was found, so we start creating a new one + data = get_data(remote_id) + + work = None + edition = None + if self.is_work_data(data): + work_data = data + # if we requested a work and there's already an edition, we're set + work = self.match_from_mappings(work_data) + if work and work.default_edition: + return work.default_edition + + # no such luck, we need more information. + try: + edition_data = self.get_edition_from_work_data(work_data) + except KeyError: + # hack: re-use the work data as the edition data + # this is why remote ids aren't necessarily unique + edition_data = data + else: + edition_data = data + edition = self.match_from_mappings(edition_data) + # no need to figure out about the work if we already know about it + if edition and edition.parent_work: + return edition + + # no such luck, we need more information. + try: + work_data = self.get_work_from_edition_date(edition_data) + except KeyError: + # remember this hack: re-use the work data as the edition data + work_data = data + + # at this point, we need to figure out the work, edition, or both + # atomic so that we don't save a work with no edition for vice versa + with transaction.atomic(): + if not work: + work_key = work_data.get('url') + work = self.create_book(work_key, work_data, models.Work) + + if not edition: + ed_key = edition_data.get('url') + edition = self.create_book(ed_key, edition_data, models.Edition) + edition.default = True + edition.parent_work = work + edition.save() + + # now's our change to fill in author gaps + if not edition.authors and work.authors: + edition.authors.set(work.authors.all()) + edition.author_text = work.author_text + edition.save() + + return edition + + + def create_book(self, remote_id, data, model): + ''' create a work or edition from data ''' + book = model.objects.create( + remote_id=remote_id, + title=data['title'], + connector=self.connector, + ) return self.update_book_from_data(book, data) - def update_book_from_data(self, book, data): - ''' simple function to save data to a book ''' - update_from_mappings(book, data, self.book_mappings) + def update_book_from_data(self, book, data, update_cover=True): + ''' for creating a new book or syncing with data ''' + book = update_from_mappings(book, data, self.book_mappings) + + for author in self.get_authors_from_data(data): + book.authors.add(author) + book.author_text = ', '.join(a.name for a in book.authors.all()) book.save() - authors = self.get_authors_from_data(data) - for author in authors: - book.authors.add(author) - if authors: - book.author_text = ', '.join(a.name for a in authors) - book.save() + if not update_cover: + return book cover = self.get_cover_from_data(data) if cover: @@ -103,16 +169,61 @@ class AbstractConnector(ABC): key = getattr(book, self.key_name) data = self.load_book_data(key) - if book.sync_cover: - book.cover.save(*self.get_cover_from_data(data), save=True) if book.sync: - book = self.update_book_from_data(book, data) + book = self.update_book_from_data( + book, data, update_cover=book.sync_cover) + else: + cover = self.get_cover_from_data(data) + if cover: + book.cover.save(*cover, save=True) + return book - def load_book_data(self, remote_id): - ''' default method for loading book data ''' - return get_data(remote_id) + def match_from_mappings(self, data): + ''' try to find existing copies of this book using various keys ''' + keys = [ + ('openlibrary_key', models.Book), + ('librarything_key', models.Book), + ('goodreads_key', models.Book), + ('lccn', models.Work), + ('isbn_10', models.Edition), + ('isbn_13', models.Edition), + ('oclc_number', models.Edition), + ('asin', models.Edition), + ] + noop = lambda x: x + for key, model in keys: + formatter = None + if key in self.key_mappings: + key, formatter = self.key_mappings[key] + if not formatter: + formatter = noop + + value = data.get(key) + if not value: + continue + value = formatter(value) + + match = model.objects.select_subclasses().filter( + **{key: value}).first() + if match: + return match + + + @abstractmethod + def is_work_data(self, data): + ''' differentiate works and editions ''' + + + @abstractmethod + def get_edition_from_work_data(self, data): + ''' every work needs at least one edition ''' + + + @abstractmethod + def get_work_from_edition_date(self, data): + ''' every edition needs a work ''' @abstractmethod @@ -135,23 +246,11 @@ class AbstractConnector(ABC): ''' create a SearchResult obj from json ''' - @abstractmethod - def get_or_create_book(self, book_id): - ''' request and format a book given an identifier ''' - # return book model obj - - @abstractmethod def expand_book_data(self, book): ''' get more info on a book ''' - @abstractmethod - def get_or_create_author(self, book_id): - ''' request and format a book given an identifier ''' - # return book model obj - - def update_from_mappings(obj, data, mappings): ''' assign data to model with mappings ''' noop = lambda x: x @@ -172,37 +271,6 @@ def update_from_mappings(obj, data, mappings): return obj -def match_from_mappings(data, mappings): - ''' try to find existing copies of this book using various keys ''' - keys = [ - ('openlibrary_key', models.Book), - ('librarything_key', models.Book), - ('goodreads_key', models.Book), - ('lccn', models.Work), - ('isbn_10', models.Edition), - ('isbn_13', models.Edition), - ('oclc_number', models.Edition), - ('asin', models.Edition), - ] - noop = lambda x: x - for key, model in keys: - formatter = None - if key in mappings: - key, formatter = mappings[key] - if not formatter: - formatter = noop - - value = data.get(key) - if not value: - continue - value = formatter(value) - - match = model.objects.select_subclasses().filter( - **{key: value}).first() - if match: - return match - - def has_attr(obj, key): ''' helper function to check if a model object has a key ''' try: @@ -226,7 +294,7 @@ def get_data(url): resp = requests.get( url, headers={ - 'Accept': 'application/activity+json; charset=utf-8', + 'Accept': 'application/json; charset=utf-8', }, ) if not resp.ok: @@ -235,7 +303,7 @@ def get_data(url): return data -class SearchResult: +class SearchResult(object): ''' standardized search result object ''' def __init__(self, title, key, author, year): self.title = title diff --git a/fedireads/connectors/fedireads_connector.py b/fedireads/connectors/fedireads_connector.py index 83705e661..f1a5d68fd 100644 --- a/fedireads/connectors/fedireads_connector.py +++ b/fedireads/connectors/fedireads_connector.py @@ -1,10 +1,8 @@ ''' using another fedireads instance as a source of book data ''' -import re from uuid import uuid4 from django.core.exceptions import ObjectDoesNotExist from django.core.files.base import ContentFile -from django.db import transaction import requests from fedireads import models @@ -15,71 +13,29 @@ from .abstract_connector import update_from_mappings, get_date, get_data class Connector(AbstractConnector): ''' interact with other instances ''' def __init__(self, identifier): - self.key_mappings = { - 'isbn_13': ('isbn_13', None), - 'isbn_10': ('isbn_10', None), - 'oclc_numbers': ('oclc_number', None), - 'lccn': ('lccn', None), - } + super().__init__(identifier) self.book_mappings = self.key_mappings.copy() self.book_mappings.update({ 'published_date': ('published_date', get_date), 'first_published_date': ('first_published_date', get_date), }) - super().__init__(identifier) - def format_search_result(self, search_result): - return SearchResult(**search_result) + def is_work_data(self, data): + return data['book_type'] == 'Work' - def parse_search_data(self, data): - return data + def get_edition_from_work_data(self, data): + return data['editions'][0] - def get_or_create_book(self, remote_id): - ''' pull up a book record by whatever means possible ''' - # re-construct a remote id from the int and books_url - if re.match(r'^\d+$', remote_id): - remote_id = self.books_url + '/' + remote_id - book = models.Book.objects.select_subclasses().filter( - remote_id=remote_id - ).first() - if book: - if isinstance(book, models.Work): - return book.default_edition - return book + def get_work_from_edition_date(self, data): + return data['work'] - # no book was found, so we start creating a new one - data = get_data(remote_id) - if data['book_type'] == 'work': - work_data = data - try: - edition_data = data['editions'][0] - except KeyError: - # hack: re-use the work data as the edition data - edition_data = data - else: - edition_data = data - try: - work_data = data['work'] - except KeyError: - # hack: re-use the work data as the edition data - work_data = data - - with transaction.atomic(): - # create both work and a default edition - work_key = work_data.get('url') - work = self.create_book(work_key, work_data, models.Work) - - ed_key = edition_data.get('url') - edition = self.create_book(ed_key, edition_data, models.Edition) - edition.default = True - edition.parent_work = work - edition.save() - - return edition + def get_authors_from_data(self, data): + for author_url in data.get('authors', []): + yield self.get_or_create_author(author_url) def get_cover_from_data(self, data): @@ -96,14 +52,6 @@ class Connector(AbstractConnector): return [image_name, image_content] - def get_authors_from_data(self, data): - authors = [] - - for author_url in data.get('authors', []): - authors.append(self.get_or_create_author(author_url)) - return authors - - def get_or_create_author(self, remote_id): ''' load that author ''' try: @@ -125,16 +73,14 @@ class Connector(AbstractConnector): return author + def parse_search_data(self, data): + return data + + + def format_search_result(self, search_result): + return SearchResult(**search_result) + + def expand_book_data(self, book): # TODO pass - - -def get_cover(cover_url): - ''' ask openlibrary for the cover ''' - image_name = cover_url.split('/')[-1] - response = requests.get(cover_url) - if not response.ok: - response.raise_for_status() - image_content = ContentFile(response.content) - return [image_name, image_content] diff --git a/fedireads/connectors/openlibrary.py b/fedireads/connectors/openlibrary.py index ee73f47d9..40fe63f85 100644 --- a/fedireads/connectors/openlibrary.py +++ b/fedireads/connectors/openlibrary.py @@ -3,7 +3,6 @@ import re import requests from django.core.files.base import ContentFile -from django.db import transaction from fedireads import models from .abstract_connector import AbstractConnector, SearchResult @@ -15,6 +14,7 @@ from .openlibrary_languages import languages class Connector(AbstractConnector): ''' instantiate a connector for OL ''' def __init__(self, identifier): + super().__init__(identifier) get_first = lambda a: a[0] self.key_mappings = { 'isbn_13': ('isbn_13', get_first), @@ -32,12 +32,63 @@ class Connector(AbstractConnector): 'number_of_pages': ('pages', None), 'series': ('series', get_first), }) - super().__init__(identifier) + + + def is_work_data(self, data): + return not re.match(r'^OL\d+M$', data['key']) + + + def get_edition_from_work_data(self, data): + try: + key = data['key'] + except KeyError: + return False + url = '%s/%s/editions' % (self.books_url, key) + data = get_data(url) + return pick_default_edition(data['entries']) + + + def get_work_from_edition_date(self, data): + try: + key = data['works'][0]['key'] + except (IndexError, KeyError): + return False + url = '%s/%s' % (self.books_url, key) + return get_data(url) + + + def get_authors_from_data(self, data): + ''' parse author json and load or create authors ''' + for author_blob in data.get('authors', []): + author_blob = author_blob.get('author', author_blob) + # this id is "/authors/OL1234567A" and we want just "OL1234567A" + author_id = author_blob['key'].split('/')[-1] + yield self.get_or_create_author(author_id) + + + def get_cover_from_data(self, data): + ''' ask openlibrary for the cover ''' + if not data.get('covers'): + return None + + cover_id = data.get('covers')[0] + image_name = '%s-M.jpg' % cover_id + url = '%s/b/id/%s' % (self.covers_url, image_name) + response = requests.get(url) + if not response.ok: + response.raise_for_status() + image_content = ContentFile(response.content) + return [image_name, image_content] + + + def parse_search_data(self, data): + return data.get('docs') def format_search_result(self, doc): key = doc['key'] - key = key.split('/')[-1] + # build the absolute id from the openlibrary key + key = self.books_url + key author = doc.get('author_name') or ['Unknown'] return SearchResult( doc.get('title'), @@ -47,84 +98,6 @@ class Connector(AbstractConnector): ) - def parse_search_data(self, data): - return data.get('docs') - - - def get_or_create_book(self, olkey): - ''' pull up a book record by whatever means possible. - if you give a work key, it should give you the default edition, - annotated with work data. ''' - - book = models.Book.objects.select_subclasses().filter( - openlibrary_key=olkey - ).first() - if book: - if isinstance(book, models.Work): - return book.default_edition - return book - - # no book was found, so we start creating a new one - if re.match(r'^OL\d+W$', olkey): - with transaction.atomic(): - # create both work and a default edition - work_data = self.load_book_data(olkey) - work = self.create_book(olkey, work_data, models.Work) - - edition_options = self.load_edition_data(olkey).get('entries') - edition_data = pick_default_edition(edition_options) - if not edition_data: - # hack: re-use the work data as the edition data - edition_data = work_data - key = edition_data.get('key').split('/')[-1] - edition = self.create_book(key, edition_data, models.Edition) - edition.default = True - edition.parent_work = work - edition.save() - else: - with transaction.atomic(): - edition_data = self.load_book_data(olkey) - edition = self.create_book(olkey, edition_data, models.Edition) - - work_data = edition_data.get('works') - if not work_data: - # hack: we're re-using the edition data as the work data - work_key = olkey - else: - work_key = work_data[0]['key'].split('/')[-1] - - work = models.Work.objects.filter( - openlibrary_key=work_key - ).first() - if not work: - work_data = self.load_book_data(work_key) - work = self.create_book(work_key, work_data, models.Work) - edition.parent_work = work - edition.save() - if not edition.authors and work.authors: - edition.authors.set(work.authors.all()) - edition.author_text = ', '.join(a.name for a in edition.authors) - - return edition - - - def get_authors_from_data(self, data): - ''' parse author json and load or create authors ''' - authors = [] - for author_blob in data.get('authors', []): - # this id is "/authors/OL1234567A" and we want just "OL1234567A" - author_blob = author_blob.get('author', author_blob) - author_id = author_blob['key'].split('/')[-1] - authors.append(self.get_or_create_author(author_id)) - return authors - - - def load_book_data(self, olkey): - ''' query openlibrary for data on a book ''' - url = '%s/works/%s.json' % (self.books_url, olkey) - return get_data(url) - - def load_edition_data(self, olkey): ''' query openlibrary for editions of a work ''' url = '%s/works/%s/editions.json' % (self.books_url, olkey) @@ -167,8 +140,8 @@ class Connector(AbstractConnector): 'bio': ('bio', get_description), } author = update_from_mappings(author, data, mappings) - # TODO this is making some BOLD assumption name = data.get('name') + # TODO this is making some BOLD assumption if name: author.last_name = name.split(' ')[-1] author.first_name = ' '.join(name.split(' ')[:-1]) @@ -177,21 +150,6 @@ class Connector(AbstractConnector): return author - def get_cover_from_data(self, data): - ''' ask openlibrary for the cover ''' - if not data.get('covers'): - return None - - cover_id = data.get('covers')[0] - image_name = '%s-M.jpg' % cover_id - url = '%s/b/id/%s' % (self.covers_url, image_name) - response = requests.get(url) - if not response.ok: - response.raise_for_status() - image_content = ContentFile(response.content) - return [image_name, image_content] - - def get_description(description_blob): ''' descriptions can be a string or a dict ''' if isinstance(description_blob, dict): diff --git a/fedireads/connectors/self_connector.py b/fedireads/connectors/self_connector.py index c603af92e..cc68446f9 100644 --- a/fedireads/connectors/self_connector.py +++ b/fedireads/connectors/self_connector.py @@ -42,7 +42,7 @@ class Connector(AbstractConnector): def format_search_result(self, book): return SearchResult( book.title, - book.id, + book.absolute_id, book.author_text, book.published_date.year if book.published_date else None, ) @@ -59,17 +59,14 @@ class Connector(AbstractConnector): return None - def get_or_create_author(self, author_id): - ''' load that author ''' - try: - return models.Author.objects.get(id=author_id) - except ObjectDoesNotExist: - pass + def is_work_data(self, data): + pass + def get_edition_from_work_data(self, data): + pass - def parse_search_data(self, data): - ''' it's already in the right format, don't even worry about it ''' - return data + def get_work_from_edition_date(self, data): + pass def get_authors_from_data(self, data): return None @@ -77,8 +74,9 @@ class Connector(AbstractConnector): def get_cover_from_data(self, data): return None - def update_book(self, book_obj, data=None): - pass + def parse_search_data(self, data): + ''' it's already in the right format, don't even worry about it ''' + return data def expand_book_data(self, book): pass diff --git a/fedireads/templates/book_results.html b/fedireads/templates/book_results.html index 99e079e62..71d3a637c 100644 --- a/fedireads/templates/book_results.html +++ b/fedireads/templates/book_results.html @@ -13,7 +13,11 @@ {% for result in result_set.results %}
- {{ result.title }} by {{ result.author }} ({{ result.year }}) +
+ {% csrf_token %} + + +
{% endfor %} diff --git a/fedireads/urls.py b/fedireads/urls.py index d838fc2d5..5a43b2653 100644 --- a/fedireads/urls.py +++ b/fedireads/urls.py @@ -58,7 +58,7 @@ urlpatterns = [ re_path(r'%s/replies(.json)?/?$' % status_path, views.replies_page), # books - re_path(r'book/(?P[\w_:\d]+)(.json)?/?$', views.book_page), + re_path(r'%s(.json)?/?$' % book_path, views.book_page), re_path(r'%s/(?Pfriends|local|federated)?$' % book_path, views.book_page), re_path(r'%s/edit/?$' % book_path, views.edit_book_page), re_path(r'^editions/(?P\d+)/?$', views.editions_page), @@ -77,6 +77,7 @@ urlpatterns = [ re_path(r'^edit_profile/?$', actions.edit_profile), re_path(r'^import_data/?', actions.import_data), + re_path(r'^resolve_book/?', actions.resolve_book), re_path(r'^edit_book/(?P\d+)/?', actions.edit_book), re_path(r'^upload_cover/(?P\d+)/?', actions.upload_cover), diff --git a/fedireads/view_actions.py b/fedireads/view_actions.py index eaa493041..85cf40afd 100644 --- a/fedireads/view_actions.py +++ b/fedireads/view_actions.py @@ -115,6 +115,13 @@ def edit_profile(request): return redirect('/user/%s' % request.user.localname) +def resolve_book(request): + ''' figure out the local path to a book from a remote_id ''' + remote_id = request.POST.get('remote_id') + book = get_or_create_book(remote_id, key='remote_id') + return redirect('/book/%d' % book.id) + + @login_required def edit_book(request, book_id): ''' edit a book cool ''' diff --git a/fedireads/views.py b/fedireads/views.py index 2d2ca9ae2..828f2cffa 100644 --- a/fedireads/views.py +++ b/fedireads/views.py @@ -390,14 +390,6 @@ def edit_profile_page(request): def book_page(request, book_id, tab='friends'): ''' info about a book ''' - if ':' in book_id: - try: - connector_id, key, book_id = book_id.split(':') - except ValueError: - return HttpResponseNotFound() - book = get_or_create_book(book_id, key=key, connector_id=connector_id) - return redirect('/book/%d' % book.id) - book = get_or_create_book(book_id) if is_api_request(request): return JsonResponse(activitypub.get_book(book))