Merge branch 'fedireads_connector' into code-cleanup

2024-06-26 08:50:39 +00:00 · 2020-05-10 13:37:16 -07:00 · 2020-05-10 13:37:16 -07:00 · 3edfc0be74
parent 72b4e7da76 2ef87c2131
commit 3edfc0be74
9 changed files with 244 additions and 269 deletions
--- a/fedireads/books_manager.py
+++ b/fedireads/books_manager.py
@ -4,7 +4,7 @@ from requests import HTTPError
 import importlib
 from urllib.parse import urlparse

-from fedireads import models
+from fedireads import models, settings
 from fedireads.tasks import app


@ -69,6 +69,10 @@ def get_by_absolute_id(absolute_id, model):
    except model.DoesNotExist:
        pass

+    url = urlparse(absolute_id)
+    if url.netloc != settings.DOMAIN:
+        return None
+
    # try finding a local status with that id
    local_id = absolute_id.split('/')[-1]
    try:
--- a/fedireads/connectors/abstract_connector.py
+++ b/fedireads/connectors/abstract_connector.py
@ -4,6 +4,8 @@ from dateutil import parser
 import pytz
 import requests

+from django.db import transaction
+
 from fedireads import models


@ -16,6 +18,12 @@ class AbstractConnector(ABC):
        self.connector = info

        self.book_mappings = {}
+        self.key_mappings = {
+            'isbn_13': ('isbn_13', None),
+            'isbn_10': ('isbn_10', None),
+            'oclc_numbers': ('oclc_number', None),
+            'lccn': ('lccn', None),
+        }

        fields = [
            'base_url',
@ -58,38 +66,94 @@ class AbstractConnector(ABC):
        return results


-    def create_book(self, key, data, model):
-        ''' create a work or edition from data '''
-        # we really would rather use an existing book than make a new one
-        match = match_from_mappings(data, self.key_mappings)
-        if match:
-            if not isinstance(match, model):
-                if type(match).__name__ == 'Edition':
-                    return match.parent_work
-                else:
-                    return match.default_edition
-            return match
+    def get_or_create_book(self, remote_id):
+        ''' pull up a book record by whatever means possible '''
+        # try to load the book
+        book = models.Book.objects.select_subclasses().filter(
+            remote_id=remote_id
+        ).first()
+        if book:
+            if isinstance(book, models.Work):
+                return book.default_edition
+            return book

-        kwargs = {
-            self.key_name: key,
-            'title': data['title'],
-            'connector': self.connector
-        }
-        book = model.objects.create(**kwargs)
+        # no book was found, so we start creating a new one
+        data = get_data(remote_id)
+
+        work = None
+        edition = None
+        if self.is_work_data(data):
+            work_data = data
+            # if we requested a work and there's already an edition, we're set
+            work = self.match_from_mappings(work_data)
+            if work and work.default_edition:
+                return work.default_edition
+
+            # no such luck, we need more information.
+            try:
+                edition_data = self.get_edition_from_work_data(work_data)
+            except KeyError:
+                # hack: re-use the work data as the edition data
+                # this is why remote ids aren't necessarily unique
+                edition_data = data
+        else:
+            edition_data = data
+            edition = self.match_from_mappings(edition_data)
+            # no need to figure out about the work if we already know about it
+            if edition and edition.parent_work:
+                return edition
+
+            # no such luck, we need more information.
+            try:
+                work_data = self.get_work_from_edition_date(edition_data)
+            except KeyError:
+                # remember this hack: re-use the edition data as the work data
+                work_data = data
+
+        # at this point, we need to figure out the work, edition, or both
+        # atomic so that we don't save a work with no edition or vice versa
+        with transaction.atomic():
+            if not work:
+                work_key = work_data.get('url')
+                work = self.create_book(work_key, work_data, models.Work)
+
+            if not edition:
+                ed_key = edition_data.get('url')
+                edition = self.create_book(ed_key, edition_data, models.Edition)
+                edition.default = True
+                edition.parent_work = work
+                edition.save()
+
+        # now's our chance to fill in author gaps
+        if not edition.authors and work.authors:
+            edition.authors.set(work.authors.all())
+            edition.author_text = work.author_text
+            edition.save()
+
+        return edition
+
+
+    def create_book(self, remote_id, data, model):
+        ''' create a work or edition from data '''
+        book = model.objects.create(
+            remote_id=remote_id,
+            title=data['title'],
+            connector=self.connector,
+        )
        return self.update_book_from_data(book, data)


-    def update_book_from_data(self, book, data):
-        ''' simple function to save data to a book '''
-        update_from_mappings(book, data, self.book_mappings)
+    def update_book_from_data(self, book, data, update_cover=True):
+        ''' for creating a new book or syncing with data '''
+        book = update_from_mappings(book, data, self.book_mappings)
+
+        for author in self.get_authors_from_data(data):
+            book.authors.add(author)
+        book.author_text = ', '.join(a.name for a in book.authors.all())
        book.save()

-        authors = self.get_authors_from_data(data)
-        for author in authors:
-            book.authors.add(author)
-        if authors:
-            book.author_text = ', '.join(a.name for a in authors)
-            book.save()
+        if not update_cover:
+            return book

        cover = self.get_cover_from_data(data)
        if cover:
@ -106,16 +170,61 @@ class AbstractConnector(ABC):
            key = getattr(book, self.key_name)
            data = self.load_book_data(key)

-        if book.sync_cover:
-            book.cover.save(*self.get_cover_from_data(data), save=True)
        if book.sync:
-            book = self.update_book_from_data(book, data)
+            book = self.update_book_from_data(
+                book, data, update_cover=book.sync_cover)
+        else:
+            cover = self.get_cover_from_data(data)
+            if cover:
+                book.cover.save(*cover, save=True)
+
        return book


-    def load_book_data(self, remote_id):
-        ''' default method for loading book data '''
-        return get_data(remote_id)
+    def match_from_mappings(self, data):
+        ''' try to find existing copies of this book using various keys '''
+        keys = [
+            ('openlibrary_key', models.Book),
+            ('librarything_key', models.Book),
+            ('goodreads_key', models.Book),
+            ('lccn', models.Work),
+            ('isbn_10', models.Edition),
+            ('isbn_13', models.Edition),
+            ('oclc_number', models.Edition),
+            ('asin', models.Edition),
+        ]
+        noop = lambda x: x
+        for key, model in keys:
+            formatter = None
+            if key in self.key_mappings:
+                key, formatter = self.key_mappings[key]
+            if not formatter:
+                formatter = noop
+
+            value = data.get(key)
+            if not value:
+                continue
+            value = formatter(value)
+
+            match = model.objects.select_subclasses().filter(
+                **{key: value}).first()
+            if match:
+                return match
+
+
+    @abstractmethod
+    def is_work_data(self, data):
+        ''' differentiate works and editions '''
+
+
+    @abstractmethod
+    def get_edition_from_work_data(self, data):
+        ''' every work needs at least one edition '''
+
+
+    @abstractmethod
+    def get_work_from_edition_date(self, data):
+        ''' every edition needs a work '''


    @abstractmethod
@ -138,23 +247,11 @@ class AbstractConnector(ABC):
        ''' create a SearchResult obj from json '''


-    @abstractmethod
-    def get_or_create_book(self, book_id):
-        ''' request and format a book given an identifier '''
-        # return book model obj
-
-
    @abstractmethod
    def expand_book_data(self, book):
        ''' get more info on a book '''


-    @abstractmethod
-    def get_or_create_author(self, book_id):
-        ''' request and format a book given an identifier '''
-        # return book model obj
-
-
 def update_from_mappings(obj, data, mappings):
    ''' assign data to model with mappings '''
    noop = lambda x: x
@ -175,37 +272,6 @@ def update_from_mappings(obj, data, mappings):
    return obj


-def match_from_mappings(data, mappings):
-    ''' try to find existing copies of this book using various keys '''
-    keys = [
-        ('openlibrary_key', models.Book),
-        ('librarything_key', models.Book),
-        ('goodreads_key', models.Book),
-        ('lccn', models.Work),
-        ('isbn_10', models.Edition),
-        ('isbn_13', models.Edition),
-        ('oclc_number', models.Edition),
-        ('asin', models.Edition),
-    ]
-    noop = lambda x: x
-    for key, model in keys:
-        formatter = None
-        if key in mappings:
-            key, formatter = mappings[key]
-        if not formatter:
-            formatter = noop
-
-        value = data.get(key)
-        if not value:
-            continue
-        value = formatter(value)
-
-        match = model.objects.select_subclasses().filter(
-            **{key: value}).first()
-        if match:
-            return match
-
-
 def has_attr(obj, key):
    ''' helper function to check if a model object has a key '''
    try:
@ -229,7 +295,7 @@ def get_data(url):
    resp = requests.get(
        url,
        headers={
-            'Accept': 'application/activity+json; charset=utf-8',
+            'Accept': 'application/json; charset=utf-8',
        },
    )
    if not resp.ok:
--- a/fedireads/connectors/fedireads_connector.py
+++ b/fedireads/connectors/fedireads_connector.py
@ -1,10 +1,8 @@
 ''' using another fedireads instance as a source of book data '''
-import re
 from uuid import uuid4

 from django.core.exceptions import ObjectDoesNotExist
 from django.core.files.base import ContentFile
-from django.db import transaction
 import requests

 from fedireads import models
@ -15,71 +13,29 @@ from .abstract_connector import update_from_mappings, get_date, get_data
 class Connector(AbstractConnector):
    ''' interact with other instances '''
    def __init__(self, identifier):
-        self.key_mappings = {
-            'isbn_13': ('isbn_13', None),
-            'isbn_10': ('isbn_10', None),
-            'oclc_numbers': ('oclc_number', None),
-            'lccn': ('lccn', None),
-        }
+        super().__init__(identifier)
        self.book_mappings = self.key_mappings.copy()
        self.book_mappings.update({
            'published_date': ('published_date', get_date),
            'first_published_date': ('first_published_date', get_date),
        })
-        super().__init__(identifier)


-    def format_search_result(self, search_result):
-        return SearchResult(**search_result)
+    def is_work_data(self, data):
+        return data['book_type'] == 'Work'


-    def parse_search_data(self, data):
-        return data
+    def get_edition_from_work_data(self, data):
+        return data['editions'][0]


-    def get_or_create_book(self, remote_id):
-        ''' pull up a book record by whatever means possible '''
-        # re-construct a remote id from the int and books_url
-        if re.match(r'^\d+$', remote_id):
-            remote_id = self.books_url + '/' + remote_id
-        book = models.Book.objects.select_subclasses().filter(
-            remote_id=remote_id
-        ).first()
-        if book:
-            if isinstance(book, models.Work):
-                return book.default_edition
-            return book
+    def get_work_from_edition_date(self, data):
+        return data['work']

-        # no book was found, so we start creating a new one
-        data = get_data(remote_id)

-        if data['book_type'] == 'work':
-            work_data = data
-            try:
-                edition_data = data['editions'][0]
-            except KeyError:
-                # hack: re-use the work data as the edition data
-                edition_data = data
-        else:
-            edition_data = data
-            try:
-                work_data = data['work']
-            except KeyError:
-                # hack: re-use the work data as the edition data
-                work_data = data
-
-        with transaction.atomic():
-            # create both work and a default edition
-            work_key = work_data.get('url')
-            work = self.create_book(work_key, work_data, models.Work)
-
-            ed_key = edition_data.get('url')
-            edition = self.create_book(ed_key, edition_data, models.Edition)
-            edition.default = True
-            edition.parent_work = work
-            edition.save()
-
-        return edition
+    def get_authors_from_data(self, data):
+        for author_url in data.get('authors', []):
+            yield self.get_or_create_author(author_url)


    def get_cover_from_data(self, data):
@ -96,14 +52,6 @@ class Connector(AbstractConnector):
        return [image_name, image_content]


-    def get_authors_from_data(self, data):
-        authors = []
-
-        for author_url in data.get('authors', []):
-            authors.append(self.get_or_create_author(author_url))
-        return authors
-
-
    def get_or_create_author(self, remote_id):
        ''' load that author '''
        try:
@ -125,16 +73,14 @@ class Connector(AbstractConnector):
        return author


+    def parse_search_data(self, data):
+        return data
+
+
+    def format_search_result(self, search_result):
+        return SearchResult(**search_result)
+
+
    def expand_book_data(self, book):
        # TODO
        pass
-
-
-def get_cover(cover_url):
-    ''' download the cover '''
-    image_name = cover_url.split('/')[-1]
-    response = requests.get(cover_url)
-    if not response.ok:
-        response.raise_for_status()
-    image_content = ContentFile(response.content)
-    return [image_name, image_content]
--- a/fedireads/connectors/openlibrary.py
+++ b/fedireads/connectors/openlibrary.py
@ -3,7 +3,6 @@ import re
 import requests

 from django.core.files.base import ContentFile
-from django.db import transaction

 from fedireads import models
 from .abstract_connector import AbstractConnector, SearchResult
@ -15,6 +14,7 @@ from .openlibrary_languages import languages
 class Connector(AbstractConnector):
    ''' instantiate a connector for OL '''
    def __init__(self, identifier):
+        super().__init__(identifier)
        get_first = lambda a: a[0]
        self.key_mappings = {
            'isbn_13': ('isbn_13', get_first),
@ -32,12 +32,62 @@ class Connector(AbstractConnector):
            'number_of_pages': ('pages', None),
            'series': ('series', get_first),
        })
-        super().__init__(identifier)
+
+
+    def is_work_data(self, data):
+        return not re.match(r'^OL\d+M$', data['key'])
+
+
+    def get_edition_from_work_data(self, data):
+        try:
+            key = data['key']
+        except KeyError:
+            return False
+        url = '%s/%s/editions' % (self.books_url, key)
+        data = get_data(url)
+        return pick_default_edition(data['entries'])
+
+
+    def get_work_from_edition_date(self, data):
+        try:
+            key = data['works'][0]['key']
+        except (IndexError, KeyError):
+            return False
+        url = '%s/%s' % (self.books_url, key)
+        return get_data(url)
+
+
+    def get_authors_from_data(self, data):
+        ''' parse author json and load or create authors '''
+        for author_blob in data.get('authors', []):
+            author_blob = author_blob.get('author', author_blob)
+            # this id is "/authors/OL1234567A" and we want just "OL1234567A"
+            author_id = author_blob['key'].split('/')[-1]
+            yield self.get_or_create_author(author_id)
+
+
+    def get_cover_from_data(self, data):
+        ''' ask openlibrary for the cover '''
+        if not data.get('covers'):
+            return None
+
+        cover_id = data.get('covers')[0]
+        image_name = '%s-M.jpg' % cover_id
+        url = '%s/b/id/%s' % (self.covers_url, image_name)
+        response = requests.get(url)
+        if not response.ok:
+            response.raise_for_status()
+        image_content = ContentFile(response.content)
+        return [image_name, image_content]
+
+
+    def parse_search_data(self, data):
+        return data.get('docs')


    def format_search_result(self, doc):
-        key = doc['key']
-        key = key.split('/')[-1]
+        # build the absolute id from the openlibrary key
+        key = self.books_url + doc['key']
        author = doc.get('author_name') or ['Unknown']
        return SearchResult(
            doc.get('title'),
@ -47,84 +97,6 @@ class Connector(AbstractConnector):
        )


-    def parse_search_data(self, data):
-        return data.get('docs')
-
-
-    def get_or_create_book(self, olkey):
-        ''' pull up a book record by whatever means possible.
-        if you give a work key, it should give you the default edition,
-        annotated with work data. '''
-
-        book = models.Book.objects.select_subclasses().filter(
-            openlibrary_key=olkey
-        ).first()
-        if book:
-            if isinstance(book, models.Work):
-                return book.default_edition
-            return book
-
-        # no book was found, so we start creating a new one
-        if re.match(r'^OL\d+W$', olkey):
-            with transaction.atomic():
-                # create both work and a default edition
-                work_data = self.load_book_data(olkey)
-                work = self.create_book(olkey, work_data, models.Work)
-
-                edition_options = self.load_edition_data(olkey).get('entries')
-                edition_data = pick_default_edition(edition_options)
-                if not edition_data:
-                    # hack: re-use the work data as the edition data
-                    edition_data = work_data
-                key = edition_data.get('key').split('/')[-1]
-                edition = self.create_book(key, edition_data, models.Edition)
-                edition.default = True
-                edition.parent_work = work
-                edition.save()
-        else:
-            with transaction.atomic():
-                edition_data = self.load_book_data(olkey)
-                edition = self.create_book(olkey, edition_data, models.Edition)
-
-                work_data = edition_data.get('works')
-                if not work_data:
-                    # hack: we're re-using the edition data as the work data
-                    work_key = olkey
-                else:
-                    work_key = work_data[0]['key'].split('/')[-1]
-
-                work = models.Work.objects.filter(
-                    openlibrary_key=work_key
-                ).first()
-                if not work:
-                    work_data = self.load_book_data(work_key)
-                    work = self.create_book(work_key, work_data, models.Work)
-                edition.parent_work = work
-                edition.save()
-        if not edition.authors and work.authors:
-            edition.authors.set(work.authors.all())
-            edition.author_text = ', '.join(a.name for a in edition.authors)
-
-        return edition
-
-
-    def get_authors_from_data(self, data):
-        ''' parse author json and load or create authors '''
-        authors = []
-        for author_blob in data.get('authors', []):
-            # this id is "/authors/OL1234567A" and we want just "OL1234567A"
-            author_blob = author_blob.get('author', author_blob)
-            author_id = author_blob['key'].split('/')[-1]
-            authors.append(self.get_or_create_author(author_id))
-        return authors
-
-
-    def load_book_data(self, olkey):
-        ''' query openlibrary for data on a book '''
-        url = '%s/works/%s.json' % (self.books_url, olkey)
-        return get_data(url)
-
-
    def load_edition_data(self, olkey):
        ''' query openlibrary for editions of a work '''
        url = '%s/works/%s/editions.json' % (self.books_url, olkey)
@ -167,8 +139,8 @@ class Connector(AbstractConnector):
            'bio': ('bio', get_description),
        }
        author = update_from_mappings(author, data, mappings)
-        # TODO this is making some BOLD assumption
        name = data.get('name')
+        # TODO this is making some BOLD assumption
        if name:
            author.last_name = name.split(' ')[-1]
            author.first_name = ' '.join(name.split(' ')[:-1])
@ -177,21 +149,6 @@ class Connector(AbstractConnector):
        return author


-    def get_cover_from_data(self, data):
-        ''' ask openlibrary for the cover '''
-        if not data.get('covers'):
-            return None
-
-        cover_id = data.get('covers')[0]
-        image_name = '%s-M.jpg' % cover_id
-        url = '%s/b/id/%s' % (self.covers_url, image_name)
-        response = requests.get(url)
-        if not response.ok:
-            response.raise_for_status()
-        image_content = ContentFile(response.content)
-        return [image_name, image_content]
-
-
 def get_description(description_blob):
    ''' descriptions can be a string or a dict '''
    if isinstance(description_blob, dict):
--- a/fedireads/connectors/self_connector.py
+++ b/fedireads/connectors/self_connector.py
@ -42,7 +42,7 @@ class Connector(AbstractConnector):
    def format_search_result(self, book):
        return SearchResult(
            book.title,
-            book.id,
+            book.absolute_id,
            book.author_text,
            book.published_date.year if book.published_date else None,
        )
@ -59,17 +59,14 @@ class Connector(AbstractConnector):
            return None


-    def get_or_create_author(self, author_id):
-        ''' load that author '''
-        try:
-            return models.Author.objects.get(id=author_id)
-        except ObjectDoesNotExist:
+    def is_work_data(self, data):
        pass

+    def get_edition_from_work_data(self, data):
+        pass

-    def parse_search_data(self, data):
-        ''' it's already in the right format, don't even worry about it '''
-        return data
+    def get_work_from_edition_date(self, data):
+        pass

    def get_authors_from_data(self, data):
        return None
@ -77,8 +74,9 @@ class Connector(AbstractConnector):
    def get_cover_from_data(self, data):
        return None

-    def update_book(self, book_obj, data=None):
-        pass
+    def parse_search_data(self, data):
+        ''' it's already in the right format, don't even worry about it '''
+        return data

    def expand_book_data(self, book):
        pass
--- a/fedireads/templates/book_results.html
+++ b/fedireads/templates/book_results.html
@ -13,7 +13,11 @@

        {% for result in result_set.results %}
        <div>
-            <a href="/book/{% if not result_set.connector.local %}{{ result_set.connector.id }}:{{ result_set.connector.key_name}}:{% endif %}{{ result.key }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }})
+            <form action="/resolve_book" method="POST">
+                {% csrf_token %}
+                <input type="hidden" name="remote_id" value="{{ result.key }}">
+                <button type="submit">{{ result.title }} by {{ result.author }} ({{ result.year }})</button>
+            </form>
        </div>
        {% endfor %}
    </section>
--- a/fedireads/urls.py
+++ b/fedireads/urls.py
@ -58,7 +58,7 @@ urlpatterns = [
    re_path(r'%s/replies(.json)?/?$' % status_path, views.replies_page),

    # books
-    re_path(r'book/(?P<book_id>[\w_:\d]+)(.json)?/?$', views.book_page),
+    re_path(r'%s(.json)?/?$' % book_path, views.book_page),
    re_path(r'%s/(?P<tab>friends|local|federated)?$' % book_path, views.book_page),
    re_path(r'%s/edit/?$' % book_path, views.edit_book_page),
    re_path(r'^editions/(?P<work_id>\d+)/?$', views.editions_page),
@ -77,6 +77,7 @@ urlpatterns = [
    re_path(r'^edit_profile/?$', actions.edit_profile),

    re_path(r'^import_data/?', actions.import_data),
+    re_path(r'^resolve_book/?', actions.resolve_book),
    re_path(r'^edit_book/(?P<book_id>\d+)/?', actions.edit_book),
    re_path(r'^upload_cover/(?P<book_id>\d+)/?', actions.upload_cover),

--- a/fedireads/view_actions.py
+++ b/fedireads/view_actions.py
@ -114,6 +114,13 @@ def edit_profile(request):
    return redirect('/user/%s' % request.user.localname)


+def resolve_book(request):
+    ''' figure out the local path to a book from a remote_id '''
+    remote_id = request.POST.get('remote_id')
+    book = get_or_create_book(remote_id, key='remote_id')
+    return redirect('/book/%d' % book.id)
+
+
@login_required
 def edit_book(request, book_id):
    ''' edit a book cool '''
--- a/fedireads/views.py
+++ b/fedireads/views.py
@ -390,14 +390,6 @@ def edit_profile_page(request):

 def book_page(request, book_id, tab='friends'):
    ''' info about a book '''
-    if ':' in book_id:
-        try:
-            connector_id, key, book_id = book_id.split(':')
-        except ValueError:
-            return HttpResponseNotFound()
-        book = get_or_create_book(book_id, key=key, connector_id=connector_id)
-        return redirect('/book/%d' % book.id)
-
    book = get_or_create_book(book_id)
    if is_api_request(request):
        return JsonResponse(activitypub.get_book(book))