diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index cfbe0524..5662d1d5 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,7 +1,7 @@ # These are supported funding model platforms github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] -patreon: bookwrym +patreon: bookwyrm open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel diff --git a/bookwyrm/activitypub/__init__.py b/bookwyrm/activitypub/__init__.py index b5b124ec..a4fef41e 100644 --- a/bookwyrm/activitypub/__init__.py +++ b/bookwyrm/activitypub/__init__.py @@ -11,6 +11,7 @@ from .note import Tombstone from .interaction import Boost, Like from .ordered_collection import OrderedCollection, OrderedCollectionPage from .person import Person, PublicKey +from .response import ActivitypubResponse from .book import Edition, Work, Author from .verbs import Create, Delete, Undo, Update from .verbs import Follow, Accept, Reject diff --git a/bookwyrm/activitypub/response.py b/bookwyrm/activitypub/response.py new file mode 100644 index 00000000..bbc44c4d --- /dev/null +++ b/bookwyrm/activitypub/response.py @@ -0,0 +1,18 @@ +from django.http import JsonResponse + +from .base_activity import ActivityEncoder + +class ActivitypubResponse(JsonResponse): + """ + A class to be used in any place that's serializing responses for + Activitypub enabled clients. Uses JsonResponse under the hood, but already + configures some stuff beforehand. Made to be a drop-in replacement of + JsonResponse. + """ + def __init__(self, data, encoder=ActivityEncoder, safe=True, + json_dumps_params=None, **kwargs): + + if 'content_type' not in kwargs: + kwargs['content_type'] = 'application/activity+json' + + super().__init__(data, encoder, safe, json_dumps_params, **kwargs) diff --git a/bookwyrm/broadcast.py b/bookwyrm/broadcast.py index a98b6774..f4186c4d 100644 --- a/bookwyrm/broadcast.py +++ b/bookwyrm/broadcast.py @@ -3,7 +3,7 @@ import json from django.utils.http import http_date import requests -from bookwyrm import models +from bookwyrm import models, settings from bookwyrm.activitypub import ActivityEncoder from bookwyrm.tasks import app from bookwyrm.signatures import make_signature, make_digest @@ -79,6 +79,7 @@ def sign_and_send(sender, data, destination): 'Digest': digest, 'Signature': make_signature(sender, destination, now, digest), 'Content-Type': 'application/activity+json; charset=utf-8', + 'User-Agent': settings.USER_AGENT, }, ) if not response.ok: diff --git a/bookwyrm/connectors/__init__.py b/bookwyrm/connectors/__init__.py index 4eb91de4..cfafd286 100644 --- a/bookwyrm/connectors/__init__.py +++ b/bookwyrm/connectors/__init__.py @@ -2,3 +2,5 @@ from .settings import CONNECTORS from .abstract_connector import ConnectorException from .abstract_connector import get_data, get_image + +from .connector_manager import search, local_search, first_search_result diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 86ac7435..d63bd135 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,20 +1,18 @@ ''' functionality outline for a book data connector ''' from abc import ABC, abstractmethod -from dataclasses import dataclass +from dataclasses import asdict, dataclass +import logging from urllib3.exceptions import RequestError from django.db import transaction import requests -from requests import HTTPError from requests.exceptions import SSLError -from bookwyrm import activitypub, models - - -class ConnectorException(HTTPError): - ''' when the connector can't do what was asked ''' +from bookwyrm import activitypub, models, settings +from .connector_manager import load_more_data, ConnectorException +logger = logging.getLogger(__name__) class AbstractMinimalConnector(ABC): ''' just the bare bones, for other bookwyrm instances ''' def __init__(self, identifier): @@ -42,11 +40,16 @@ class AbstractMinimalConnector(ABC): '%s%s' % (self.search_url, query), headers={ 'Accept': 'application/json; charset=utf-8', + 'User-Agent': settings.USER_AGENT, }, ) if not resp.ok: resp.raise_for_status() - data = resp.json() + try: + data = resp.json() + except ValueError as e: + logger.exception(e) + raise ConnectorException('Unable to parse json response', e) results = [] for doc in self.parse_search_data(data)[:10]: @@ -83,7 +86,6 @@ class AbstractConnector(AbstractMinimalConnector): return True - @transaction.atomic def get_or_create_book(self, remote_id): ''' translate arbitrary json into an Activitypub dataclass ''' # first, check if we have the origin_id saved @@ -116,13 +118,17 @@ class AbstractConnector(AbstractMinimalConnector): if not work_data or not edition_data: raise ConnectorException('Unable to load book data: %s' % remote_id) - # create activitypub object - work_activity = activitypub.Work(**work_data) - # this will dedupe automatically - work = work_activity.to_model(models.Work) - for author in self.get_authors_from_data(data): - work.authors.add(author) - return self.create_edition_from_data(work, edition_data) + with transaction.atomic(): + # create activitypub object + work_activity = activitypub.Work(**work_data) + # this will dedupe automatically + work = work_activity.to_model(models.Work) + for author in self.get_authors_from_data(data): + work.authors.add(author) + + edition = self.create_edition_from_data(work, edition_data) + load_more_data.delay(self.connector.id, work.id) + return edition def create_edition_from_data(self, work, edition_data): @@ -168,7 +174,7 @@ class AbstractConnector(AbstractMinimalConnector): ''' every work needs at least one edition ''' @abstractmethod - def get_work_from_edition_date(self, data): + def get_work_from_edition_data(self, data): ''' every edition needs a work ''' @abstractmethod @@ -196,9 +202,10 @@ def get_data(url): url, headers={ 'Accept': 'application/json; charset=utf-8', + 'User-Agent': settings.USER_AGENT, }, ) - except RequestError: + except (RequestError, SSLError): raise ConnectorException() if not resp.ok: resp.raise_for_status() @@ -213,7 +220,12 @@ def get_data(url): def get_image(url): ''' wrapper for requesting an image ''' try: - resp = requests.get(url) + resp = requests.get( + url, + headers={ + 'User-Agent': settings.USER_AGENT, + }, + ) except (RequestError, SSLError): return None if not resp.ok: @@ -228,12 +240,19 @@ class SearchResult: key: str author: str year: str + connector: object confidence: int = 1 def __repr__(self): return "".format( self.key, self.title, self.author) + def json(self): + ''' serialize a connector for json response ''' + serialized = asdict(self) + del serialized['connector'] + return serialized + class Mapping: ''' associate a local database field with a field in an external dataset ''' diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index e4d32fd3..3c6f4614 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -13,4 +13,5 @@ class Connector(AbstractMinimalConnector): return data def format_search_result(self, search_result): + search_result['connector'] = self return SearchResult(**search_result) diff --git a/bookwyrm/books_manager.py b/bookwyrm/connectors/connector_manager.py similarity index 87% rename from bookwyrm/books_manager.py rename to bookwyrm/connectors/connector_manager.py index 3b865768..d3b01f7a 100644 --- a/bookwyrm/books_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -1,4 +1,4 @@ -''' select and call a connector for whatever book task needs doing ''' +''' interface with whatever connectors the app has ''' import importlib from urllib.parse import urlparse @@ -8,43 +8,8 @@ from bookwyrm import models from bookwyrm.tasks import app -def get_edition(book_id): - ''' look up a book in the db and return an edition ''' - book = models.Book.objects.select_subclasses().get(id=book_id) - if isinstance(book, models.Work): - book = book.default_edition - return book - - -def get_or_create_connector(remote_id): - ''' get the connector related to the author's server ''' - url = urlparse(remote_id) - identifier = url.netloc - if not identifier: - raise ValueError('Invalid remote id') - - try: - connector_info = models.Connector.objects.get(identifier=identifier) - except models.Connector.DoesNotExist: - connector_info = models.Connector.objects.create( - identifier=identifier, - connector_file='bookwyrm_connector', - base_url='https://%s' % identifier, - books_url='https://%s/book' % identifier, - covers_url='https://%s/images/covers' % identifier, - search_url='https://%s/search?q=' % identifier, - priority=2 - ) - - return load_connector(connector_info) - - -@app.task -def load_more_data(book_id): - ''' background the work of getting all 10,000 editions of LoTR ''' - book = models.Book.objects.select_subclasses().get(id=book_id) - connector = load_connector(book.connector) - connector.expand_book_data(book) +class ConnectorException(HTTPError): + ''' when the connector can't do what was asked ''' def search(query, min_confidence=0.1): @@ -55,7 +20,7 @@ def search(query, min_confidence=0.1): for connector in get_connectors(): try: result_set = connector.search(query, min_confidence=min_confidence) - except HTTPError: + except (HTTPError, ConnectorException): continue result_set = [r for r in result_set \ @@ -91,6 +56,38 @@ def get_connectors(): yield load_connector(info) +def get_or_create_connector(remote_id): + ''' get the connector related to the author's server ''' + url = urlparse(remote_id) + identifier = url.netloc + if not identifier: + raise ValueError('Invalid remote id') + + try: + connector_info = models.Connector.objects.get(identifier=identifier) + except models.Connector.DoesNotExist: + connector_info = models.Connector.objects.create( + identifier=identifier, + connector_file='bookwyrm_connector', + base_url='https://%s' % identifier, + books_url='https://%s/book' % identifier, + covers_url='https://%s/images/covers' % identifier, + search_url='https://%s/search?q=' % identifier, + priority=2 + ) + + return load_connector(connector_info) + + +@app.task +def load_more_data(connector_id, book_id): + ''' background the work of getting all 10,000 editions of LoTR ''' + connector_info = models.Connector.objects.get(id=connector_id) + connector = load_connector(connector_info) + book = models.Book.objects.select_subclasses().get(id=book_id) + connector.expand_book_data(book) + + def load_connector(connector_info): ''' instantiate the connector class ''' connector = importlib.import_module( diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 3b60c307..55355131 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -3,7 +3,8 @@ import re from bookwyrm import models from .abstract_connector import AbstractConnector, SearchResult, Mapping -from .abstract_connector import ConnectorException, get_data +from .abstract_connector import get_data +from .connector_manager import ConnectorException from .openlibrary_languages import languages @@ -68,7 +69,7 @@ class Connector(AbstractConnector): key = data['key'] except KeyError: raise ConnectorException('Invalid book data') - return '%s/%s' % (self.books_url, key) + return '%s%s' % (self.books_url, key) def is_work_data(self, data): @@ -80,17 +81,17 @@ class Connector(AbstractConnector): key = data['key'] except KeyError: raise ConnectorException('Invalid book data') - url = '%s/%s/editions' % (self.books_url, key) + url = '%s%s/editions' % (self.books_url, key) data = get_data(url) return pick_default_edition(data['entries']) - def get_work_from_edition_date(self, data): + def get_work_from_edition_data(self, data): try: key = data['works'][0]['key'] except (IndexError, KeyError): raise ConnectorException('No work found for edition') - url = '%s/%s' % (self.books_url, key) + url = '%s%s' % (self.books_url, key) return get_data(url) @@ -100,14 +101,14 @@ class Connector(AbstractConnector): author_blob = author_blob.get('author', author_blob) # this id is "/authors/OL1234567A" author_id = author_blob['key'] - url = '%s/%s.json' % (self.base_url, author_id) + url = '%s%s' % (self.base_url, author_id) yield self.get_or_create_author(url) def get_cover_url(self, cover_blob): ''' ask openlibrary for the cover ''' cover_id = cover_blob[0] - image_name = '%s-M.jpg' % cover_id + image_name = '%s-L.jpg' % cover_id return '%s/b/id/%s' % (self.covers_url, image_name) @@ -123,13 +124,14 @@ class Connector(AbstractConnector): title=search_result.get('title'), key=key, author=', '.join(author), + connector=self, year=search_result.get('first_publish_year'), ) def load_edition_data(self, olkey): ''' query openlibrary for editions of a work ''' - url = '%s/works/%s/editions.json' % (self.books_url, olkey) + url = '%s/works/%s/editions' % (self.books_url, olkey) return get_data(url) @@ -149,7 +151,7 @@ def get_description(description_blob): ''' descriptions can be a string or a dict ''' if isinstance(description_blob, dict): return description_blob.get('value') - return description_blob + return description_blob def get_openlibrary_key(key): diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 8d31c8a1..0c21e7bc 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -1,6 +1,9 @@ ''' using a bookwyrm instance as a source of book data ''' +from functools import reduce +import operator + from django.contrib.postgres.search import SearchRank, SearchVector -from django.db.models import F +from django.db.models import Count, F, Q from bookwyrm import models from .abstract_connector import AbstractConnector, SearchResult @@ -9,38 +12,18 @@ from .abstract_connector import AbstractConnector, SearchResult class Connector(AbstractConnector): ''' instantiate a connector ''' def search(self, query, min_confidence=0.1): - ''' right now you can't search bookwyrm sorry, but when - that gets implemented it will totally rule ''' - vector = SearchVector('title', weight='A') +\ - SearchVector('subtitle', weight='B') +\ - SearchVector('authors__name', weight='C') +\ - SearchVector('isbn_13', weight='A') +\ - SearchVector('isbn_10', weight='A') +\ - SearchVector('openlibrary_key', weight='C') +\ - SearchVector('goodreads_key', weight='C') +\ - SearchVector('asin', weight='C') +\ - SearchVector('oclc_number', weight='C') +\ - SearchVector('remote_id', weight='C') +\ - SearchVector('description', weight='D') +\ - SearchVector('series', weight='D') - - results = models.Edition.objects.annotate( - search=vector - ).annotate( - rank=SearchRank(vector, query) - ).filter( - rank__gt=min_confidence - ).order_by('-rank') - - # remove non-default editions, if possible - results = results.filter(parent_work__default_edition__id=F('id')) \ - or results - + ''' search your local database ''' + # first, try searching unqiue identifiers + results = search_identifiers(query) + if not results: + # then try searching title/author + results = search_title_author(query, min_confidence) search_results = [] - for book in results[:10]: - search_results.append( - self.format_search_result(book) - ) + for result in results: + search_results.append(self.format_search_result(result)) + if len(search_results) >= 10: + break + search_results.sort(key=lambda r: r.confidence, reverse=True) return search_results @@ -51,31 +34,74 @@ class Connector(AbstractConnector): author=search_result.author_text, year=search_result.published_date.year if \ search_result.published_date else None, - confidence=search_result.rank, + connector=self, + confidence=search_result.rank if \ + hasattr(search_result, 'rank') else 1, ) - def get_remote_id_from_data(self, data): - pass - def is_work_data(self, data): pass def get_edition_from_work_data(self, data): pass - def get_work_from_edition_date(self, data): + def get_work_from_edition_data(self, data): pass def get_authors_from_data(self, data): return None - def get_cover_from_data(self, data): - return None - def parse_search_data(self, data): ''' it's already in the right format, don't even worry about it ''' return data def expand_book_data(self, book): pass + + +def search_identifiers(query): + ''' tries remote_id, isbn; defined as dedupe fields on the model ''' + filters = [{f.name: query} for f in models.Edition._meta.get_fields() \ + if hasattr(f, 'deduplication_field') and f.deduplication_field] + results = models.Edition.objects.filter( + reduce(operator.or_, (Q(**f) for f in filters)) + ).distinct() + + # when there are multiple editions of the same work, pick the default. + # it would be odd for this to happen. + return results.filter(parent_work__default_edition__id=F('id')) \ + or results + + +def search_title_author(query, min_confidence): + ''' searches for title and author ''' + vector = SearchVector('title', weight='A') +\ + SearchVector('subtitle', weight='B') +\ + SearchVector('authors__name', weight='C') +\ + SearchVector('series', weight='D') + + results = models.Edition.objects.annotate( + search=vector + ).annotate( + rank=SearchRank(vector, query) + ).filter( + rank__gt=min_confidence + ).order_by('-rank') + + # when there are multiple editions of the same work, pick the closest + editions_of_work = results.values( + 'parent_work' + ).annotate( + Count('parent_work') + ).values_list('parent_work') + + for work_id in set(editions_of_work): + editions = results.filter(parent_work=work_id) + default = editions.filter(parent_work__default_edition=F('id')) + default_rank = default.first().rank if default.exists() else 0 + # if mutliple books have the top rank, pick the default edition + if default_rank == editions.first().rank: + yield default.first() + else: + yield editions.first() diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index 93fc1c48..9b8a4f01 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -8,8 +8,6 @@ from bookwyrm.models import ImportJob, ImportItem from bookwyrm.status import create_notification logger = logging.getLogger(__name__) -# TODO: remove or increase once we're confident it's not causing problems. -MAX_ENTRIES = 500 def create_job(user, csv_file, include_reviews, privacy): @@ -19,12 +17,13 @@ def create_job(user, csv_file, include_reviews, privacy): include_reviews=include_reviews, privacy=privacy ) - for index, entry in enumerate(list(csv.DictReader(csv_file))[:MAX_ENTRIES]): + for index, entry in enumerate(list(csv.DictReader(csv_file))): if not all(x in entry for x in ('ISBN13', 'Title', 'Author')): raise ValueError('Author, title, and isbn must be in data.') ImportItem(job=job, index=index, data=entry).save() return job + def create_retry_job(user, original_job, items): ''' retry items that didn't import ''' job = ImportJob.objects.create( @@ -37,6 +36,7 @@ def create_retry_job(user, original_job, items): ImportItem(job=job, index=item.index, data=item.data).save() return job + def start_import(job): ''' initalizes a csv import job ''' result = import_data.delay(job.id) @@ -49,7 +49,6 @@ def import_data(job_id): ''' does the actual lookup work in a celery task ''' job = ImportJob.objects.get(id=job_id) try: - results = [] for item in job.items.all(): try: item.resolve() @@ -61,7 +60,6 @@ def import_data(job_id): if item.book: item.save() - results.append(item) # shelves book and handles reviews outgoing.handle_imported_book( diff --git a/bookwyrm/incoming.py b/bookwyrm/incoming.py index ddf99f97..5e42fe45 100644 --- a/bookwyrm/incoming.py +++ b/bookwyrm/incoming.py @@ -185,11 +185,15 @@ def handle_create(activity): ''' someone did something, good on them ''' # deduplicate incoming activities activity = activity['object'] - status_id = activity['id'] + status_id = activity.get('id') if models.Status.objects.filter(remote_id=status_id).count(): return - serializer = activitypub.activity_objects[activity['type']] + try: + serializer = activitypub.activity_objects[activity['type']] + except KeyError: + return + activity = serializer(**activity) try: model = models.activity_models[activity.type] diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py new file mode 100644 index 00000000..044b2a98 --- /dev/null +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -0,0 +1,83 @@ +''' PROCEED WITH CAUTION: uses deduplication fields to permanently +merge book data objects ''' +from django.core.management.base import BaseCommand +from django.db.models import Count +from bookwyrm import models + + +def update_related(canonical, obj): + ''' update all the models with fk to the object being removed ''' + # move related models to canonical + related_models = [ + (r.remote_field.name, r.related_model) for r in \ + canonical._meta.related_objects] + for (related_field, related_model) in related_models: + related_objs = related_model.objects.filter( + **{related_field: obj}) + for related_obj in related_objs: + print( + 'replacing in', + related_model.__name__, + related_field, + related_obj.id + ) + try: + setattr(related_obj, related_field, canonical) + related_obj.save() + except TypeError: + getattr(related_obj, related_field).add(canonical) + getattr(related_obj, related_field).remove(obj) + + +def copy_data(canonical, obj): + ''' try to get the most data possible ''' + for data_field in obj._meta.get_fields(): + if not hasattr(data_field, 'activitypub_field'): + continue + data_value = getattr(obj, data_field.name) + if not data_value: + continue + if not getattr(canonical, data_field.name): + print('setting data field', data_field.name, data_value) + setattr(canonical, data_field.name, data_value) + canonical.save() + + +def dedupe_model(model): + ''' combine duplicate editions and update related models ''' + fields = model._meta.get_fields() + dedupe_fields = [f for f in fields if \ + hasattr(f, 'deduplication_field') and f.deduplication_field] + for field in dedupe_fields: + dupes = model.objects.values(field.name).annotate( + Count(field.name) + ).filter(**{'%s__count__gt' % field.name: 1}) + + for dupe in dupes: + value = dupe[field.name] + if not value or value == '': + continue + print('----------') + print(dupe) + objs = model.objects.filter( + **{field.name: value} + ).order_by('id') + canonical = objs.first() + print('keeping', canonical.remote_id) + for obj in objs[1:]: + print(obj.remote_id) + copy_data(canonical, obj) + update_related(canonical, obj) + # remove the outdated entry + obj.delete() + + +class Command(BaseCommand): + ''' dedplucate allllll the book data models ''' + help = 'merges duplicate book data' + # pylint: disable=no-self-use,unused-argument + def handle(self, *args, **options): + ''' run deudplications ''' + dedupe_model(models.Edition) + dedupe_model(models.Work) + dedupe_model(models.Author) diff --git a/bookwyrm/models/__init__.py b/bookwyrm/models/__init__.py index 0c3bf33e..48852cfe 100644 --- a/bookwyrm/models/__init__.py +++ b/bookwyrm/models/__init__.py @@ -9,8 +9,11 @@ from .connector import Connector from .shelf import Shelf, ShelfBook from .status import Status, GeneratedNote, Review, Comment, Quotation -from .status import Favorite, Boost, Notification, ReadThrough +from .status import Boost from .attachment import Image +from .favorite import Favorite +from .notification import Notification +from .readthrough import ReadThrough from .tag import Tag, UserTag @@ -25,3 +28,6 @@ from .site import SiteSettings, SiteInvite, PasswordReset cls_members = inspect.getmembers(sys.modules[__name__], inspect.isclass) activity_models = {c[1].activity_serializer.__name__: c[1] \ for c in cls_members if hasattr(c[1], 'activity_serializer')} + +status_models = [ + c.__name__ for (_, c) in activity_models.items() if issubclass(c, Status)] diff --git a/bookwyrm/models/base_model.py b/bookwyrm/models/base_model.py index 0de61fd1..b212d693 100644 --- a/bookwyrm/models/base_model.py +++ b/bookwyrm/models/base_model.py @@ -35,6 +35,11 @@ class BookWyrmModel(models.Model): ''' this is just here to provide default fields for other models ''' abstract = True + @property + def local_path(self): + ''' how to link to this object in the local app ''' + return self.get_remote_id().replace('https://%s' % DOMAIN, '') + @receiver(models.signals.post_save) #pylint: disable=unused-argument @@ -104,7 +109,7 @@ class ActivitypubMixin: not field.deduplication_field: continue - value = data.get(field.activitypub_field) + value = data.get(field.get_activitypub_field()) if not value: continue filters.append({field.name: value}) @@ -237,7 +242,9 @@ class OrderedCollectionPageMixin(ActivitypubMixin): ).serialize() -def to_ordered_collection_page(queryset, remote_id, id_only=False, page=1): +# pylint: disable=unused-argument +def to_ordered_collection_page( + queryset, remote_id, id_only=False, page=1, **kwargs): ''' serialize and pagiante a queryset ''' paginated = Paginator(queryset, PAGE_LENGTH) diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 1e1d8d20..08189510 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -126,6 +126,14 @@ class Work(OrderedCollectionPageMixin, Book): ''' in case the default edition is not set ''' return self.default_edition or self.editions.first() + def to_edition_list(self, **kwargs): + ''' an ordered collection of editions ''' + return self.to_ordered_collection( + self.editions.order_by('-updated_date').all(), + remote_id='%s/editions' % self.remote_id, + **kwargs + ) + activity_serializer = activitypub.Work serialize_reverse_fields = [('editions', 'editions')] deserialize_reverse_fields = [('editions', 'editions')] diff --git a/bookwyrm/models/favorite.py b/bookwyrm/models/favorite.py new file mode 100644 index 00000000..8373b016 --- /dev/null +++ b/bookwyrm/models/favorite.py @@ -0,0 +1,26 @@ +''' like/fav/star a status ''' +from django.db import models +from django.utils import timezone + +from bookwyrm import activitypub +from .base_model import ActivitypubMixin, BookWyrmModel +from . import fields + +class Favorite(ActivitypubMixin, BookWyrmModel): + ''' fav'ing a post ''' + user = fields.ForeignKey( + 'User', on_delete=models.PROTECT, activitypub_field='actor') + status = fields.ForeignKey( + 'Status', on_delete=models.PROTECT, activitypub_field='object') + + activity_serializer = activitypub.Like + + def save(self, *args, **kwargs): + ''' update user active time ''' + self.user.last_active_date = timezone.now() + self.user.save() + super().save(*args, **kwargs) + + class Meta: + ''' can't fav things twice ''' + unique_together = ('user', 'status') diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 835094cd..1ebe9b31 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -6,7 +6,7 @@ from django.contrib.postgres.fields import JSONField from django.db import models from django.utils import timezone -from bookwyrm import books_manager +from bookwyrm.connectors import connector_manager from bookwyrm.models import ReadThrough, User, Book from .fields import PrivacyLevels @@ -71,12 +71,12 @@ class ImportItem(models.Model): def get_book_from_isbn(self): ''' search by isbn ''' - search_result = books_manager.first_search_result( + search_result = connector_manager.first_search_result( self.isbn, min_confidence=0.999 ) if search_result: # raises ConnectorException - return books_manager.get_or_create_book(search_result.key) + return search_result.connector.get_or_create_book(search_result.key) return None @@ -86,12 +86,12 @@ class ImportItem(models.Model): self.data['Title'], self.data['Author'] ) - search_result = books_manager.first_search_result( + search_result = connector_manager.first_search_result( search_term, min_confidence=0.999 ) if search_result: # raises ConnectorException - return books_manager.get_or_create_book(search_result.key) + return search_result.connector.get_or_create_book(search_result.key) return None diff --git a/bookwyrm/models/notification.py b/bookwyrm/models/notification.py new file mode 100644 index 00000000..4ce5dcea --- /dev/null +++ b/bookwyrm/models/notification.py @@ -0,0 +1,33 @@ +''' alert a user to activity ''' +from django.db import models +from .base_model import BookWyrmModel + + +NotificationType = models.TextChoices( + 'NotificationType', + 'FAVORITE REPLY MENTION TAG FOLLOW FOLLOW_REQUEST BOOST IMPORT') + +class Notification(BookWyrmModel): + ''' you've been tagged, liked, followed, etc ''' + user = models.ForeignKey('User', on_delete=models.PROTECT) + related_book = models.ForeignKey( + 'Edition', on_delete=models.PROTECT, null=True) + related_user = models.ForeignKey( + 'User', + on_delete=models.PROTECT, null=True, related_name='related_user') + related_status = models.ForeignKey( + 'Status', on_delete=models.PROTECT, null=True) + related_import = models.ForeignKey( + 'ImportJob', on_delete=models.PROTECT, null=True) + read = models.BooleanField(default=False) + notification_type = models.CharField( + max_length=255, choices=NotificationType.choices) + + class Meta: + ''' checks if notifcation is in enum list for valid types ''' + constraints = [ + models.CheckConstraint( + check=models.Q(notification_type__in=NotificationType.values), + name="notification_type_valid", + ) + ] diff --git a/bookwyrm/models/readthrough.py b/bookwyrm/models/readthrough.py new file mode 100644 index 00000000..61cac7e6 --- /dev/null +++ b/bookwyrm/models/readthrough.py @@ -0,0 +1,26 @@ +''' progress in a book ''' +from django.db import models +from django.utils import timezone + +from .base_model import BookWyrmModel + + +class ReadThrough(BookWyrmModel): + ''' Store progress through a book in the database. ''' + user = models.ForeignKey('User', on_delete=models.PROTECT) + book = models.ForeignKey('Edition', on_delete=models.PROTECT) + pages_read = models.IntegerField( + null=True, + blank=True) + start_date = models.DateTimeField( + blank=True, + null=True) + finish_date = models.DateTimeField( + blank=True, + null=True) + + def save(self, *args, **kwargs): + ''' update user active time ''' + self.user.last_active_date = timezone.now() + self.user.save() + super().save(*args, **kwargs) diff --git a/bookwyrm/models/relationship.py b/bookwyrm/models/relationship.py index debe2ace..0f3c1dab 100644 --- a/bookwyrm/models/relationship.py +++ b/bookwyrm/models/relationship.py @@ -54,7 +54,7 @@ class UserRelationship(ActivitypubMixin, BookWyrmModel): def to_reject_activity(self): - ''' generate an Accept for this follow request ''' + ''' generate a Reject for this follow request ''' return activitypub.Reject( id=self.get_remote_id(status='rejects'), actor=self.user_object.remote_id, diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index 3654e554..2494c458 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -118,7 +118,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): activity['attachment'] = [ image_serializer(b.cover, b.alt_text) \ for b in self.mention_books.all()[:4] if b.cover] - if hasattr(self, 'book'): + if hasattr(self, 'book') and self.book.cover: activity['attachment'].append( image_serializer(self.book.cover, self.book.alt_text) ) @@ -222,26 +222,6 @@ class Review(Status): pure_type = 'Article' -class Favorite(ActivitypubMixin, BookWyrmModel): - ''' fav'ing a post ''' - user = fields.ForeignKey( - 'User', on_delete=models.PROTECT, activitypub_field='actor') - status = fields.ForeignKey( - 'Status', on_delete=models.PROTECT, activitypub_field='object') - - activity_serializer = activitypub.Like - - def save(self, *args, **kwargs): - ''' update user active time ''' - self.user.last_active_date = timezone.now() - self.user.save() - super().save(*args, **kwargs) - - class Meta: - ''' can't fav things twice ''' - unique_together = ('user', 'status') - - class Boost(Status): ''' boost'ing a post ''' boosted_status = fields.ForeignKey( @@ -268,54 +248,3 @@ class Boost(Status): # This constraint can't work as it would cross tables. # class Meta: # unique_together = ('user', 'boosted_status') - - -class ReadThrough(BookWyrmModel): - ''' Store progress through a book in the database. ''' - user = models.ForeignKey('User', on_delete=models.PROTECT) - book = models.ForeignKey('Edition', on_delete=models.PROTECT) - pages_read = models.IntegerField( - null=True, - blank=True) - start_date = models.DateTimeField( - blank=True, - null=True) - finish_date = models.DateTimeField( - blank=True, - null=True) - - def save(self, *args, **kwargs): - ''' update user active time ''' - self.user.last_active_date = timezone.now() - self.user.save() - super().save(*args, **kwargs) - - -NotificationType = models.TextChoices( - 'NotificationType', - 'FAVORITE REPLY MENTION TAG FOLLOW FOLLOW_REQUEST BOOST IMPORT') - -class Notification(BookWyrmModel): - ''' you've been tagged, liked, followed, etc ''' - user = models.ForeignKey('User', on_delete=models.PROTECT) - related_book = models.ForeignKey( - 'Edition', on_delete=models.PROTECT, null=True) - related_user = models.ForeignKey( - 'User', - on_delete=models.PROTECT, null=True, related_name='related_user') - related_status = models.ForeignKey( - 'Status', on_delete=models.PROTECT, null=True) - related_import = models.ForeignKey( - 'ImportJob', on_delete=models.PROTECT, null=True) - read = models.BooleanField(default=False) - notification_type = models.CharField( - max_length=255, choices=NotificationType.choices) - - class Meta: - ''' checks if notifcation is in enum list for valid types ''' - constraints = [ - models.CheckConstraint( - check=models.Q(notification_type__in=NotificationType.values), - name="notification_type_valid", - ) - ] diff --git a/bookwyrm/models/tag.py b/bookwyrm/models/tag.py index 940b4192..6e0ba8ab 100644 --- a/bookwyrm/models/tag.py +++ b/bookwyrm/models/tag.py @@ -17,7 +17,9 @@ class Tag(OrderedCollectionMixin, BookWyrmModel): @classmethod def book_queryset(cls, identifier): ''' county of books associated with this tag ''' - return cls.objects.filter(identifier=identifier) + return cls.objects.filter( + identifier=identifier + ).order_by('-updated_date') @property def collection_queryset(self): @@ -64,7 +66,7 @@ class UserTag(BookWyrmModel): id='%s#remove' % self.remote_id, actor=user.remote_id, object=self.book.to_activity(), - target=self.to_activity(), + target=self.remote_id, ).serialize() diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py index 81662587..c9290e46 100644 --- a/bookwyrm/models/user.py +++ b/bookwyrm/models/user.py @@ -1,6 +1,7 @@ ''' database schema for user data ''' from urllib.parse import urlparse +from django.apps import apps from django.contrib.auth.models import AbstractUser from django.db import models from django.dispatch import receiver @@ -107,11 +108,22 @@ class User(OrderedCollectionPageMixin, AbstractUser): activity_serializer = activitypub.Person - def to_outbox(self, **kwargs): + def to_outbox(self, filter_type=None, **kwargs): ''' an ordered collection of statuses ''' - queryset = Status.objects.filter( + if filter_type: + filter_class = apps.get_model( + 'bookwyrm.%s' % filter_type, require_ready=True) + if not issubclass(filter_class, Status): + raise TypeError( + 'filter_status_class must be a subclass of models.Status') + queryset = filter_class.objects + else: + queryset = Status.objects + + queryset = queryset.filter( user=self, deleted=False, + privacy__in=['public', 'unlisted'], ).select_subclasses().order_by('-published_date') return self.to_ordered_collection(queryset, \ remote_id=self.outbox, **kwargs) @@ -119,14 +131,22 @@ class User(OrderedCollectionPageMixin, AbstractUser): def to_following_activity(self, **kwargs): ''' activitypub following list ''' remote_id = '%s/following' % self.remote_id - return self.to_ordered_collection(self.following.all(), \ - remote_id=remote_id, id_only=True, **kwargs) + return self.to_ordered_collection( + self.following.order_by('-updated_date').all(), + remote_id=remote_id, + id_only=True, + **kwargs + ) def to_followers_activity(self, **kwargs): ''' activitypub followers list ''' remote_id = '%s/followers' % self.remote_id - return self.to_ordered_collection(self.followers.all(), \ - remote_id=remote_id, id_only=True, **kwargs) + return self.to_ordered_collection( + self.followers.order_by('-updated_date').all(), + remote_id=remote_id, + id_only=True, + **kwargs + ) def to_activity(self): ''' override default AP serializer to add context object @@ -165,6 +185,11 @@ class User(OrderedCollectionPageMixin, AbstractUser): return super().save(*args, **kwargs) + @property + def local_path(self): + ''' this model doesn't inherit bookwyrm model, so here we are ''' + return '/user/%s' % (self.localname or self.username) + class KeyPair(ActivitypubMixin, BookWyrmModel): ''' public and private keys for a user ''' @@ -270,7 +295,7 @@ def get_or_create_remote_server(domain): @app.task def get_remote_reviews(outbox): ''' ingest reviews by a new remote bookwyrm user ''' - outbox_page = outbox + '?page=true' + outbox_page = outbox + '?page=true&type=Review' data = get_data(outbox_page) # TODO: pagination? diff --git a/bookwyrm/outgoing.py b/bookwyrm/outgoing.py index 00154cf4..88377d33 100644 --- a/bookwyrm/outgoing.py +++ b/bookwyrm/outgoing.py @@ -2,8 +2,10 @@ import re from django.db import IntegrityError, transaction -from django.http import HttpResponseNotFound, JsonResponse +from django.http import JsonResponse +from django.shortcuts import get_object_or_404 from django.views.decorators.csrf import csrf_exempt +from django.views.decorators.http import require_GET from markdown import markdown from requests import HTTPError @@ -20,19 +22,16 @@ from bookwyrm.utils import regex @csrf_exempt +@require_GET def outbox(request, username): ''' outbox for the requested user ''' - if request.method != 'GET': - return HttpResponseNotFound() + user = get_object_or_404(models.User, localname=username) + filter_type = request.GET.get('type') + if filter_type not in models.status_models: + filter_type = None - try: - user = models.User.objects.get(localname=username) - except models.User.DoesNotExist: - return HttpResponseNotFound() - - # collection overview return JsonResponse( - user.to_outbox(**request.GET), + user.to_outbox(**request.GET, filter_type=filter_type), encoder=activitypub.ActivityEncoder ) @@ -42,6 +41,9 @@ def handle_remote_webfinger(query): user = None # usernames could be @user@domain or user@domain + if not query: + return None + if query[0] == '@': query = query[1:] @@ -164,22 +166,23 @@ def handle_imported_book(user, item, include_reviews, privacy): if not item.book: return - if item.shelf: + existing_shelf = models.ShelfBook.objects.filter( + book=item.book, added_by=user).exists() + + # shelve the book if it hasn't been shelved already + if item.shelf and not existing_shelf: desired_shelf = models.Shelf.objects.get( identifier=item.shelf, user=user ) - # shelve the book if it hasn't been shelved already - shelf_book, created = models.ShelfBook.objects.get_or_create( + shelf_book = models.ShelfBook.objects.create( book=item.book, shelf=desired_shelf, added_by=user) - if created: - broadcast(user, shelf_book.to_add_activity(user), privacy=privacy) + broadcast(user, shelf_book.to_add_activity(user), privacy=privacy) - # only add new read-throughs if the item isn't already shelved - for read in item.reads: - read.book = item.book - read.user = user - read.save() + for read in item.reads: + read.book = item.book + read.user = user + read.save() if include_reviews and (item.rating or item.review): review_title = 'Review of {!r} on Goodreads'.format( @@ -218,8 +221,65 @@ def handle_status(user, form): status.save() # inspect the text for user tags - matches = [] - for match in re.finditer(regex.username, status.content): + content = status.content + for (mention_text, mention_user) in find_mentions(content): + # add them to status mentions fk + status.mention_users.add(mention_user) + + # turn the mention into a link + content = re.sub( + r'%s([^@]|$)' % mention_text, + r'%s\g<1>' % \ + (mention_user.remote_id, mention_text), + content) + + # add reply parent to mentions and notify + if status.reply_parent: + status.mention_users.add(status.reply_parent.user) + for mention_user in status.reply_parent.mention_users.all(): + status.mention_users.add(mention_user) + + if status.reply_parent.user.local: + create_notification( + status.reply_parent.user, + 'REPLY', + related_user=user, + related_status=status + ) + + # deduplicate mentions + status.mention_users.set(set(status.mention_users.all())) + # create mention notifications + for mention_user in status.mention_users.all(): + if status.reply_parent and mention_user == status.reply_parent.user: + continue + if mention_user.local: + create_notification( + mention_user, + 'MENTION', + related_user=user, + related_status=status + ) + + # don't apply formatting to generated notes + if not isinstance(status, models.GeneratedNote): + status.content = to_markdown(content) + # do apply formatting to quotes + if hasattr(status, 'quote'): + status.quote = to_markdown(status.quote) + + status.save() + + broadcast(user, status.to_create_activity(user), software='bookwyrm') + + # re-format the activity for non-bookwyrm servers + remote_activity = status.to_create_activity(user, pure=True) + broadcast(user, remote_activity, software='other') + + +def find_mentions(content): + ''' detect @mentions in raw status content ''' + for match in re.finditer(regex.strict_username, content): username = match.group().strip().split('@')[1:] if len(username) == 1: # this looks like a local user (@user), fill in the domain @@ -230,44 +290,7 @@ def handle_status(user, form): if not mention_user: # we can ignore users we don't know about continue - matches.append((match.group(), mention_user.remote_id)) - # add them to status mentions fk - status.mention_users.add(mention_user) - # create notification if the mentioned user is local - if mention_user.local: - create_notification( - mention_user, - 'MENTION', - related_user=user, - related_status=status - ) - # add mentions - content = status.content - for (username, url) in matches: - content = re.sub( - r'%s([^@])' % username, - r'%s\g<1>' % (url, username), - content) - if not isinstance(status, models.GeneratedNote): - status.content = to_markdown(content) - if hasattr(status, 'quote'): - status.quote = to_markdown(status.quote) - status.save() - - # notify reply parent or tagged users - if status.reply_parent and status.reply_parent.user.local: - create_notification( - status.reply_parent.user, - 'REPLY', - related_user=user, - related_status=status - ) - - broadcast(user, status.to_create_activity(user), software='bookwyrm') - - # re-format the activity for non-bookwyrm servers - remote_activity = status.to_create_activity(user, pure=True) - broadcast(user, remote_activity, software='other') + yield (match.group(), mention_user) def to_markdown(content): @@ -284,21 +307,6 @@ def to_markdown(content): return sanitizer.get_output() -def handle_tag(user, tag): - ''' tag a book ''' - broadcast(user, tag.to_add_activity(user)) - - -def handle_untag(user, book, name): - ''' tag a book ''' - book = models.Book.objects.get(id=book) - tag = models.Tag.objects.get(name=name, book=book, user=user) - tag_activity = tag.to_remove_activity(user) - tag.delete() - - broadcast(user, tag_activity) - - def handle_favorite(user, status): ''' a user likes a status ''' try: diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index c42215b4..46c38b5a 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -3,8 +3,11 @@ import os from environs import Env +import requests + env = Env() DOMAIN = env('DOMAIN') +VERSION = '0.0.1' PAGE_LENGTH = env('PAGE_LENGTH', 15) @@ -150,3 +153,6 @@ STATIC_URL = '/static/' STATIC_ROOT = os.path.join(BASE_DIR, env('STATIC_ROOT', 'static')) MEDIA_URL = '/images/' MEDIA_ROOT = os.path.join(BASE_DIR, env('MEDIA_ROOT', 'images')) + +USER_AGENT = "%s (BookWyrm/%s; +https://%s/)" % ( + requests.utils.default_user_agent(), VERSION, DOMAIN) diff --git a/bookwyrm/static/css/format.css b/bookwyrm/static/css/format.css index 5b5ff08d..7dab69b0 100644 --- a/bookwyrm/static/css/format.css +++ b/bookwyrm/static/css/format.css @@ -67,6 +67,13 @@ input.toggle-control:checked ~ .modal.toggle-content { width: max-content; max-width: 250px; } +.cover-container.is-large { + height: max-content; + max-width: 500px; +} +.cover-container.is-large img { + max-height: 500px; +} .cover-container.is-medium { height: 150px; } diff --git a/bookwyrm/templates/author.html b/bookwyrm/templates/author.html index e51ef302..4235b266 100644 --- a/bookwyrm/templates/author.html +++ b/bookwyrm/templates/author.html @@ -8,7 +8,7 @@ {% if request.user.is_authenticated and perms.bookwyrm.edit_book %}
- + Edit Author diff --git a/bookwyrm/templates/book.html b/bookwyrm/templates/book.html index 506ee3db..4bbc8d10 100644 --- a/bookwyrm/templates/book.html +++ b/bookwyrm/templates/book.html @@ -166,10 +166,10 @@ {% for rating in ratings %}
-
{% include 'snippets/avatar.html' %}
+
{% include 'snippets/avatar.html' with user=rating.user %}
- {% include 'snippets/username.html' %} + {% include 'snippets/username.html' with user=rating.user %}
rated it
diff --git a/bookwyrm/templates/discover.html b/bookwyrm/templates/discover.html new file mode 100644 index 00000000..a28a67bd --- /dev/null +++ b/bookwyrm/templates/discover.html @@ -0,0 +1,80 @@ +{% extends 'layout.html' %} +{% block content %} + +{% if not request.user.is_authenticated %} +
+

{{ site.name }}: Social Reading and Reviewing

+
+ +
+
+
+ {% include 'snippets/about.html' %} +
+
+
+
+ {% if site.allow_registration %} +

Join {{ site.name }}

+
+ {% include 'snippets/register_form.html' %} +
+ {% else %} +

This instance is closed

+

Contact an administrator to get an invite

+ {% endif %} +
+
+
+{% else %} +
+

Discover

+
+{% endif %} + +
+

Recent Books

+
+ +
+
+
+
+ {% include 'snippets/discover/large-book.html' with book=books.0 %} +
+
+
+
+
+ {% include 'snippets/discover/small-book.html' with book=books.1 %} +
+
+
+
+ {% include 'snippets/discover/small-book.html' with book=books.2 %} +
+
+
+
+
+
+
+
+ {% include 'snippets/discover/small-book.html' with book=books.3 %} +
+
+
+
+ {% include 'snippets/discover/small-book.html' with book=books.4 %} +
+
+
+
+
+ {% include 'snippets/discover/large-book.html' with book=books.5 %} +
+
+
+
+ +{% endblock %} diff --git a/bookwyrm/templates/following.html b/bookwyrm/templates/following.html index 478ca813..bdf02c74 100644 --- a/bookwyrm/templates/following.html +++ b/bookwyrm/templates/following.html @@ -31,7 +31,7 @@
{% endfor %} {% if not following.count %} -
No one is following {{ user|username }}
+
{{ user|username }} isn't following any users
{% endif %}
diff --git a/bookwyrm/templates/import.html b/bookwyrm/templates/import.html index 8e3f5eb4..bfa8d3ec 100644 --- a/bookwyrm/templates/import.html +++ b/bookwyrm/templates/import.html @@ -21,8 +21,6 @@
-

- Imports are limited in size, and only the first {{ limit }} items will be imported.

diff --git a/bookwyrm/templates/layout.html b/bookwyrm/templates/layout.html index ab113ad0..2e8fdcac 100644 --- a/bookwyrm/templates/layout.html +++ b/bookwyrm/templates/layout.html @@ -18,7 +18,7 @@ -