mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-29 13:01:08 +00:00
commit
6f7c60bf79
16 changed files with 188 additions and 79 deletions
|
@ -4,6 +4,7 @@ import sys
|
|||
|
||||
from .base_activity import ActivityEncoder, Image, PublicKey, Signature
|
||||
from .base_activity import Link, Mention
|
||||
from .base_activity import ActivitySerializerError
|
||||
from .note import Note, GeneratedNote, Article, Comment, Review, Quotation
|
||||
from .note import Tombstone
|
||||
from .interaction import Boost, Like
|
||||
|
|
|
@ -75,7 +75,7 @@ class AbstractConnector(ABC):
|
|||
''' pull up a book record by whatever means possible '''
|
||||
# try to load the book
|
||||
book = models.Book.objects.select_subclasses().filter(
|
||||
remote_id=remote_id
|
||||
origin_id=remote_id
|
||||
).first()
|
||||
if book:
|
||||
if isinstance(book, models.Work):
|
||||
|
@ -148,7 +148,7 @@ class AbstractConnector(ABC):
|
|||
def create_book(self, remote_id, data, model):
|
||||
''' create a work or edition from data '''
|
||||
book = model.objects.create(
|
||||
remote_id=remote_id,
|
||||
origin_id=remote_id,
|
||||
title=data['title'],
|
||||
connector=self.connector,
|
||||
)
|
||||
|
|
|
@ -99,14 +99,14 @@ class Connector(AbstractConnector):
|
|||
def get_or_create_author(self, remote_id):
|
||||
''' load that author '''
|
||||
try:
|
||||
return models.Author.objects.get(remote_id=remote_id)
|
||||
return models.Author.objects.get(origin_id=remote_id)
|
||||
except ObjectDoesNotExist:
|
||||
pass
|
||||
|
||||
data = get_data(remote_id)
|
||||
|
||||
# ingest a new author
|
||||
author = models.Author(remote_id=remote_id)
|
||||
author = models.Author(origin_id=remote_id)
|
||||
author = update_from_mappings(author, data, self.author_mappings)
|
||||
author.save()
|
||||
|
||||
|
|
|
@ -13,16 +13,16 @@ class Connector(AbstractConnector):
|
|||
that gets implemented it will totally rule '''
|
||||
vector = SearchVector('title', weight='A') +\
|
||||
SearchVector('subtitle', weight='B') +\
|
||||
SearchVector('author_text', weight='A') +\
|
||||
SearchVector('author_text', weight='C') +\
|
||||
SearchVector('isbn_13', weight='A') +\
|
||||
SearchVector('isbn_10', weight='A') +\
|
||||
SearchVector('openlibrary_key', weight='B') +\
|
||||
SearchVector('goodreads_key', weight='B') +\
|
||||
SearchVector('asin', weight='B') +\
|
||||
SearchVector('oclc_number', weight='B') +\
|
||||
SearchVector('remote_id', weight='B') +\
|
||||
SearchVector('description', weight='C') +\
|
||||
SearchVector('series', weight='C')
|
||||
SearchVector('openlibrary_key', weight='C') +\
|
||||
SearchVector('goodreads_key', weight='C') +\
|
||||
SearchVector('asin', weight='C') +\
|
||||
SearchVector('oclc_number', weight='C') +\
|
||||
SearchVector('remote_id', weight='C') +\
|
||||
SearchVector('description', weight='D') +\
|
||||
SearchVector('series', weight='D')
|
||||
|
||||
results = models.Edition.objects.annotate(
|
||||
search=vector
|
||||
|
@ -47,7 +47,7 @@ class Connector(AbstractConnector):
|
|||
def format_search_result(self, search_result):
|
||||
return SearchResult(
|
||||
title=search_result.title,
|
||||
key=search_result.local_id,
|
||||
key=search_result.remote_id,
|
||||
author=search_result.author_text,
|
||||
year=search_result.published_date.year if \
|
||||
search_result.published_date else None,
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
''' handle reading a csv from goodreads '''
|
||||
import csv
|
||||
from requests import HTTPError
|
||||
import logging
|
||||
|
||||
from bookwyrm import outgoing
|
||||
from bookwyrm.tasks import app
|
||||
from bookwyrm.models import ImportJob, ImportItem
|
||||
from bookwyrm.status import create_notification
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# TODO: remove or increase once we're confident it's not causing problems.
|
||||
MAX_ENTRIES = 500
|
||||
|
||||
|
@ -24,6 +25,17 @@ def create_job(user, csv_file, include_reviews, privacy):
|
|||
ImportItem(job=job, index=index, data=entry).save()
|
||||
return job
|
||||
|
||||
def create_retry_job(user, original_job, items):
|
||||
''' retry items that didn't import '''
|
||||
job = ImportJob.objects.create(
|
||||
user=user,
|
||||
include_reviews=original_job.include_reviews,
|
||||
privacy=original_job.privacy,
|
||||
retry=True
|
||||
)
|
||||
for item in items:
|
||||
ImportItem(job=job, index=item.index, data=item.data).save()
|
||||
return job
|
||||
|
||||
def start_import(job):
|
||||
''' initalizes a csv import job '''
|
||||
|
@ -41,7 +53,8 @@ def import_data(job_id):
|
|||
for item in job.items.all():
|
||||
try:
|
||||
item.resolve()
|
||||
except:
|
||||
except Exception as e:
|
||||
logger.exception(e)
|
||||
item.fail_reason = 'Error loading book'
|
||||
item.save()
|
||||
continue
|
||||
|
|
18
bookwyrm/migrations/0010_importjob_retry.py
Normal file
18
bookwyrm/migrations/0010_importjob_retry.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
# Generated by Django 3.0.7 on 2020-11-13 15:54
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('bookwyrm', '0009_shelf_privacy'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='importjob',
|
||||
name='retry',
|
||||
field=models.BooleanField(default=False),
|
||||
),
|
||||
]
|
33
bookwyrm/migrations/0011_auto_20201113_1727.py
Normal file
33
bookwyrm/migrations/0011_auto_20201113_1727.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
# Generated by Django 3.0.7 on 2020-11-13 17:27
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
def set_origin_id(app_registry, schema_editor):
|
||||
db_alias = schema_editor.connection.alias
|
||||
books = app_registry.get_model('bookwyrm', 'Book').objects.using(db_alias)
|
||||
for book in books:
|
||||
book.origin_id = book.remote_id
|
||||
# the remote_id will be set automatically
|
||||
book.remote_id = None
|
||||
book.save()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('bookwyrm', '0010_importjob_retry'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='author',
|
||||
name='origin_id',
|
||||
field=models.CharField(max_length=255, null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='book',
|
||||
name='origin_id',
|
||||
field=models.CharField(max_length=255, null=True),
|
||||
),
|
||||
migrations.RunPython(set_origin_id),
|
||||
]
|
|
@ -68,9 +68,7 @@ class ActivitypubMixin:
|
|||
if not hasattr(self, mapping.model_key) or not mapping.activity_key:
|
||||
continue
|
||||
value = getattr(self, mapping.model_key)
|
||||
if hasattr(value, 'local_id'):
|
||||
value = value.local_id
|
||||
elif hasattr(value, 'remote_id'):
|
||||
if hasattr(value, 'remote_id'):
|
||||
value = value.remote_id
|
||||
if isinstance(value, datetime):
|
||||
value = value.isoformat()
|
||||
|
|
|
@ -15,6 +15,7 @@ from .base_model import ActivitypubMixin, OrderedCollectionPageMixin
|
|||
|
||||
class Book(ActivitypubMixin, BookWyrmModel):
|
||||
''' a generic book, which can mean either an edition or a work '''
|
||||
origin_id = models.CharField(max_length=255, null=True)
|
||||
# these identifiers apply to both works and editions
|
||||
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
librarything_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
|
@ -57,7 +58,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
|||
@property
|
||||
def ap_authors(self):
|
||||
''' the activitypub serialization should be a list of author ids '''
|
||||
return [a.local_id for a in self.authors.all()]
|
||||
return [a.remote_id for a in self.authors.all()]
|
||||
|
||||
@property
|
||||
def ap_cover(self):
|
||||
|
@ -71,10 +72,10 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
|||
@property
|
||||
def ap_parent_work(self):
|
||||
''' reference the work via local id not remote '''
|
||||
return self.parent_work.local_id
|
||||
return self.parent_work.remote_id
|
||||
|
||||
activity_mappings = [
|
||||
ActivityMapping('id', 'local_id'),
|
||||
ActivityMapping('id', 'remote_id'),
|
||||
|
||||
ActivityMapping('authors', 'ap_authors'),
|
||||
ActivityMapping('first_published_date', 'first_published_date'),
|
||||
|
@ -112,6 +113,8 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
|||
''' can't be abstract for query reasons, but you shouldn't USE it '''
|
||||
if not isinstance(self, Edition) and not isinstance(self, Work):
|
||||
raise ValueError('Books should be added as Editions or Works')
|
||||
if self.id and not self.remote_id:
|
||||
self.remote_id = self.get_remote_id()
|
||||
|
||||
super().save(*args, **kwargs)
|
||||
|
||||
|
@ -119,15 +122,6 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
|||
''' editions and works both use "book" instead of model_name '''
|
||||
return 'https://%s/book/%d' % (DOMAIN, self.id)
|
||||
|
||||
|
||||
@property
|
||||
def local_id(self):
|
||||
''' when a book is ingested from an outside source, it becomes local to
|
||||
an instance, so it needs a local url for federation. but it still needs
|
||||
the remote_id for easier deduplication and, if appropriate, to sync with
|
||||
the remote canonical copy '''
|
||||
return 'https://%s/book/%d' % (DOMAIN, self.id)
|
||||
|
||||
def __repr__(self):
|
||||
return "<{} key={!r} title={!r}>".format(
|
||||
self.__class__,
|
||||
|
@ -152,14 +146,14 @@ class Work(OrderedCollectionPageMixin, Book):
|
|||
''' it'd be nice to serialize the edition instead but, recursion '''
|
||||
default = self.default_edition
|
||||
ed_list = [
|
||||
e.local_id for e in self.edition_set.filter(~Q(id=default.id)).all()
|
||||
e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all()
|
||||
]
|
||||
return [default.local_id] + ed_list
|
||||
return [default.remote_id] + ed_list
|
||||
|
||||
|
||||
def to_edition_list(self, **kwargs):
|
||||
''' activitypub serialization for this work's editions '''
|
||||
remote_id = self.local_id + '/editions'
|
||||
remote_id = self.remote_id + '/editions'
|
||||
return self.to_ordered_collection(
|
||||
self.edition_set,
|
||||
remote_id=remote_id,
|
||||
|
@ -246,6 +240,7 @@ def isbn_13_to_10(isbn_13):
|
|||
|
||||
|
||||
class Author(ActivitypubMixin, BookWyrmModel):
|
||||
origin_id = models.CharField(max_length=255, null=True)
|
||||
''' copy of an author from OL '''
|
||||
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
sync = models.BooleanField(default=True)
|
||||
|
@ -262,14 +257,6 @@ class Author(ActivitypubMixin, BookWyrmModel):
|
|||
)
|
||||
bio = models.TextField(null=True, blank=True)
|
||||
|
||||
@property
|
||||
def local_id(self):
|
||||
''' when a book is ingested from an outside source, it becomes local to
|
||||
an instance, so it needs a local url for federation. but it still needs
|
||||
the remote_id for easier deduplication and, if appropriate, to sync with
|
||||
the remote canonical copy (ditto here for author)'''
|
||||
return 'https://%s/author/%d' % (DOMAIN, self.id)
|
||||
|
||||
@property
|
||||
def display_name(self):
|
||||
''' Helper to return a displayable name'''
|
||||
|
@ -281,7 +268,7 @@ class Author(ActivitypubMixin, BookWyrmModel):
|
|||
return self.last_name or self.first_name
|
||||
|
||||
activity_mappings = [
|
||||
ActivityMapping('id', 'local_id'),
|
||||
ActivityMapping('id', 'remote_id'),
|
||||
ActivityMapping('name', 'display_name'),
|
||||
ActivityMapping('born', 'born'),
|
||||
ActivityMapping('died', 'died'),
|
||||
|
|
|
@ -48,6 +48,7 @@ class ImportJob(models.Model):
|
|||
default='public',
|
||||
choices=PrivacyLevels.choices
|
||||
)
|
||||
retry = models.BooleanField(default=False)
|
||||
|
||||
|
||||
class ImportItem(models.Model):
|
||||
|
@ -72,14 +73,11 @@ class ImportItem(models.Model):
|
|||
def get_book_from_isbn(self):
|
||||
''' search by isbn '''
|
||||
search_result = books_manager.first_search_result(
|
||||
self.isbn, min_confidence=0.995
|
||||
self.isbn, min_confidence=0.999
|
||||
)
|
||||
if search_result:
|
||||
try:
|
||||
# don't crash the import when the connector fails
|
||||
return books_manager.get_or_create_book(search_result.key)
|
||||
except ConnectorException:
|
||||
pass
|
||||
# raises ConnectorException
|
||||
return books_manager.get_or_create_book(search_result.key)
|
||||
return None
|
||||
|
||||
|
||||
|
@ -90,16 +88,24 @@ class ImportItem(models.Model):
|
|||
self.data['Author']
|
||||
)
|
||||
search_result = books_manager.first_search_result(
|
||||
search_term, min_confidence=0.995
|
||||
search_term, min_confidence=0.999
|
||||
)
|
||||
if search_result:
|
||||
try:
|
||||
return books_manager.get_or_create_book(search_result.key)
|
||||
except ConnectorException:
|
||||
pass
|
||||
# raises ConnectorException
|
||||
return books_manager.get_or_create_book(search_result.key)
|
||||
return None
|
||||
|
||||
|
||||
@property
|
||||
def title(self):
|
||||
''' get the book title '''
|
||||
return self.data['Title']
|
||||
|
||||
@property
|
||||
def author(self):
|
||||
''' get the book title '''
|
||||
return self.data['Author']
|
||||
|
||||
@property
|
||||
def isbn(self):
|
||||
''' pulls out the isbn13 field from the csv line data '''
|
||||
|
|
|
@ -64,7 +64,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
|
|||
tags = []
|
||||
for book in self.mention_books.all():
|
||||
tags.append(activitypub.Link(
|
||||
href=book.local_id,
|
||||
href=book.remote_id,
|
||||
name=book.title,
|
||||
type='Book'
|
||||
))
|
||||
|
@ -160,7 +160,7 @@ class GeneratedNote(Status):
|
|||
''' indicate the book in question for mastodon (or w/e) users '''
|
||||
message = self.content
|
||||
books = ', '.join(
|
||||
'<a href="%s">"%s"</a>' % (self.book.local_id, self.book.title) \
|
||||
'<a href="%s">"%s"</a>' % (self.book.remote_id, self.book.title) \
|
||||
for book in self.mention_books.all()
|
||||
)
|
||||
return '%s %s' % (message, books)
|
||||
|
@ -177,7 +177,7 @@ class Comment(Status):
|
|||
def ap_pure_content(self):
|
||||
''' indicate the book in question for mastodon (or w/e) users '''
|
||||
return self.content + '<br><br>(comment on <a href="%s">"%s"</a>)' % \
|
||||
(self.book.local_id, self.book.title)
|
||||
(self.book.remote_id, self.book.title)
|
||||
|
||||
activity_serializer = activitypub.Comment
|
||||
pure_activity_serializer = activitypub.Note
|
||||
|
@ -193,7 +193,7 @@ class Quotation(Status):
|
|||
''' indicate the book in question for mastodon (or w/e) users '''
|
||||
return '"%s"<br>-- <a href="%s">"%s"</a><br><br>%s' % (
|
||||
self.quote,
|
||||
self.book.local_id,
|
||||
self.book.remote_id,
|
||||
self.book.title,
|
||||
self.content,
|
||||
)
|
||||
|
@ -231,7 +231,7 @@ class Review(Status):
|
|||
def ap_pure_content(self):
|
||||
''' indicate the book in question for mastodon (or w/e) users '''
|
||||
return self.content + '<br><br>(<a href="%s">"%s"</a>)' % \
|
||||
(self.book.local_id, self.book.title)
|
||||
(self.book.remote_id, self.book.title)
|
||||
|
||||
activity_serializer = activitypub.Review
|
||||
pure_activity_serializer = activitypub.Article
|
||||
|
|
|
@ -20,6 +20,10 @@ function reply(e) {
|
|||
return true;
|
||||
}
|
||||
|
||||
function selectAll(el) {
|
||||
el.parentElement.querySelectorAll('[type="checkbox"]')
|
||||
.forEach(t => t.checked=true);
|
||||
}
|
||||
|
||||
function rate_stars(e) {
|
||||
e.preventDefault();
|
||||
|
|
|
@ -29,16 +29,52 @@
|
|||
{% if failed_items %}
|
||||
<div class="block">
|
||||
<h2 class="title is-4">Failed to load</h2>
|
||||
<ul>
|
||||
{% for item in failed_items %}
|
||||
<li>
|
||||
Line {{ item.index }}:
|
||||
<strong>{{ item.data|dict_key:'Title' }}</strong> by
|
||||
{{ item.data|dict_key:'Author' }}
|
||||
({{ item.fail_reason }})
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% if not job.retry %}
|
||||
<form name="retry" action="/retry-import/" method="post">
|
||||
{% csrf_token %}
|
||||
<input type="hidden" name="import_job" value="{{ job.id }}">
|
||||
<ul>
|
||||
<fieldset>
|
||||
{% for item in failed_items %}
|
||||
<li class="pb-1">
|
||||
<input class="checkbox" type="checkbox" name="import_item" value="{{ item.id }}" id="import-item-{{ item.id }}">
|
||||
<label for="import-item-{{ item.id }}">
|
||||
Line {{ item.index }}:
|
||||
<strong>{{ item.data|dict_key:'Title' }}</strong> by
|
||||
{{ item.data|dict_key:'Author' }}
|
||||
</label>
|
||||
<p>
|
||||
{{ item.fail_reason }}.
|
||||
</p>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</fieldset>
|
||||
</ul>
|
||||
<div class="block pt-1" onclick="selectAll(this)">
|
||||
<label class="label">
|
||||
<input type="checkbox" class="checkbox">
|
||||
Select all
|
||||
</label>
|
||||
</div>
|
||||
<button class="button" type="submit">Retry items</button>
|
||||
{% else %}
|
||||
<ul>
|
||||
{% for item in failed_items %}
|
||||
<li class="pb-1">
|
||||
<p>
|
||||
Line {{ item.index }}:
|
||||
<strong>{{ item.data|dict_key:'Title' }}</strong> by
|
||||
{{ item.data|dict_key:'Author' }}
|
||||
</p>
|
||||
<p>
|
||||
{{ item.fail_reason }}.
|
||||
<a href="/create-book/{{ item.id }}">Manually add book</a>
|
||||
</p>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</form>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
|
|
@ -22,15 +22,10 @@ class Book(TestCase):
|
|||
)
|
||||
|
||||
def test_remote_id(self):
|
||||
local_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
|
||||
self.assertEqual(self.work.get_remote_id(), local_id)
|
||||
remote_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
|
||||
self.assertEqual(self.work.get_remote_id(), remote_id)
|
||||
self.assertEqual(self.work.remote_id, 'https://example.com/book/1')
|
||||
|
||||
def test_local_id(self):
|
||||
''' the local_id property for books '''
|
||||
expected_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
|
||||
self.assertEqual(self.work.local_id, expected_id)
|
||||
|
||||
def test_create_book(self):
|
||||
''' you shouldn't be able to create Books (only editions and works) '''
|
||||
self.assertRaises(
|
||||
|
|
|
@ -54,9 +54,9 @@ urlpatterns = [
|
|||
path('', views.home),
|
||||
re_path(r'^(?P<tab>home|local|federated)/?$', views.home_tab),
|
||||
re_path(r'^notifications/?', views.notifications_page),
|
||||
re_path(r'import/?$', views.import_page),
|
||||
re_path(r'import-status/(\d+)/?$', views.import_status),
|
||||
re_path(r'user-edit/?$', views.edit_profile_page),
|
||||
re_path(r'^import/?$', views.import_page),
|
||||
re_path(r'^import-status/(\d+)/?$', views.import_status),
|
||||
re_path(r'^user-edit/?$', views.edit_profile_page),
|
||||
|
||||
# should return a ui view or activitypub json blob as requested
|
||||
# users
|
||||
|
@ -98,6 +98,7 @@ urlpatterns = [
|
|||
re_path(r'^edit-profile/?$', actions.edit_profile),
|
||||
|
||||
re_path(r'^import-data/?', actions.import_data),
|
||||
re_path(r'^retry-import/?', actions.retry_import),
|
||||
re_path(r'^resolve-book/?', actions.resolve_book),
|
||||
re_path(r'^edit-book/(?P<book_id>\d+)/?', actions.edit_book),
|
||||
re_path(r'^upload-cover/(?P<book_id>\d+)/?', actions.upload_cover),
|
||||
|
|
|
@ -662,10 +662,27 @@ def import_data(request):
|
|||
except (UnicodeDecodeError, ValueError):
|
||||
return HttpResponseBadRequest('Not a valid csv file')
|
||||
goodreads_import.start_import(job)
|
||||
return redirect('/import-status/%d' % (job.id,))
|
||||
return redirect('/import-status/%d' % job.id)
|
||||
return HttpResponseBadRequest()
|
||||
|
||||
|
||||
@login_required
|
||||
def retry_import(request):
|
||||
''' ingest a goodreads csv '''
|
||||
job = get_object_or_404(models.ImportJob, id=request.POST.get('import_job'))
|
||||
items = []
|
||||
for item in request.POST.getlist('import_item'):
|
||||
items.append(get_object_or_404(models.ImportItem, id=item))
|
||||
|
||||
job = goodreads_import.create_retry_job(
|
||||
request.user,
|
||||
job,
|
||||
items,
|
||||
)
|
||||
goodreads_import.start_import(job)
|
||||
return redirect('/import-status/%d' % job.id)
|
||||
|
||||
|
||||
@login_required
|
||||
@permission_required('bookwyrm.create_invites', raise_exception=True)
|
||||
def create_invite(request):
|
||||
|
|
Loading…
Reference in a new issue