Merge branch 'main' into make-remote-follow-username-regex-comply-to-rfc-7565

This commit is contained in:
Hugh Rundle 2025-03-30 16:47:27 +11:00 committed by GitHub
commit c5b052375e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1546 additions and 381 deletions

View file

@ -22,6 +22,21 @@ class BookwyrmImporter:
job = BookwyrmImportJob.objects.create(
user=user, archive_file=archive_file, required=required
)
return job
def create_retry_job(
    self, user: User, original_job: BookwyrmImportJob
) -> BookwyrmImportJob:
    """Create a new import job, flagged as a retry, that reuses the
    archive file and the required sections of the original job"""
    # everything except the owner is carried over from the original job
    carried_over = {
        "archive_file": original_job.archive_file,
        "required": original_job.required,
        "retry": True,
    }
    return BookwyrmImportJob.objects.create(user=user, **carried_over)

View file

@ -0,0 +1,151 @@
# Generated by Django 4.2.20 on 2025-03-28 07:37
import bookwyrm.models.fields
import django.contrib.postgres.fields
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Schema changes for the child-job based user import.

    Creates the UserRelationshipImport, UserImportPost and UserImportBook
    child job tables, adds ``retry`` to BookwyrmImportJob and ``fail_reason``
    to parent and child jobs, alters ``required`` on BookwyrmImportJob and
    removes ``json_completed`` from BookwyrmExportJob.
    """

    dependencies = [
        ("bookwyrm", "0211_author_finna_key_book_finna_key"),
    ]

    operations = [
        # child job holding one follow or block to import
        migrations.CreateModel(
            name="UserRelationshipImport",
            fields=[
                (
                    # multi-table inheritance link to the ChildJob row
                    "childjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.childjob",
                    ),
                ),
                (
                    "relationship",
                    bookwyrm.models.fields.CharField(
                        choices=[("follow", "Follow"), ("block", "Block")],
                        max_length=10,
                        null=True,
                    ),
                ),
                (
                    "remote_id",
                    bookwyrm.models.fields.RemoteIdField(
                        max_length=255,
                        null=True,
                        validators=[bookwyrm.models.fields.validate_remote_id],
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.childjob",),
        ),
        # the export job no longer tracks JSON completion with its own flag
        migrations.RemoveField(
            model_name="bookwyrmexportjob",
            name="json_completed",
        ),
        # marks import jobs that were created to retry an earlier job
        migrations.AddField(
            model_name="bookwyrmimportjob",
            name="retry",
            field=models.BooleanField(default=False),
        ),
        # free-text failure reason on both child and parent jobs
        migrations.AddField(
            model_name="childjob",
            name="fail_reason",
            field=models.TextField(null=True),
        ),
        migrations.AddField(
            model_name="parentjob",
            name="fail_reason",
            field=models.TextField(null=True),
        ),
        # the array's base field is now bookwyrm's own CharField
        migrations.AlterField(
            model_name="bookwyrmimportjob",
            name="required",
            field=django.contrib.postgres.fields.ArrayField(
                base_field=bookwyrm.models.fields.CharField(blank=True, max_length=50),
                blank=True,
                size=None,
            ),
        ),
        # child job holding one comment, review or quotation to import
        migrations.CreateModel(
            name="UserImportPost",
            fields=[
                (
                    # multi-table inheritance link to the ChildJob row
                    "childjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.childjob",
                    ),
                ),
                ("json", models.JSONField()),
                (
                    "status_type",
                    bookwyrm.models.fields.CharField(
                        choices=[
                            ("comment", "Comment"),
                            ("review", "Review"),
                            ("quote", "Quotation"),
                        ],
                        default="comment",
                        max_length=10,
                        null=True,
                    ),
                ),
                (
                    # PROTECT: an edition cannot be deleted while a post job points at it
                    "book",
                    bookwyrm.models.fields.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        to="bookwyrm.edition",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.childjob",),
        ),
        # child job holding the data for one book to import
        migrations.CreateModel(
            name="UserImportBook",
            fields=[
                (
                    # multi-table inheritance link to the ChildJob row
                    "childjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.childjob",
                    ),
                ),
                ("book_data", models.JSONField()),
                (
                    # SET_NULL: the job row survives if the book is deleted
                    "book",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to="bookwyrm.book",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.childjob",),
        ),
    ]

View file

@ -26,7 +26,12 @@ from .federated_server import FederatedServer
from .group import Group, GroupMember, GroupMemberInvitation
from .import_job import ImportJob, ImportItem
from .bookwyrm_import_job import BookwyrmImportJob
from .bookwyrm_import_job import (
BookwyrmImportJob,
UserImportBook,
UserImportPost,
import_book_task,
)
from .bookwyrm_export_job import BookwyrmExportJob
from .move import MoveUser

View file

@ -6,7 +6,7 @@ import os
from boto3.session import Session as BotoSession
from s3_tar import S3Tar
from django.db.models import BooleanField, FileField, JSONField
from django.db.models import FileField, JSONField
from django.core.serializers.json import DjangoJSONEncoder
from django.core.files.base import ContentFile
from django.core.files.storage import storages
@ -17,7 +17,7 @@ from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, ListItem
from bookwyrm.models import Review, Comment, Quotation
from bookwyrm.models import Edition
from bookwyrm.models import UserFollows, User, UserBlocks
from bookwyrm.models.job import ParentJob
from bookwyrm.models.job import ParentJob, ParentTask
from bookwyrm.tasks import app, IMPORTS
from bookwyrm.utils.tar import BookwyrmTarFile
@ -42,38 +42,41 @@ class BookwyrmExportJob(ParentJob):
export_data = FileField(null=True, storage=select_exports_storage)
export_json = JSONField(null=True, encoder=DjangoJSONEncoder)
json_completed = BooleanField(default=False)
def start_job(self):
"""schedule the first task"""
task = create_export_json_task.delay(job_id=self.id)
self.task_id = task.id
self.save(update_fields=["task_id"])
self.set_status("active")
create_export_json_task.delay(job_id=self.id)
@app.task(queue=IMPORTS)
def create_export_json_task(job_id):
@app.task(queue=IMPORTS, base=ParentTask)
def create_export_json_task(**kwargs):
"""create the JSON data for the export"""
job = BookwyrmExportJob.objects.get(id=job_id)
job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])
# don't start the job if it was stopped from the UI
if job.complete:
if job.status == "stopped":
return
try:
job.set_status("active")
# generate JSON structure
job.export_json = export_json(job.user)
# generate JSON
data = export_user(job.user)
data["settings"] = export_settings(job.user)
data["goals"] = export_goals(job.user)
data["books"] = export_books(job.user)
data["saved_lists"] = export_saved_lists(job.user)
data["follows"] = export_follows(job.user)
data["blocks"] = export_blocks(job.user)
job.export_json = data
job.save(update_fields=["export_json"])
# create archive in separate task
# trigger task to create tar file
create_archive_task.delay(job_id=job.id)
except Exception as err: # pylint: disable=broad-except
logger.exception(
"create_export_json_task for %s failed with error: %s", job, err
"create_export_json_task for job %s failed with error: %s", job.id, err
)
job.set_status("failed")
@ -94,21 +97,20 @@ def add_file_to_s3_tar(s3_tar: S3Tar, storage, file, directory=""):
)
@app.task(queue=IMPORTS)
def create_archive_task(job_id):
@app.task(queue=IMPORTS, base=ParentTask)
def create_archive_task(**kwargs):
"""create the archive containing the JSON file and additional files"""
job = BookwyrmExportJob.objects.get(id=job_id)
job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])
# don't start the job if it was stopped from the UI
if job.complete:
if job.status == "stopped":
return
try:
export_task_id = str(job.task_id)
archive_filename = f"{export_task_id}.tar.gz"
export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8")
user = job.user
editions = get_books_for_user(user)
@ -169,25 +171,15 @@ def create_archive_task(job_id):
tar.add_image(edition.cover, directory="images")
job.save(update_fields=["export_data"])
job.set_status("completed")
job.complete_job()
except Exception as err: # pylint: disable=broad-except
logger.exception("create_archive_task for %s failed with error: %s", job, err)
logger.exception(
"create_archive_task for job %s failed with error: %s", job.id, err
)
job.set_status("failed")
def export_json(user: User):
"""create export JSON"""
data = export_user(user) # in the root of the JSON structure
data["settings"] = export_settings(user)
data["goals"] = export_goals(user)
data["books"] = export_books(user)
data["saved_lists"] = export_saved_lists(user)
data["follows"] = export_follows(user)
data["blocks"] = export_blocks(user)
return data
def export_user(user: User):
"""export user data"""
data = user.to_activity()
@ -316,11 +308,9 @@ def export_book(user: User, edition: Edition):
def get_books_for_user(user):
"""
Get all the books and editions related to a user.
We use union() instead of Q objects because it creates
multiple simple queries in stead of a much more complex DB query
multiple simple queries instead of a complex DB query
that can time out.
"""
shelf_eds = Edition.objects.select_related("parent_work").filter(shelves__user=user)

View file

@ -2,16 +2,26 @@
import json
import logging
import math
from django.db.models import FileField, JSONField, CharField
from django.db.models import (
BooleanField,
ForeignKey,
FileField,
JSONField,
TextChoices,
PROTECT,
SET_NULL,
)
from django.utils import timezone
from django.utils.html import strip_tags
from django.utils.translation import gettext_lazy as _
from django.contrib.postgres.fields import ArrayField as DjangoArrayField
from bookwyrm import activitypub
from bookwyrm import models
from bookwyrm.tasks import app, IMPORTS
from bookwyrm.models.job import ParentJob, ParentTask, SubTask
from bookwyrm.models.job import ParentJob, ChildJob, ParentTask, SubTask
from bookwyrm.utils.tar import BookwyrmTarFile
logger = logging.getLogger(__name__)
@ -22,23 +32,130 @@ class BookwyrmImportJob(ParentJob):
archive_file = FileField(null=True, blank=True)
import_data = JSONField(null=True)
required = DjangoArrayField(CharField(max_length=50, blank=True), blank=True)
required = DjangoArrayField(
models.fields.CharField(max_length=50, blank=True), blank=True
)
retry = BooleanField(default=False)
def start_job(self):
"""Start the job"""
start_import_task.delay(job_id=self.id, no_children=True)
start_import_task.delay(job_id=self.id)
@property
def book_tasks(self):
    """All UserImportBook child jobs that belong to this import job"""
    own_book_jobs = UserImportBook.objects.filter(parent_job=self)
    return own_book_jobs.all()
@property
def status_tasks(self):
    """All UserImportPost child jobs that belong to this import job"""
    own_post_jobs = UserImportPost.objects.filter(parent_job=self)
    return own_post_jobs.all()
@property
def relationship_tasks(self):
    """All UserRelationshipImport child jobs that belong to this import job"""
    own_relationship_jobs = UserRelationshipImport.objects.filter(parent_job=self)
    return own_relationship_jobs.all()
@property
def item_count(self):
    """How many total tasks are there?

    Counts book, status and relationship tasks. Relationship tasks have to
    be part of the total because pending_item_count includes them as well;
    leaving them out lets the pending count exceed the total and pushes
    percent_complete below zero.
    """
    return (
        self.book_tasks.count()
        + self.status_tasks.count()
        + self.relationship_tasks.count()
    )
@property
def pending_item_count(self):
    """Number of book, status and relationship tasks still pending or active"""
    states = BookwyrmImportJob.Status
    task_sets = (self.book_tasks, self.status_tasks, self.relationship_tasks)
    # one count query per kind of task, in the same order as before
    return sum(
        tasks.filter(status__in=[states.PENDING, states.ACTIVE]).count()
        for tasks in task_sets
    )
@property
def percent_complete(self):
    """Share of tasks no longer pending or active, as a whole percentage"""
    total = self.item_count
    if total:
        finished = total - self.pending_item_count
        return math.floor(finished / total * 100)
    # nothing to import (yet), so there is no progress to report
    return 0
class UserImportBook(ChildJob):
    """ChildJob to import each book.
    Equivalent to ImportItem when importing a csv file of books"""

    # the local book this job resolved to; empty until the import task sets it
    book = ForeignKey(models.Book, on_delete=SET_NULL, null=True, blank=True)
    # one entry of the "books" list from the import file
    book_data = JSONField(null=False)

    def start_job(self):
        """Start the job"""
        # queue the celery task, which looks this job up again by its id
        import_book_task.delay(child_id=self.id)
class UserImportPost(ChildJob):
    """ChildJob for comments, quotes, and reviews"""

    class StatusType(TextChoices):
        """Possible status types."""

        COMMENT = "comment", _("Comment")
        REVIEW = "review", _("Review")
        QUOTE = "quote", _("Quotation")

    # the status as it appears in the import file
    json = JSONField(null=False)
    # the edition the status is about
    book = models.fields.ForeignKey(
        "Edition", on_delete=PROTECT, activitypub_field="inReplyToBook"
    )
    # which kind of status this is; comment unless stated otherwise
    status_type = models.fields.CharField(
        max_length=10, choices=StatusType.choices, default=StatusType.COMMENT, null=True
    )

    def start_job(self):
        """Start the job"""
        # queue the celery task, which looks this job up again by its id
        upsert_status_task.delay(child_id=self.id)
class UserRelationshipImport(ChildJob):
    """ChildJob for follows and blocks"""

    class RelationshipType(TextChoices):
        """Possible relationship types."""

        FOLLOW = "follow", _("Follow")
        BLOCK = "block", _("Block")

    # whether the user should be followed or blocked
    relationship = models.fields.CharField(
        max_length=10, choices=RelationshipType.choices, null=True
    )
    # remote id of the user to follow or block; not unique across jobs
    remote_id = models.fields.RemoteIdField(null=True, unique=False)

    def start_job(self):
        """Start the job"""
        # queue the celery task, which looks this job up again by its id
        import_user_relationship_task.delay(child_id=self.id)
@app.task(queue=IMPORTS, base=ParentTask)
def start_import_task(**kwargs):
"""trigger the child import tasks for each user data"""
"""trigger the child import tasks for each user data
We always import the books even if not assigning
them to shelves, lists etc"""
job = BookwyrmImportJob.objects.get(id=kwargs["job_id"])
archive_file = job.archive_file
archive_file = job.bookwyrmimportjob.archive_file
# don't start the job if it was stopped from the UI
if job.complete:
if job.status == "stopped":
return
job.status = "active"
job.save(update_fields=["status"])
try:
archive_file.open("rb")
with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar:
@ -56,13 +173,23 @@ def start_import_task(**kwargs):
if "include_saved_lists" in job.required:
upsert_saved_lists(job.user, job.import_data.get("saved_lists", []))
if "include_follows" in job.required:
upsert_follows(job.user, job.import_data.get("follows", []))
for remote_id in job.import_data.get("follows", []):
UserRelationshipImport.objects.create(
parent_job=job, remote_id=remote_id, relationship="follow"
)
if "include_blocks" in job.required:
upsert_user_blocks(job.user, job.import_data.get("blocks", []))
for remote_id in job.import_data.get("blocks", []):
UserRelationshipImport.objects.create(
parent_job=job, remote_id=remote_id, relationship="block"
)
process_books(job, tar)
for item in UserRelationshipImport.objects.filter(parent_job=job).all():
item.start_job()
for data in job.import_data.get("books"):
book_job = UserImportBook.objects.create(parent_job=job, book_data=data)
book_job.start_job()
job.set_status("complete")
archive_file.close()
except Exception as err: # pylint: disable=broad-except
@ -70,89 +197,191 @@ def start_import_task(**kwargs):
job.set_status("failed")
def process_books(job, tar):
"""
Process user import data related to books
We always import the books even if not assigning
them to shelves, lists etc
"""
@app.task(queue=IMPORTS, base=SubTask)
def import_book_task(**kwargs): # pylint: disable=too-many-locals,too-many-branches
"""Take work and edition data,
find or create the edition and work in the database"""
books = job.import_data.get("books")
task = UserImportBook.objects.get(id=kwargs["child_id"])
job = task.parent_job
archive_file = job.bookwyrmimportjob.archive_file
book_data = task.book_data
for data in books:
book = get_or_create_edition(data, tar)
if task.complete or job.status == "stopped":
return
if "include_shelves" in job.required:
upsert_shelves(book, job.user, data)
try:
edition = book_data.get("edition")
work = book_data.get("work")
book = models.Edition.find_existing(edition)
if not book:
# make sure we have the authors in the local DB
# replace the old author ids in the edition JSON
edition["authors"] = []
work["authors"] = []
for author in book_data.get("authors"):
instance = activitypub.parse(author).to_model(
model=models.Author, save=True, overwrite=False
)
if "include_readthroughs" in job.required:
upsert_readthroughs(data.get("readthroughs"), job.user, book.id)
edition["authors"].append(instance.remote_id)
work["authors"].append(instance.remote_id)
if "include_comments" in job.required:
upsert_statuses(
job.user, models.Comment, data.get("comments"), book.remote_id
)
if "include_quotations" in job.required:
upsert_statuses(
job.user, models.Quotation, data.get("quotations"), book.remote_id
# we will add the cover later from the tar
# don't try to load it from the old server
cover = edition.get("cover", {})
cover_path = cover.get("url", None)
edition["cover"] = {}
# first we need the parent work to exist
work["editions"] = []
work_instance = activitypub.parse(work).to_model(
model=models.Work, save=True, overwrite=False
)
if "include_reviews" in job.required:
upsert_statuses(
job.user, models.Review, data.get("reviews"), book.remote_id
# now we have a work we can add it to the edition
# and create the edition model instance
edition["work"] = work_instance.remote_id
book = activitypub.parse(edition).to_model(
model=models.Edition, save=True, overwrite=False
)
if "include_lists" in job.required:
upsert_lists(job.user, data.get("lists"), book.id)
# set the cover image from the tar
if cover_path:
archive_file.open("rb")
with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar:
tar.write_image_to_file(cover_path, book.cover)
archive_file.close()
task.book = book
task.save(update_fields=["book"])
required = task.parent_job.bookwyrmimportjob.required
def get_or_create_edition(book_data, tar):
"""Take a JSON string of work and edition data,
find or create the edition and work in the database and
return an edition instance"""
if "include_shelves" in required:
upsert_shelves(task.parent_job.user, book, book_data.get("shelves"))
edition = book_data.get("edition")
existing = models.Edition.find_existing(edition)
if existing:
return existing
if "include_readthroughs" in required:
upsert_readthroughs(
task.parent_job.user, book.id, book_data.get("readthroughs")
)
# make sure we have the authors in the local DB
# replace the old author ids in the edition JSON
edition["authors"] = []
for author in book_data.get("authors"):
parsed_author = activitypub.parse(author)
instance = parsed_author.to_model(
model=models.Author, save=True, overwrite=True
if "include_lists" in required:
upsert_lists(task.parent_job.user, book.id, book_data.get("lists"))
except Exception as err: # pylint: disable=broad-except
logger.exception(
"Book Import Task %s for Job %s Failed with error: %s", task.id, job.id, err
)
task.fail_reason = _("unknown")
task.save(update_fields=["fail_reason"])
task.set_status("failed")
edition["authors"].append(instance.remote_id)
# Now import statuses
# These are also subtasks so that we can isolate anything that fails
if "include_comments" in job.bookwyrmimportjob.required:
for status in book_data.get("comments"):
UserImportPost.objects.create(
parent_job=task.parent_job,
json=status,
book=book,
status_type=UserImportPost.StatusType.COMMENT,
)
# we will add the cover later from the tar
# don't try to load it from the old server
cover = edition.get("cover", {})
cover_path = cover.get("url", None)
edition["cover"] = {}
if "include_quotations" in job.bookwyrmimportjob.required:
for status in book_data.get("quotations"):
UserImportPost.objects.create(
parent_job=task.parent_job,
json=status,
book=book,
status_type=UserImportPost.StatusType.QUOTE,
)
# first we need the parent work to exist
work = book_data.get("work")
work["editions"] = []
parsed_work = activitypub.parse(work)
work_instance = parsed_work.to_model(model=models.Work, save=True, overwrite=True)
if "include_reviews" in job.bookwyrmimportjob.required:
for status in book_data.get("reviews"):
UserImportPost.objects.create(
parent_job=task.parent_job,
json=status,
book=book,
status_type=UserImportPost.StatusType.REVIEW,
)
# now we have a work we can add it to the edition
# and create the edition model instance
edition["work"] = work_instance.remote_id
parsed_edition = activitypub.parse(edition)
book = parsed_edition.to_model(model=models.Edition, save=True, overwrite=True)
for item in UserImportPost.objects.filter(parent_job=job).all():
item.start_job()
# set the cover image from the tar
if cover_path:
tar.write_image_to_file(cover_path, book.cover)
return book
task.complete_job()
def upsert_readthroughs(data, user, book_id):
@app.task(queue=IMPORTS, base=SubTask)
def upsert_status_task(**kwargs):
    """Find or create a single imported status (comment, quotation or review)

    Expects ``child_id`` in kwargs: the id of the UserImportPost to process.
    Does nothing when the task is already complete or its parent job has been
    stopped. The status is only imported when the importing user is an alias
    of the account it is attributed to; otherwise the task fails with the
    reason "unauthorized". Any exception marks the task as failed with the
    reason "unknown" instead of propagating.
    """
    task = UserImportPost.objects.get(id=kwargs["child_id"])
    job = task.parent_job
    user = job.user
    status = task.json

    # anything that is not a review or a quotation is imported as a comment
    status_classes = {"review": models.Review, "quote": models.Quotation}
    status_class = status_classes.get(task.status_type, models.Comment)

    if task.complete or job.status == "stopped":
        return

    try:
        # only add statuses if this is the same user
        if is_alias(user, status.get("attributedTo", False)):
            status["attributedTo"] = user.remote_id
            status["to"] = update_followers_address(user, status["to"])
            status["cc"] = update_followers_address(user, status["cc"])
            # this parses incorrectly but we can't set it without knowing the new id
            status["replies"] = {}
            status["inReplyToBook"] = task.book.remote_id
            parsed = activitypub.parse(status)

            # don't duplicate posts on multiple import
            if not status_already_exists(user, parsed):
                instance = parsed.to_model(
                    model=status_class, save=True, overwrite=True
                )

                for field_name in (
                    "progress",
                    "progress_mode",
                    "position",
                    "endposition",
                    "position_mode",
                ):
                    if status.get(field_name):
                        # set the field that field_name refers to; a plain
                        # attribute assignment would only ever create an
                        # attribute literally called "val"
                        setattr(instance, field_name, status[field_name])

                instance.remote_id = instance.get_remote_id()  # update the remote_id
                instance.save()  # save and broadcast

            # NOTE(review): a status that already exists is treated as imported,
            # so the task is completed either way - confirm this is intended
            task.complete_job()

        else:
            logger.warning(
                "User not authorized to import statuses, or status is tombstone"
            )
            task.fail_reason = _("unauthorized")
            task.save(update_fields=["fail_reason"])
            task.set_status("failed")

    except Exception as err:  # pylint: disable=broad-except
        logger.exception("User Import Task %s Failed with error: %s", task.id, err)
        task.fail_reason = _("unknown")
        task.save(update_fields=["fail_reason"])
        task.set_status("failed")
def upsert_readthroughs(user, book_id, data):
"""Take a JSON string of readthroughs and
find or create the instances in the database"""
@ -176,49 +405,11 @@ def upsert_readthroughs(data, user, book_id):
models.ReadThrough.objects.create(**obj)
def upsert_statuses(user, cls, data, book_remote_id):
"""Take a JSON string of a status and
find or create the instances in the database"""
for status in data:
if is_alias(
user, status["attributedTo"]
): # don't let l33t hax0rs steal other people's posts
# update ids and remove replies
status["attributedTo"] = user.remote_id
status["to"] = update_followers_address(user, status["to"])
status["cc"] = update_followers_address(user, status["cc"])
status[
"replies"
] = (
{}
) # this parses incorrectly but we can't set it without knowing the new id
status["inReplyToBook"] = book_remote_id
parsed = activitypub.parse(status)
if not status_already_exists(
user, parsed
): # don't duplicate posts on multiple import
instance = parsed.to_model(model=cls, save=True, overwrite=True)
for val in [
"progress",
"progress_mode",
"position",
"endposition",
"position_mode",
]:
if status.get(val):
instance.val = status[val]
instance.remote_id = instance.get_remote_id() # update the remote_id
instance.save() # save and broadcast
else:
logger.warning("User does not have permission to import statuses")
def upsert_lists(user, lists, book_id):
def upsert_lists(
user,
book_id,
lists,
):
"""Take a list of objects each containing
a list and list item as AP objects
@ -254,11 +445,10 @@ def upsert_lists(user, lists, book_id):
)
def upsert_shelves(book, user, book_data):
def upsert_shelves(user, book, shelves):
"""Take shelf JSON objects and create
DB entries if they don't already exist"""
shelves = book_data["shelves"]
for shelf in shelves:
book_shelf = models.Shelf.objects.filter(name=shelf["name"], user=user).first()
@ -275,6 +465,10 @@ def upsert_shelves(book, user, book_data):
)
# user updates
##############
def update_user_profile(user, tar, data):
"""update the user's profile from import data"""
name = data.get("name", None)
@ -315,14 +509,6 @@ def update_user_settings(user, data):
user.save(update_fields=update_fields)
@app.task(queue=IMPORTS, base=SubTask)
def update_user_settings_task(job_id):
"""wrapper task for user's settings import"""
parent_job = BookwyrmImportJob.objects.get(id=job_id)
return update_user_settings(parent_job.user, parent_job.import_data.get("user"))
def update_goals(user, data):
"""update the user's goals from import data"""
@ -340,14 +526,6 @@ def update_goals(user, data):
models.AnnualGoal.objects.create(**goal)
@app.task(queue=IMPORTS, base=SubTask)
def update_goals_task(job_id):
"""wrapper task for user's goals import"""
parent_job = BookwyrmImportJob.objects.get(id=job_id)
return update_goals(parent_job.user, parent_job.import_data.get("goals"))
def upsert_saved_lists(user, values):
"""Take a list of remote ids and add as saved lists"""
@ -358,67 +536,85 @@ def upsert_saved_lists(user, values):
@app.task(queue=IMPORTS, base=SubTask)
def upsert_saved_lists_task(job_id):
"""wrapper task for user's saved lists import"""
parent_job = BookwyrmImportJob.objects.get(id=job_id)
def import_user_relationship_task(**kwargs):
"""import a user follow or block from an import file"""
return upsert_saved_lists(
parent_job.user, parent_job.import_data.get("saved_lists")
)
task = UserRelationshipImport.objects.get(id=kwargs["child_id"])
job = task.parent_job
try:
if task.relationship == "follow":
def upsert_follows(user, values):
"""Take a list of remote ids and add as follows"""
for remote_id in values:
followee = activitypub.resolve_remote_id(remote_id, models.User)
if followee:
(follow_request, created,) = models.UserFollowRequest.objects.get_or_create(
user_subject=user,
user_object=followee,
)
if not created:
# this request probably failed to connect with the remote
# and should save to trigger a re-broadcast
follow_request.save()
@app.task(queue=IMPORTS, base=SubTask)
def upsert_follows_task(job_id):
"""wrapper task for user's follows import"""
parent_job = BookwyrmImportJob.objects.get(id=job_id)
return upsert_follows(parent_job.user, parent_job.import_data.get("follows"))
def upsert_user_blocks(user, user_ids):
"""block users"""
for user_id in user_ids:
user_object = activitypub.resolve_remote_id(user_id, models.User)
if user_object:
exists = models.UserBlocks.objects.filter(
user_subject=user, user_object=user_object
).exists()
if not exists:
models.UserBlocks.objects.create(
user_subject=user, user_object=user_object
followee = activitypub.resolve_remote_id(task.remote_id, models.User)
if followee:
(
follow_request,
created,
) = models.UserFollowRequest.objects.get_or_create(
user_subject=job.user,
user_object=followee,
)
# remove the blocked users's lists from the groups
models.List.remove_from_group(user, user_object)
# remove the blocked user from all blocker's owned groups
models.GroupMember.remove(user, user_object)
if not created:
# this request probably failed to connect with the remote
# and should save to trigger a re-broadcast
follow_request.save()
task.complete_job()
else:
logger.exception(
"Could not resolve user %s task %s", task.remote_id, task.id
)
task.fail_reason = _("connection_error")
task.save(update_fields=["fail_reason"])
task.set_status("failed")
elif task.relationship == "block":
user_object = activitypub.resolve_remote_id(task.remote_id, models.User)
if user_object:
exists = models.UserBlocks.objects.filter(
user_subject=job.user, user_object=user_object
).exists()
if not exists:
models.UserBlocks.objects.create(
user_subject=job.user, user_object=user_object
)
# remove the blocked users's lists from the groups
models.List.remove_from_group(job.user, user_object)
# remove the blocked user from all blocker's owned groups
models.GroupMember.remove(job.user, user_object)
task.complete_job()
else:
logger.exception(
"Could not resolve user %s task %s", task.remote_id, task.id
)
task.fail_reason = _("connection_error")
task.save(update_fields=["fail_reason"])
task.set_status("failed")
else:
logger.exception(
"Invalid relationship %s type specified in task %s",
task.relationship,
task.id,
)
task.fail_reason = _("invalid_relationship")
task.save(update_fields=["fail_reason"])
task.set_status("failed")
except Exception as err: # pylint: disable=broad-except
logger.exception("User Import Task %s Failed with error: %s", task.id, err)
task.fail_reason = _("unknown")
task.save(update_fields=["fail_reason"])
task.set_status("failed")
@app.task(queue=IMPORTS, base=SubTask)
def upsert_user_blocks_task(job_id):
"""wrapper task for user's blocks import"""
parent_job = BookwyrmImportJob.objects.get(id=job_id)
return upsert_user_blocks(
parent_job.user, parent_job.import_data.get("blocked_users")
)
# utilities
###########
def update_followers_address(user, field):
@ -433,19 +629,21 @@ def update_followers_address(user, field):
def is_alias(user, remote_id):
"""check that the user is listed as movedTo or also_known_as
in the remote user's profile"""
"""check that the user is listed as moved_to
or also_known_as in the remote user's profile"""
if not remote_id:
return False
remote_user = activitypub.resolve_remote_id(
remote_id=remote_id, model=models.User, save=False
)
if remote_user:
if remote_user.moved_to:
if getattr(remote_user, "moved_to", None) is not None:
return user.remote_id == remote_user.moved_to
if remote_user.also_known_as:
if hasattr(remote_user, "also_known_as"):
return user in remote_user.also_known_as.all()
return False

View file

@ -29,6 +29,7 @@ class Job(models.Model):
status = models.CharField(
max_length=50, choices=Status.choices, default=Status.PENDING, null=True
)
fail_reason = models.TextField(null=True)
class Meta:
"""Make it abstract"""
@ -133,7 +134,8 @@ class ParentJob(Job):
tasks = self.pending_child_jobs.filter(task_id__isnull=False).values_list(
"task_id", flat=True
)
app.control.revoke(list(tasks))
tasklist = [str(task) for task in list(tasks)]
app.control.revoke(tasklist)
self.pending_child_jobs.update(status=self.Status.STOPPED)
@ -208,7 +210,7 @@ class ParentTask(app.Task):
job.task_id = task_id
job.save(update_fields=["task_id"])
if kwargs["no_children"]:
if kwargs.get("no_children"):
job.set_status(ChildJob.Status.ACTIVE)
def on_success(
@ -233,7 +235,7 @@ class ParentTask(app.Task):
None: The return value of this handler is ignored.
"""
if kwargs["no_children"]:
if kwargs.get("no_children"):
job = ParentJob.objects.get(id=kwargs["job_id"])
job.complete_job()
@ -247,7 +249,7 @@ class SubTask(app.Task):
"""
def before_start(
self, task_id, *args, **kwargs
self, task_id, args, kwargs
): # pylint: disable=no-self-use, unused-argument
"""Handler called before the task starts. Override.
@ -271,7 +273,7 @@ class SubTask(app.Task):
child_job.set_status(ChildJob.Status.ACTIVE)
def on_success(
self, retval, task_id, *args, **kwargs
self, retval, task_id, args, kwargs
): # pylint: disable=no-self-use, unused-argument
"""Run by the worker if the task executes successfully. Override.

View file

@ -29,10 +29,25 @@
</div>
{% elif next_available %}
<div class="notification is-warning">
<p>{% blocktrans %}Currently you are allowed to import one user every {{ user_import_hours }} hours.{% endblocktrans %}</p>
<p>{% blocktrans %}You will next be able to import a user file at {{ next_available }}{% endblocktrans %}</p>
<p>{% blocktrans with hours=next_available.1 %}Currently you are allowed to import one user every {{ hours }} hours.{% endblocktrans %}</p>
<p>{% blocktrans with next_time=next_available.0 %}You will next be able to import a user file at {{ next_time }}{% endblocktrans %}</p>
</div>
{% else %}
{% if recent_avg_hours or recent_avg_minutes %}
<div class="notification">
<p>
{% if recent_avg_hours %}
{% blocktrans trimmed with hours=recent_avg_hours|floatformat:0|intcomma %}
On average, recent imports have taken {{ hours }} hours.
{% endblocktrans %}
{% else %}
{% blocktrans trimmed with minutes=recent_avg_minutes|floatformat:0|intcomma %}
On average, recent imports have taken {{ minutes }} minutes.
{% endblocktrans %}
{% endif %}
</p>
</div>
{% endif %}
<form class="box content" name="import-user" action="/user-import" method="post" enctype="multipart/form-data">
{% csrf_token %}
@ -186,10 +201,10 @@
{% endif %}
{% for job in jobs %}
<tr>
<td><a href="{% url 'user-import-status' job.id %}">{{ job.created_date }}</a></td>
<td>
<p>{{ job.created_date }}</p>
<p>{{ job.updated_date }}</p>
</td>
<td>{{ job.updated_date }}</td>
<td>
<span
{% if job.status == "stopped" or job.status == "failed" %}
@ -197,14 +212,13 @@
{% elif job.status == "pending" %}
class="tag is-warning"
{% elif job.complete %}
class="tag"
{% else %}
class="tag is-success"
{% else %}
class="tag"
{% endif %}
>
{% if job.status %}
{{ job.status }}
{{ job.status_display }}
{{ job.get_status_display }}
{% elif job.complete %}
{% trans "Complete" %}
{% else %}

View file

@ -0,0 +1,210 @@
{% extends 'layout.html' %}
{% load i18n %}
{% load humanize %}
{% load static %}
{% block title %}{% trans "User Import Status" %}{% endblock %}
{% block content %}{% spaceless %}
<header class="block">
<h1 class="title">
{% block page_title %}
{% if job.retry %}
{% trans "User Import Retry Status" %}
{% else %}
{% trans "User Import Status" %}
{% endif %}
{% endblock %}
</h1>
<nav class="breadcrumb subtitle" aria-label="breadcrumbs">
<ul>
<li><a href="{% url 'user-import' %}">{% trans "User Imports" %}</a></li>
{% url 'user-import-status' job.id as path %}
<li{% if request.path in path %} class="is-active"{% endif %}>
<a href="{{ path }}" {% if request.path in path %}aria-current="page"{% endif %}>
{% trans "User Import Status" %}
</a>
</li>
{% block breadcrumbs %}{% endblock %}
</ul>
</nav>
<div class="block">
<dl>
    {# Summary of the job: when it started and its current state #}
    <dt class="is-pulled-left mr-5 has-text-weight-bold">{% trans "Import started:" %}</dt>
    <dd>{{ job.created_date | naturaltime }}</dd>
    <dt class="is-pulled-left mr-5 has-text-weight-bold">{% trans "Import Job Status:" %} </dt>
    <dd>
        {# Tag colour: danger for stopped/failed, warning for pending, plain for complete, success otherwise #}
        <span
            {% if job.status == "stopped" or job.status == "failed" %}
            class="tag is-danger"
            {% elif job.status == "pending" %}
            class="tag is-warning"
            {% elif job.complete %}
            class="tag"
            {% else %}
            class="tag is-success"
            {% endif %}
        >
            {% if job.status %}
                {# Show the display label of the status choice rather than the raw stored value #}
                {{ job.get_status_display }}
            {% elif job.complete %}
                {% trans "Complete" %}
            {% else %}
                {% trans "Active" %}
            {% endif %}
        </span>
    </dd>
</dl>
</div>
{% block import_counts %}
<div class="block">
<div class="table-container">
<table class="table is-striped is-fullwidth">
<tr>
<th></th>
<th class="has-text-centered">{% trans "Imported" %}</th>
<th class="has-text-centered">{% trans "Failed" %}</th>
<th class="has-text-centered">{% trans "Total" %}</th>
</tr>
<tr>
<th>{% trans "Books" %}</th>
<td class="has-text-centered">{{ completed_books_count }}</td>
<td class="has-text-centered">{{ failed_books_count }}</td>
<td class="has-text-centered">{{ book_jobs_count }}</td>
</tr>
<tr>
<th>{% trans "Statuses" %}</th>
<td class="has-text-centered">{{ completed_statuses_count }}</td>
<td class="has-text-centered">{{ failed_statuses_count }}</td>
<td class="has-text-centered">{{ status_jobs_count }}</td>
</tr>
<tr>
<th>{% trans "Follows & Blocks" %}</th>
<td class="has-text-centered">{{ completed_relationships_count }}</td>
<td class="has-text-centered">{{ failed_relationships_count }}</td>
<td class="has-text-centered">{{ relationship_jobs_count }}</td>
</tr>
</table>
</div>
</div>
{% endblock %}
{% if job.status == "active" and show_progress %}
<div class="box is-processing">
<div class="block">
<span class="icon icon-spinner is-pulled-left" aria-hidden="true"></span>
<span>{% trans "In progress" %}</span>
<span class="is-pulled-right">
<a href="{% url 'user-import-status' job.id %}" class="button is-small">{% trans "Refresh" %}</a>
</span>
</div>
<div class="is-flex">
{# Progress bar: complete_count out of item_count; the inner text is the fallback for browsers without <progress> #}
<progress
    class="progress is-success is-medium mr-2"
    role="progressbar"
    aria-valuemin="0"
    value="{{ complete_count }}"
    aria-valuenow="{{ complete_count }}"
    max="{{ item_count }}"
    aria-valuemax="{{ item_count }}">
    {{ percent }} %
</progress>
<span>{{ percent }}%</span>
</div>
</div>
{% endif %}
{% if not job.complete %}
<form name="stop-import" action="{% url 'user-import-stop' job.id %}" method="POST">
{% csrf_token %}
<button class="button is-danger" type="submit">{% trans "Stop import" %}</button>
</form>
{% endif %}
{% if job.complete and fail_count and not job.retry %}
<div class="notification is-warning">
{% blocktrans trimmed count counter=fail_count with display_counter=fail_count|intcomma %}
{{ display_counter }} item failed to import.
{% plural %}
{{ display_counter }} items failed to import.
{% endblocktrans %}
<a href="{% url 'user-import-troubleshoot' job.id %}">
{% trans "View and troubleshoot failed items" %}
</a>
</div>
{% endif %}
</header>
<div class="block">
{% block actions %}{% endblock %}
{% block item_list %}
<h2 class="title">{% trans "Imported books" %}</h2>
<div class="table-container">
<table class="table is-striped is-fullwidth">
<tr>
<th>
{% trans "Title" %}
</th>
<th>
{% trans "ISBN" %}
</th>
<th>
{% trans "Authors" %}
</th>
{% block import_cols_headers %}
<th>
{% trans "Book" %}
</th>
<th>
{% trans "Status" %}
</th>
{% endblock %}
</tr>
{% for item in items %}
<tr>
<td>
{{ item.book_data.edition.title }}
</td>
<td>
{{ item.book_data.edition.isbn13|default:'' }}
</td>
<td>
{% for author in item.book_data.authors %}
<p>{{ author.name }}</p>
{% endfor %}
</td>
{% block import_cols %}
<td>
{% if item.book %}
<a href="{{ item.book.local_path }}">
{% include 'snippets/book_cover.html' with book=item.book cover_class='is-h-s' size='small' %}
</a>
{% endif %}
</td>
<td>
{% if item.book %}
<span class="icon icon-check" aria-hidden="true"></span>
<span class="is-sr-only-mobile">{% trans "Imported" %}</span>
{% else %}
<div class="is-flex">
<span class="is-sr-only-mobile">{{ item.status }}</span>
</div>
{% endif %}
</td>
{% endblock %}
</tr>
{% block action_row %}{% endblock %}
{% endfor %}
</table>
</div>
{% endblock %}
</div>
<div>
{% include 'snippets/pagination.html' with page=items path=page_path %}
</div>
{% endspaceless %}{% endblock %}

View file

@ -0,0 +1,91 @@
{% extends 'import/user_import_status.html' %}
{% load i18n %}
{% load utilities %}
{% block title %}{% trans "User Import Troubleshooting" %}{% endblock %}
{% block page_title %}
{% trans "Failed items" %}
{% endblock %}
{% block breadcrumbs %}
<li class="is-active">
<a href="#" aria-current="page">{% trans "Troubleshooting" %}</a>
</li>
{% endblock %}
{% block import_counts %}{% endblock %}
{% block actions %}
{# Explains when a retry can help, then shows either the rate-limit notice or the retry form #}
<div class="block">
    <div class="notification content">
        <p>
            {% trans "Re-trying an import can fix missing items in cases such as:" %}
        </p>
        <ul>
            <li>{% trans "Your account was not set as an alias of the original user account" %}</li>
            <li>{% trans "A transient error or timeout caused the external data source to be unavailable." %}</li>
            <li>{% trans "BookWyrm has been updated since this import with a bug fix" %}</li>
        </ul>
        <p>
            {% trans "Re-trying an import will not work in cases such as:" %}
        </p>
        <ul>
            <li>{% trans "A user, status, or BookWyrm server was deleted after your import file was created" %}</li>
            <li>{% trans "Importing statuses when your old account has been deleted" %}</li>
        </ul>
        <p>
            {% trans "Contact your admin or <a href='https://github.com/bookwyrm-social/bookwyrm/issues'>open an issue</a> if you are seeing unexpected failed items." %}
        </p>
    </div>
    {% if next_available %}
    {# next_available is indexed as .0 (next allowed time) and .1 (hours between imports) #}
    <div class="notification is-warning">
        <p>{% blocktrans with hours=next_available.1 %}Currently you are allowed to import or retry one user every {{ hours }} hours.{% endblocktrans %}</p>
        <p>{% blocktrans with next_time=next_available.0 %}You will be able to retry this import at {{ next_time }}{% endblocktrans %}</p>
    </div>
    {% else %}
    {# Posts back to the troubleshoot URL for this job #}
    <form name="retry" method="post" action="{% url 'user-import-troubleshoot' job.id %}">
        {% csrf_token %}
        <button type="submit" class="button">{% trans "Retry all" %}</button>
    </form>
    {% endif %}
</div>
{% endblock %}
{% block item_list %}
{# One row per failed item: book title and status type for posts, username for relationships, and the failure reason #}
<div class="table-container">
    <table class="table is-striped is-fullwidth">
        <tr>
            <th>
                {% trans "Book" %}
            </th>
            <th>
                {% trans "Status" %}
            </th>
            <th>
                {% trans "Relationship" %}
            </th>
            <th>
                {% trans "Reason" %}
            </th>
        </tr>
        {% for item in items %}
        <tr>
            <td class="is-italic">{{ item.userimportpost.book.title }}</td>
            <td>{{ item.userimportpost.json.type }}</td>
            {# The second argument (True) is passed as the tag's return_empty flag #}
            <td>{% id_to_username item.userrelationshipimport.remote_id True %}</td>
            <td>
                {# Map the stored fail_reason code to a translatable message #}
                {% if item.fail_reason == "unauthorized" %}
                {% trans "Not authorized to import statuses" %}
                {% elif item.fail_reason == "connection_error" %}
                {% trans "Could not connect to remote identity" %}
                {% elif item.fail_reason == "invalid_relationship" %}
                {% trans "Invalid relationship type - please log an issue" %}
                {% else %}
                {% trans "Unknown error" %}
                {% endif %}
            </td>
        </tr>
        {% endfor %}
    </table>
</div>
{% endblock %}

View file

@ -1,5 +1,6 @@
{% extends 'preferences/layout.html' %}
{% load i18n %}
{% load humanize %}
{% load utilities %}
{% block title %}{% trans "Export BookWyrm Account" %}{% endblock %}
@ -48,12 +49,12 @@
<p class="notification is-danger">
{% trans "New user exports are currently disabled." %}
{% if perms.bookwyrm.edit_instance_settings %}
<br/>
{% url 'settings-imports' as url %}
{% blocktrans trimmed %}
User exports settings can be changed from <a href="{{ url }}">the Imports page</a> in the Admin dashboard.
{% endblocktrans %}
{% endif%}
<br/>
{% url 'settings-imports' as url %}
{% blocktrans trimmed %}
User exports settings can be changed from <a href="{{ url }}">the Imports page</a> in the Admin dashboard.
{% endblocktrans %}
{% endif%}
</p>
{% elif next_available %}
<p class="notification is-warning">
@ -61,7 +62,25 @@
You will be able to create a new export file at {{ next_available }}
{% endblocktrans %}
</p>
{% else %}
{% if recent_avg_hours or recent_avg_minutes %}
<div class="notification">
<p>
{% if recent_avg_hours %}
{% blocktrans trimmed with hours=recent_avg_hours|floatformat:0|intcomma %}
On average, recent exports have taken {{ hours }} hours.
{% endblocktrans %}
{% else %}
{% blocktrans trimmed with minutes=recent_avg_minutes|floatformat:0|intcomma %}
On average, recent exports have taken {{ minutes }} minutes.
{% endblocktrans %}
{% endif %}
</p>
</div>
{% endif %}
<form name="export" method="POST" href="{% url 'prefs-user-export' %}">
{% csrf_token %}
<button type="submit" class="button">
@ -107,14 +126,13 @@
{% elif export.job.status == "pending" %}
class="tag is-warning"
{% elif export.job.complete %}
class="tag"
{% else %}
class="tag is-success"
{% else %}
class="tag"
{% endif %}
>
{% if export.job.status %}
{{ export.job.status }}
{{ export.job.status_display }}
{{ export.job.get_status_display }}
{% elif export.job.complete %}
{% trans "Complete" %}
{% else %}

View file

@ -116,7 +116,7 @@ def get_isni(existing, author, autoescape=True):
@register.simple_tag(takes_context=False)
def id_to_username(user_id):
def id_to_username(user_id, return_empty=False):
"""given an arbitrary remote id, return the username"""
if user_id:
url = urlparse(user_id)
@ -126,6 +126,10 @@ def id_to_username(user_id):
value = f"{name}@{domain}"
return value
if return_empty:
return ""
return _("a new user account")

View file

@ -86,7 +86,7 @@
"id": "https://www.example.com/book/2",
"type": "Edition",
"openlibraryKey": "OL680025M",
"title": "Seeking Like A State",
"title": "Seeing Like A State",
"sortTitle": "seeing like a state",
"subtitle": "",
"description": "<p>Examines how (sometimes quasi-) authoritarian high-modernist planning fails to deliver the goods, be they increased resources for the state or a better life for the people.</p>",

View file

@ -0,0 +1,46 @@
""" testing bookwyrm user import """
from unittest.mock import patch
from django.test import TestCase
from bookwyrm import models
from bookwyrm.importers import BookwyrmImporter
class BookwyrmUserImport(TestCase):
    """importing from BookWyrm user import"""

    def setUp(self):
        """create the local user that owns the import jobs"""
        # the `.delay` entry points of these tasks are patched out
        # while the user is created
        with (
            patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"),
            patch("bookwyrm.activitystreams.populate_stream_task.delay"),
            patch("bookwyrm.lists_stream.populate_lists_task.delay"),
            patch("bookwyrm.suggested_users.rerank_user_task.delay"),
        ):
            self.user = models.User.objects.create_user(
                "mouse", "mouse@mouse.mouse", "password", local=True, localname="mouse"
            )

    def test_create_retry_job(self):
        """test retrying a user import"""
        import_jobs = models.bookwyrm_import_job.BookwyrmImportJob.objects

        job = import_jobs.create(user=self.user, required=[])
        job.complete_job()
        self.assertIs(job.retry, False)
        self.assertEqual(import_jobs.count(), 1)

        # retry the job; assert on the job the importer hands back so the
        # return value is covered instead of relying on queryset ordering
        importer = BookwyrmImporter()
        retry_job = importer.create_retry_job(user=self.user, original_job=job)

        self.assertEqual(import_jobs.count(), 2)
        self.assertIs(retry_job.retry, True)
        self.assertNotEqual(job.id, retry_job.id)
        # the retry is created with the original job's `required` list
        self.assertEqual(retry_job.required, job.required)

View file

@ -1,6 +1,7 @@
""" testing import """
from collections import namedtuple
import pathlib
import io
from unittest.mock import patch
import datetime
@ -159,22 +160,11 @@ class GenericImporter(TestCase):
def test_complete_job(self, *_):
"""test notification"""
import_job = self.importer.create_job(
self.local_user, self.csv, False, "unlisted"
)
items = import_job.items.all()
for item in items[:3]:
item.fail_reason = "hello"
item.save()
item.update_job()
self.assertFalse(
models.Notification.objects.filter(
user=self.local_user,
related_import=import_job,
notification_type="IMPORT",
).exists()
)
# csv content not important
csv = io.StringIO("title,author_text,remote_id\nbeep,boop,blurp")
import_job = self.importer.create_job(self.local_user, csv, False, "unlisted")
items = import_job.items.all()
item = items.last()
item.fail_reason = "hello"
item.save()

View file

@ -1,19 +1,21 @@
""" testing models """
import json
import os
import pathlib
from unittest.mock import patch
from django.core.files import File
from django.db.models import Q
from django.utils.dateparse import parse_datetime
from django.test import TestCase
from bookwyrm import models
from bookwyrm import activitypub, models
from bookwyrm.utils.tar import BookwyrmTarFile
from bookwyrm.models import bookwyrm_import_job
class BookwyrmImport(TestCase):
class BookwyrmImport(TestCase): # pylint: disable=too-many-public-methods
"""testing user import functions"""
def setUp(self):
@ -49,8 +51,9 @@ class BookwyrmImport(TestCase):
"badger",
"badger@badger.badger",
"password",
local=True,
local=False,
localname="badger",
remote_id="badger@remote.remote",
)
self.work = models.Work.objects.create(title="Sand Talk")
@ -71,8 +74,14 @@ class BookwyrmImport(TestCase):
with open(self.json_file, "r", encoding="utf-8") as jsonfile:
self.json_data = json.loads(jsonfile.read())
self.archive_file = pathlib.Path(__file__).parent.joinpath(
"../data/bookwyrm_account_export.tar.gz"
self.archive_file_path = os.path.relpath(
pathlib.Path(__file__).parent.joinpath(
"../data/bookwyrm_account_export.tar.gz"
)
)
self.job = bookwyrm_import_job.BookwyrmImportJob.objects.create(
user=self.local_user, required=[]
)
def test_update_user_profile(self):
@ -84,7 +93,7 @@ class BookwyrmImport(TestCase):
patch("bookwyrm.suggested_users.rerank_user_task.delay"),
):
with (
open(self.archive_file, "rb") as fileobj,
open(self.archive_file_path, "rb") as fileobj,
BookwyrmTarFile.open(mode="r:gz", fileobj=fileobj) as tarfile,
):
models.bookwyrm_import_job.update_user_profile(
@ -195,8 +204,14 @@ class BookwyrmImport(TestCase):
self.assertTrue(self.local_user.saved_lists.filter(id=book_list.id).exists())
def test_upsert_follows(self):
"""Test take a list of remote ids and add as follows"""
def test_follow_relationship(self):
"""Test take a remote ID and create a follow"""
task = bookwyrm_import_job.UserRelationshipImport.objects.create(
parent_job=self.job,
relationship="follow",
remote_id="https://blah.blah/user/rat",
)
before_follow = models.UserFollows.objects.filter(
user_subject=self.local_user, user_object=self.rat_user
@ -208,18 +223,168 @@ class BookwyrmImport(TestCase):
patch("bookwyrm.activitystreams.add_user_statuses_task.delay"),
patch("bookwyrm.lists_stream.add_user_lists_task.delay"),
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
patch("bookwyrm.activitypub.resolve_remote_id", return_value=self.rat_user),
):
models.bookwyrm_import_job.upsert_follows(
self.local_user, self.json_data.get("follows")
)
bookwyrm_import_job.import_user_relationship_task(child_id=task.id)
after_follow = models.UserFollows.objects.filter(
user_subject=self.local_user, user_object=self.rat_user
).exists()
self.assertTrue(after_follow)
def test_upsert_user_blocks(self):
"""test adding blocked users"""
def test_import_book_task_existing_author(self):
    """Test importing a book with an author
    already known to the server does not overwrite"""

    self.assertEqual(models.Author.objects.count(), 0)
    models.Author.objects.create(
        id=1,
        name="James C. Scott",
        wikipedia_link="https://en.wikipedia.org/wiki/James_C._Scott",
        wikidata="Q3025403",
        aliases=["Test Alias"],
    )

    # attach the export archive to the parent job while the file is open
    with open(self.archive_file_path, "rb") as fileobj:
        self.job.archive_file = File(fileobj)
        self.job.save()

    task = bookwyrm_import_job.UserImportBook.objects.create(
        parent_job=self.job, book_data=self.json_data.get("books")[0]
    )

    self.assertEqual(models.Edition.objects.count(), 1)

    # run the task
    bookwyrm_import_job.import_book_task(child_id=task.id)

    self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists())
    self.assertEqual(models.Edition.objects.count(), 2)

    # Check the existing author did not get overwritten
    author = models.Author.objects.first()
    self.assertEqual(author.name, "James C. Scott")
    # assertIn takes (member, container): the alias must be in the list,
    # not merely be a substring of the expected text
    self.assertIn("Test Alias", author.aliases)
def test_import_book_task_existing_edition(self):
"""Test importing a book with an edition
already known to the server does not overwrite"""
# attach the export archive to the parent job
with open(self.archive_file_path, "rb") as fileobj:
self.job.archive_file = File(fileobj)
self.job.save()
# child job for books[1] of the JSON fixture
task = bookwyrm_import_job.UserImportBook.objects.create(
parent_job=self.job, book_data=self.json_data.get("books")[1]
)
# precondition: exactly one edition exists and it has this ISBN
self.assertEqual(models.Edition.objects.count(), 1)
self.assertTrue(models.Edition.objects.filter(isbn_13="9780062975645").exists())
# run the task
bookwyrm_import_job.import_book_task(child_id=task.id)
# Check the existing Edition did not get overwritten
self.assertEqual(models.Edition.objects.count(), 1)
self.assertEqual(models.Edition.objects.first().title, "Sand Talk")
def test_import_book_task_existing_work(self):
"""Test importing a book with a work already known
to the server does not overwrite"""
# attach the export archive to the parent job
with open(self.archive_file_path, "rb") as fileobj:
self.job.archive_file = File(fileobj)
self.job.save()
# child job for books[1] of the JSON fixture
task = bookwyrm_import_job.UserImportBook.objects.create(
parent_job=self.job, book_data=self.json_data.get("books")[1]
)
# precondition: a single work exists before the import
self.assertEqual(models.Work.objects.count(), 1)
# run the task
bookwyrm_import_job.import_book_task(child_id=task.id)
# Check the existing Work did not get overwritten
self.assertEqual(models.Work.objects.count(), 1)
# NOTE(review): this compares a title string with a Work instance, so it
# presumably can never fail; `.first().title` was probably intended.
# Confirm against the fixture's work title before changing.
self.assertNotEqual(
self.json_data.get("books")[1]["work"]["title"], models.Work.objects.first()
)
def test_import_book_task_new_author(self):
    """Test importing a book with author not known
    to the server imports the new author"""

    # attach the export archive to the parent job while the file is open
    with open(self.archive_file_path, "rb") as fileobj:
        self.job.archive_file = File(fileobj)
        self.job.save()

    task = bookwyrm_import_job.UserImportBook.objects.create(
        parent_job=self.job, book_data=self.json_data.get("books")[0]
    )

    self.assertEqual(models.Edition.objects.count(), 1)

    # run the task
    bookwyrm_import_job.import_book_task(child_id=task.id)

    self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists())
    self.assertEqual(models.Edition.objects.count(), 2)

    # Check the author was created; .get() also asserts there is exactly one
    author = models.Author.objects.get()
    self.assertEqual(author.name, "James C. Scott")
    # assertIn takes (member, container): the alias must be in the list,
    # not merely be a substring of the expected text
    self.assertIn("James Campbell Scott", author.aliases)
def test_import_book_task_new_edition(self):
"""Test importing a book with an edition
unknown to the server"""
# attach the export archive to the parent job
with open(self.archive_file_path, "rb") as fileobj:
self.job.archive_file = File(fileobj)
self.job.save()
# child job for books[0] of the JSON fixture
task = bookwyrm_import_job.UserImportBook.objects.create(
parent_job=self.job, book_data=self.json_data.get("books")[0]
)
# precondition: one edition exists and none has the ISBN being imported
self.assertEqual(models.Edition.objects.count(), 1)
self.assertFalse(
models.Edition.objects.filter(isbn_13="9780300070163").exists()
)
# run the task
bookwyrm_import_job.import_book_task(child_id=task.id)
# Check the Edition was added
self.assertEqual(models.Edition.objects.count(), 2)
self.assertEqual(models.Edition.objects.first().title, "Sand Talk")
self.assertEqual(models.Edition.objects.last().title, "Seeing Like A State")
self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists())
def test_import_book_task_new_work(self):
"""Test importing a book with a work unknown to the server"""
# attach the export archive to the parent job
with open(self.archive_file_path, "rb") as fileobj:
self.job.archive_file = File(fileobj)
self.job.save()
# child job for books[0] of the JSON fixture
task = bookwyrm_import_job.UserImportBook.objects.create(
parent_job=self.job, book_data=self.json_data.get("books")[0]
)
# precondition: a single work exists before the import
self.assertEqual(models.Work.objects.count(), 1)
# run the task
bookwyrm_import_job.import_book_task(child_id=task.id)
# Check the Work was added
self.assertEqual(models.Work.objects.count(), 2)
self.assertEqual(models.Work.objects.first().title, "Sand Talk")
self.assertEqual(models.Work.objects.last().title, "Seeing Like a State")
def test_block_relationship(self):
"""test adding blocks for users"""
task = bookwyrm_import_job.UserRelationshipImport.objects.create(
parent_job=self.job,
relationship="block",
remote_id="https://blah.blah/user/badger",
)
blocked_before = models.UserBlocks.objects.filter(
Q(
@ -234,10 +399,11 @@ class BookwyrmImport(TestCase):
patch("bookwyrm.activitystreams.remove_user_statuses_task.delay"),
patch("bookwyrm.lists_stream.remove_user_lists_task.delay"),
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
patch(
"bookwyrm.activitypub.resolve_remote_id", return_value=self.badger_user
),
):
models.bookwyrm_import_job.upsert_user_blocks(
self.local_user, self.json_data.get("blocks")
)
bookwyrm_import_job.import_user_relationship_task(child_id=task.id)
blocked_after = models.UserBlocks.objects.filter(
Q(
@ -248,37 +414,29 @@ class BookwyrmImport(TestCase):
self.assertTrue(blocked_after)
def test_get_or_create_edition_existing(self):
"""Test take a JSON string of books and editions,
find or create the editions in the database and
return a list of edition instances"""
"""Test import existing book"""
task = bookwyrm_import_job.UserImportBook.objects.create(
parent_job=self.job,
book_data=self.json_data["books"][1],
)
self.assertEqual(models.Edition.objects.count(), 1)
with (
open(self.archive_file, "rb") as fileobj,
BookwyrmTarFile.open(mode="r:gz", fileobj=fileobj) as tarfile,
):
bookwyrm_import_job.get_or_create_edition(
self.json_data["books"][1], tarfile
) # Sand Talk
bookwyrm_import_job.import_book_task(child_id=task.id)
self.assertEqual(models.Edition.objects.count(), 1)
self.assertEqual(models.Edition.objects.count(), 1)
def test_get_or_create_edition_not_existing(self):
"""Test take a JSON string of books and editions,
find or create the editions in the database and
return a list of edition instances"""
"""Test import new book"""
task = bookwyrm_import_job.UserImportBook.objects.create(
parent_job=self.job,
book_data=self.json_data["books"][0],
)
self.assertEqual(models.Edition.objects.count(), 1)
with (
open(self.archive_file, "rb") as fileobj,
BookwyrmTarFile.open(mode="r:gz", fileobj=fileobj) as tarfile,
):
bookwyrm_import_job.get_or_create_edition(
self.json_data["books"][0], tarfile
) # Seeing like a state
bookwyrm_import_job.import_book_task(child_id=task.id)
self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists())
self.assertEqual(models.Edition.objects.count(), 2)
@ -305,7 +463,7 @@ class BookwyrmImport(TestCase):
self.assertEqual(models.ReadThrough.objects.count(), 0)
bookwyrm_import_job.upsert_readthroughs(
readthroughs, self.local_user, self.book.id
self.local_user, self.book.id, readthroughs
)
self.assertEqual(models.ReadThrough.objects.count(), 1)
@ -318,17 +476,19 @@ class BookwyrmImport(TestCase):
self.assertEqual(models.ReadThrough.objects.first().user, self.local_user)
def test_get_or_create_review(self):
"""Test get_or_create_review_status with a review"""
"""Test upsert_status_task with a review"""
task = bookwyrm_import_job.UserImportPost.objects.create(
parent_job=self.job,
book=self.book,
json=self.json_data["books"][0]["reviews"][0],
status_type="review",
)
self.assertEqual(models.Review.objects.filter(user=self.local_user).count(), 0)
reviews = self.json_data["books"][0]["reviews"]
with (
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True),
):
bookwyrm_import_job.upsert_statuses(
self.local_user, models.Review, reviews, self.book.remote_id
)
with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True):
bookwyrm_import_job.upsert_status_task(child_id=task.id)
self.assertEqual(models.Review.objects.filter(user=self.local_user).count(), 1)
self.assertEqual(
@ -354,18 +514,20 @@ class BookwyrmImport(TestCase):
)
def test_get_or_create_comment(self):
"""Test get_or_create_review_status with a comment"""
"""Test upsert_status_task with a comment"""
task = bookwyrm_import_job.UserImportPost.objects.create(
parent_job=self.job,
book=self.book,
json=self.json_data["books"][1]["comments"][0],
status_type="comment",
)
self.assertEqual(models.Comment.objects.filter(user=self.local_user).count(), 0)
comments = self.json_data["books"][1]["comments"]
with (
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True),
):
bookwyrm_import_job.upsert_statuses(
self.local_user, models.Comment, comments, self.book.remote_id
)
with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True):
bookwyrm_import_job.upsert_status_task(child_id=task.id)
self.assertEqual(models.Comment.objects.filter(user=self.local_user).count(), 1)
self.assertEqual(
models.Comment.objects.filter(book=self.book).first().content,
@ -382,20 +544,22 @@ class BookwyrmImport(TestCase):
)
def test_get_or_create_quote(self):
"""Test get_or_create_review_status with a quote"""
"""Test upsert_status_task with a quote"""
task = bookwyrm_import_job.UserImportPost.objects.create(
parent_job=self.job,
book=self.book,
json=self.json_data["books"][1]["quotations"][0],
status_type="quote",
)
self.assertEqual(
models.Quotation.objects.filter(user=self.local_user).count(), 0
)
quotes = self.json_data["books"][1]["quotations"]
with (
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True),
):
bookwyrm_import_job.upsert_statuses(
self.local_user, models.Quotation, quotes, self.book.remote_id
)
with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True):
bookwyrm_import_job.upsert_status_task(child_id=task.id)
self.assertEqual(
models.Quotation.objects.filter(user=self.local_user).count(), 1
)
@ -416,20 +580,20 @@ class BookwyrmImport(TestCase):
)
def test_get_or_create_quote_unauthorized(self):
"""Test get_or_create_review_status with a quote but not authorized"""
"""Test upsert_status_task with a quote but not authorized"""
task = bookwyrm_import_job.UserImportPost.objects.create(
parent_job=self.job,
book=self.book,
json=self.json_data["books"][1]["quotations"][0],
status="quote",
)
self.assertEqual(
models.Quotation.objects.filter(user=self.local_user).count(), 0
)
quotes = self.json_data["books"][1]["quotations"]
with (
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=False),
):
bookwyrm_import_job.upsert_statuses(
self.local_user, models.Quotation, quotes, self.book.remote_id
)
with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=False):
bookwyrm_import_job.upsert_status_task(child_id=task.id)
self.assertEqual(
models.Quotation.objects.filter(user=self.local_user).count(), 0
)
@ -438,8 +602,6 @@ class BookwyrmImport(TestCase):
"""Take a list and ListItems as JSON and create DB entries
if they don't already exist"""
book_data = self.json_data["books"][0]
other_book = models.Edition.objects.create(
title="Another Book", remote_id="https://example.com/book/9876"
)
@ -471,8 +633,8 @@ class BookwyrmImport(TestCase):
):
bookwyrm_import_job.upsert_lists(
self.local_user,
book_data["lists"],
other_book.id,
self.json_data["books"][0]["lists"],
)
self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1)
@ -488,8 +650,6 @@ class BookwyrmImport(TestCase):
"""Take a list and ListItems as JSON and create DB entries
if they don't already exist"""
book_data = self.json_data["books"][0]
self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 0)
self.assertFalse(models.ListItem.objects.filter(book=self.book.id).exists())
@ -499,8 +659,8 @@ class BookwyrmImport(TestCase):
):
bookwyrm_import_job.upsert_lists(
self.local_user,
book_data["lists"],
self.book.id,
self.json_data["books"][0]["lists"],
)
self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1)
@ -526,12 +686,13 @@ class BookwyrmImport(TestCase):
book=self.book, shelf=shelf, user=self.local_user
)
book_data = self.json_data["books"][0]
with (
patch("bookwyrm.activitystreams.add_book_statuses_task.delay"),
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
):
bookwyrm_import_job.upsert_shelves(self.book, self.local_user, book_data)
bookwyrm_import_job.upsert_shelves(
self.local_user, self.book, self.json_data["books"][0].get("shelves")
)
self.assertEqual(
models.ShelfBook.objects.filter(user=self.local_user.id).count(), 2
@ -545,13 +706,13 @@ class BookwyrmImport(TestCase):
models.ShelfBook.objects.filter(user=self.local_user.id).count(), 0
)
book_data = self.json_data["books"][0]
with (
patch("bookwyrm.activitystreams.add_book_statuses_task.delay"),
patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"),
):
bookwyrm_import_job.upsert_shelves(self.book, self.local_user, book_data)
bookwyrm_import_job.upsert_shelves(
self.local_user, self.book, self.json_data["books"][0].get("shelves")
)
self.assertEqual(
models.ShelfBook.objects.filter(user=self.local_user.id).count(), 2
@ -561,3 +722,49 @@ class BookwyrmImport(TestCase):
self.assertEqual(
models.Shelf.objects.filter(user=self.local_user.id).count(), 4
)
def test_update_followers_address(self):
    """follower addresses from the old account are rewritten for the local user"""
    old_addresses = ["https://old.address/user/oldusername/followers"]

    updated = bookwyrm_import_job.update_followers_address(
        self.local_user, old_addresses
    )

    expected = [f"{self.local_user.remote_id}/followers"]
    self.assertEqual(updated, expected)
def test_is_alias(self):
    """is_alias is truthy when the remote user lists the local user as an alias"""
    self.rat_user.also_known_as.add(self.local_user)

    # resolving the remote id returns rat_user instead of fetching it
    resolve_patch = patch(
        "bookwyrm.activitypub.resolve_remote_id", return_value=self.rat_user
    )
    with resolve_patch:
        result = bookwyrm_import_job.is_alias(
            self.local_user, self.rat_user.remote_id
        )

    self.assertTrue(result)
def test_status_already_exists(self):
    """a comment not in the database is not found; one created for the user is"""
    raw_activity = '{"id":"https://www.example.com/user/rat/comment/4","type":"Comment","published":"2023-08-14T04:48:18.746+00:00","attributedTo":"https://www.example.com/user/rat","content":"<p>this is a comment about an amazing book</p>","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://www.example.com/user/rat/followers"],"replies":{"id":"https://www.example.com/user/rat/comment/4/replies","type":"OrderedCollection","totalItems":0,"first":"https://www.example.com/user/rat/comment/4/replies?page=1","last":"https://www.example.com/user/rat/comment/4/replies?page=1","@context":"https://www.w3.org/ns/activitystreams"},"tag":[],"attachment":[],"sensitive":false,"inReplyToBook":"https://www.example.com/book/4","readingStatus":null,"@context":"https://www.w3.org/ns/activitystreams"}'  # pylint: disable=line-too-long

    # a comment that has not been saved for this user
    unseen_activity = activitypub.parse(json.loads(raw_activity))
    self.assertFalse(
        bookwyrm_import_job.status_already_exists(self.local_user, unseen_activity)
    )

    # a comment saved for this user, round-tripped through its activity form
    saved_comment = models.Comment.objects.create(
        user=self.local_user, book=self.book, content="<p>hi</p>"
    )
    saved_activity = activitypub.parse(saved_comment.to_activity())
    self.assertTrue(
        bookwyrm_import_job.status_already_exists(self.local_user, saved_activity)
    )

View file

@ -434,6 +434,11 @@ urlpatterns = [
# imports
re_path(r"^import/?$", views.Import.as_view(), name="import"),
re_path(r"^user-import/?$", views.UserImport.as_view(), name="user-import"),
re_path(
r"^user-import/(?P<job_id>\d+)/?$",
views.UserImportStatus.as_view(),
name="user-import-status",
),
re_path(
r"^import/(?P<job_id>\d+)/?$",
views.ImportStatus.as_view(),
@ -444,6 +449,11 @@ urlpatterns = [
views.stop_import,
name="import-stop",
),
re_path(
r"^user-import/(?P<job_id>\d+)/stop/?$",
views.stop_user_import,
name="user-import-stop",
),
re_path(
r"^import/(?P<job_id>\d+)/retry/(?P<item_id>\d+)/?$",
views.retry_item,
@ -454,6 +464,11 @@ urlpatterns = [
views.ImportTroubleshoot.as_view(),
name="import-troubleshoot",
),
re_path(
r"^user-import/(?P<job_id>\d+)/failed/?$",
views.UserImportTroubleshoot.as_view(),
name="user-import-troubleshoot",
),
re_path(
r"^import/(?P<job_id>\d+)/review/?$",
views.ImportManualReview.as_view(),

View file

@ -85,10 +85,17 @@ from .shelf.shelf import Shelf
from .shelf.shelf_actions import create_shelf, delete_shelf
from .shelf.shelf_actions import shelve, unshelve
# csv import
from .imports.import_data import Import, UserImport
from .imports.import_status import ImportStatus, retry_item, stop_import
# csv and user import
from .imports.import_data import Import, UserImport, user_import_available
from .imports.import_status import (
ImportStatus,
UserImportStatus,
retry_item,
stop_import,
stop_user_import,
)
from .imports.troubleshoot import ImportTroubleshoot
from .imports.user_troubleshoot import UserImportTroubleshoot
from .imports.manually_review import (
ImportManualReview,
approve_import_item,

View file

@ -1,6 +1,7 @@
""" import books from another app """
from io import TextIOWrapper
import datetime
from typing import Optional
from django.contrib.auth.decorators import login_required
from django.db.models import Avg, ExpressionWrapper, F, fields
@ -149,36 +150,35 @@ class UserImport(View):
jobs = BookwyrmImportJob.objects.filter(user=request.user).order_by(
"-created_date"
)
site = models.SiteSettings.objects.get()
hours = site.user_import_time_limit
allowed = (
jobs.first().created_date < timezone.now() - datetime.timedelta(hours=hours)
if jobs.first()
else True
)
next_available = (
jobs.first().created_date + datetime.timedelta(hours=hours)
if not allowed
else False
)
paginated = Paginator(jobs, PAGE_LENGTH)
page = paginated.get_page(request.GET.get("page"))
data = {
"import_form": forms.ImportUserForm(),
"jobs": page,
"user_import_hours": hours,
"next_available": next_available,
"next_available": user_import_available(user=request.user),
"page_range": paginated.get_elided_page_range(
page.number, on_each_side=2, on_ends=1
),
"invalid": invalid,
}
seconds = get_or_set(
"avg-user-import-time", get_average_user_import_time, timeout=86400
)
if seconds and seconds > 60**2:
data["recent_avg_hours"] = seconds / (60**2)
elif seconds:
data["recent_avg_minutes"] = seconds / 60
return TemplateResponse(request, "import/import_user.html", data)
def post(self, request):
"""ingest a Bookwyrm json file"""
site = models.SiteSettings.objects.get()
if not site.imports_enabled:
raise PermissionDenied()
importer = BookwyrmImporter()
form = forms.ImportUserForm(request.POST, request.FILES)
@ -194,3 +194,45 @@ class UserImport(View):
job.start_job()
return redirect("user-import")
def user_import_available(user: models.User) -> Optional[tuple[datetime.datetime, int]]:
    """for a given user, determine whether they are allowed to run
    a user import and if not, return a tuple with the next available
    time they can import, and how many hours between imports allowed

    Returns ``False`` when the user may import right now, i.e. imports are
    enabled site-wide and their most recent import job (if any) is older
    than the site's ``user_import_time_limit``. ``False`` rather than ``None``
    is kept for backward compatibility with existing callers.

    Otherwise returns ``(next_available, hours)``. When the user has never
    imported but imports are disabled site-wide there is no previous job to
    count from, so the current time is used as ``next_available``.
    """

    site = models.SiteSettings.objects.get()
    hours = site.user_import_time_limit
    cooldown = datetime.timedelta(hours=hours)

    # fetch the most recent job once instead of re-running the query each time
    latest_job = (
        BookwyrmImportJob.objects.filter(user=user).order_by("-created_date").first()
    )
    now = timezone.now()

    allowed = latest_job is None or latest_job.created_date < now - cooldown
    if allowed and site.imports_enabled:
        return False

    if latest_job is None:
        # imports are disabled and this user has no earlier job; previously
        # this path raised AttributeError on `None.created_date`
        return (now, hours)

    return (latest_job.created_date + cooldown, hours)
def get_average_user_import_time() -> Optional[float]:
    """Helper to figure out how long imports are taking (returns seconds)

    Averages ``updated_date - created_date`` over user import jobs created
    in the last seven days that are complete and whose status is not
    "stopped". Returns ``None`` when there is no (non-zero) average to
    report, e.g. because no such jobs exist.
    """

    last_week = timezone.now() - datetime.timedelta(days=7)
    runtime = ExpressionWrapper(
        F("updated_date") - F("created_date"),
        output_field=fields.DurationField(),
    )
    recent_avg = (
        models.BookwyrmImportJob.objects.filter(
            created_date__gte=last_week, complete=True
        )
        .exclude(status="stopped")
        .annotate(runtime=runtime)
        .aggregate(Avg("runtime"))
        .get("runtime__avg")
    )

    if recent_avg:
        return recent_avg.total_seconds()
    return None

View file

@ -83,3 +83,79 @@ def stop_import(request, job_id):
job = get_object_or_404(models.ImportJob, id=job_id, user=request.user)
job.stop_job()
return redirect("import-status", job_id)
# pylint: disable= no-self-use
def _user_import_task_counts(tasks, job_complete):
    """Summarise one kind of child task of a user import job.

    Returns a ``(total, completed, failed)`` tuple. ``total`` is the number
    of tasks, except that it is the string "(pending...)" while the job is
    not complete and no task of this kind exists yet.
    """
    total = tasks.count()
    if not total and not job_complete:
        total = "(pending...)"
    completed = tasks.filter(status="complete").count()
    failed = tasks.filter(status="failed").count()
    return total, completed, failed


@method_decorator(login_required, name="dispatch")
class UserImportStatus(View):
    """status of an existing user import"""

    def get(self, request, job_id):
        """status of an import job

        Raises PermissionDenied when the job belongs to a different user.
        """
        job = get_object_or_404(models.BookwyrmImportJob, id=job_id)
        if job.user != request.user:
            raise PermissionDenied()

        jobs = job.book_tasks.all().order_by("created_date")
        item_count = job.item_count or 1

        paginated = Paginator(jobs, PAGE_LENGTH)
        page = paginated.get_page(request.GET.get("page"))

        books_total, books_done, books_failed = _user_import_task_counts(
            job.book_tasks, job.complete
        )
        statuses_total, statuses_done, statuses_failed = _user_import_task_counts(
            job.status_tasks, job.complete
        )
        (
            relationships_total,
            relationships_done,
            relationships_failed,
        ) = _user_import_task_counts(job.relationship_tasks, job.complete)

        # elapsed time must be "now - last update"; the reverse difference is
        # negative and its `.seconds` attribute wraps into the 0-86399 range,
        # which made this value wrong and capped it below 24 hours
        inactive_hours = (timezone.now() - job.updated_date).total_seconds() / 60 / 60

        data = {
            "job": job,
            "items": page,
            "completed_books_count": books_done,
            "completed_statuses_count": statuses_done,
            "completed_relationships_count": relationships_done,
            "failed_books_count": books_failed,
            "failed_statuses_count": statuses_failed,
            "failed_relationships_count": relationships_failed,
            "fail_count": job.child_jobs.filter(status="failed").count(),
            "book_jobs_count": books_total,
            "status_jobs_count": statuses_total,
            "relationship_jobs_count": relationships_total,
            "page_range": paginated.get_elided_page_range(
                page.number, on_each_side=2, on_ends=1
            ),
            "show_progress": True,
            "item_count": item_count,
            "complete_count": item_count - job.pending_item_count,
            "percent": job.percent_complete,
            # hours since last import item update
            "inactive_time": inactive_hours,
        }

        return TemplateResponse(request, "import/user_import_status.html", data)
@login_required
@require_POST
def stop_user_import(request, job_id):
    """stop one of the requesting user's imports and return to its status page"""
    # filtering on the user as well as the id means someone else's job is a 404
    import_job = get_object_or_404(
        models.BookwyrmImportJob, user=request.user, id=job_id
    )
    import_job.stop_job()
    return redirect("user-import-status", job_id)

View file

@ -0,0 +1,50 @@
""" import books from another app """
from django.contrib.auth.decorators import login_required
from django.core.exceptions import PermissionDenied
from django.core.paginator import Paginator
from django.shortcuts import get_object_or_404, redirect
from django.template.response import TemplateResponse
from django.utils.decorators import method_decorator
from django.urls import reverse
from django.views import View
from bookwyrm import models
from bookwyrm.importers import BookwyrmImporter
from bookwyrm.views import user_import_available
from bookwyrm.settings import PAGE_LENGTH
# pylint: disable= no-self-use
@method_decorator(login_required, name="dispatch")
class UserImportTroubleshoot(View):
    """failed items in an existing user import"""

    def get(self, request, job_id):
        """list the failed child jobs of a user import

        Raises PermissionDenied when the job belongs to a different user.
        """
        job = get_object_or_404(models.BookwyrmImportJob, id=job_id)
        if job.user != request.user:
            raise PermissionDenied()

        items = job.child_jobs.order_by("task_id").filter(status="failed")
        paginated = Paginator(items, PAGE_LENGTH)
        page = paginated.get_page(request.GET.get("page"))
        data = {
            "next_available": user_import_available(user=request.user),
            "job": job,
            "items": page,
            "page_range": paginated.get_elided_page_range(
                page.number, on_each_side=2, on_ends=1
            ),
            "complete": True,
            "page_path": reverse("user-import-troubleshoot", args=[job.id]),
        }

        return TemplateResponse(request, "import/user_troubleshoot.html", data)

    def post(self, request, job_id):
        """retry lines from a user import

        Raises PermissionDenied when the job belongs to a different user.
        """
        job = get_object_or_404(models.BookwyrmImportJob, id=job_id)
        # the retry job reuses the original job's archive file, so only the
        # owner of the original job may trigger it (same rule as `get`)
        if job.user != request.user:
            raise PermissionDenied()

        importer = BookwyrmImporter()
        job = importer.create_retry_job(request.user, job)
        job.start_job()
        # use the named route rather than a hard-coded path
        return redirect("user-import-status", job.id)

View file

@ -1,9 +1,12 @@
""" Let users export their book data """
from datetime import timedelta
import csv
import datetime
import io
from django.contrib.auth.decorators import login_required
from django.db.models import Avg, ExpressionWrapper, F
from django.db.models.fields import DurationField
from django.core.paginator import Paginator
from django.db.models import Q
from django.http import HttpResponse, HttpResponseServerError, Http404
@ -19,7 +22,7 @@ from storages.backends.s3 import S3Storage
from bookwyrm import models
from bookwyrm.models.bookwyrm_export_job import BookwyrmExportJob
from bookwyrm import settings
from bookwyrm.utils.cache import get_or_set
# pylint: disable=no-self-use,too-many-locals
@method_decorator(login_required, name="dispatch")
@ -203,6 +206,14 @@ class ExportUser(View):
),
}
seconds = get_or_set(
"avg-user-export-time", get_average_export_time, timeout=86400
)
if seconds and seconds > 60**2:
data["recent_avg_hours"] = seconds / (60**2)
elif seconds:
data["recent_avg_minutes"] = seconds / 60
return TemplateResponse(request, "preferences/export-user.html", data)
def post(self, request):
@ -253,3 +264,26 @@ class ExportArchive(View):
)
except FileNotFoundError:
raise Http404()
def get_average_export_time() -> float:
    """Average runtime, in seconds, of recent user exports

    Looks at export jobs created in the last seven days that are complete
    and whose status is not "stopped", and averages the gap between their
    created and updated dates. Gives None when there is no average to report.
    """
    week_ago = timezone.now() - datetime.timedelta(days=7)
    runtime = ExpressionWrapper(
        F("updated_date") - F("created_date"),
        output_field=DurationField(),
    )

    finished_jobs = models.BookwyrmExportJob.objects.filter(
        created_date__gte=week_ago, complete=True
    ).exclude(status="stopped")
    averages = finished_jobs.annotate(runtime=runtime).aggregate(Avg("runtime"))
    average_runtime = averages.get("runtime__avg")

    return average_runtime.total_seconds() if average_runtime else None