class BookwyrmImporter:
    """Importer for the JSON archive produced by a Bookwyrm user export.

    Part importer, part connector: it records which sections the user
    selected and hands the uploaded archive to a BookwyrmImportJob.
    """

    def process_import(self, user, archive_file, settings):
        """Create the import job for an uploaded Bookwyrm export archive.

        ``settings`` is a mapping-like object of option name to value; every
        option whose value is the string "on" is stored on the job as part of
        its ``required`` list. Returns the newly created job.
        """
        selected = []
        for option in settings:
            if settings.get(option) == "on":
                selected.append(option)

        return BookwyrmImportJob.objects.create(
            user=user, archive_file=archive_file, required=selected
        )
auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="bookwyrm.parentjob", + ), + ), + ("export_data", models.FileField(null=True, upload_to="")), + ], + options={ + "abstract": False, + }, + bases=("bookwyrm.parentjob",), + ), + migrations.CreateModel( + name="BookwyrmImportJob", + fields=[ + ( + "parentjob_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="bookwyrm.parentjob", + ), + ), + ("archive_file", models.FileField(blank=True, null=True, upload_to="")), + ("import_data", models.JSONField(null=True)), + ( + "required", + django.contrib.postgres.fields.ArrayField( + base_field=models.CharField(blank=True, max_length=50), + blank=True, + size=None, + ), + ), + ], + options={ + "abstract": False, + }, + bases=("bookwyrm.parentjob",), + ), + migrations.CreateModel( + name="ChildJob", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("task_id", models.UUIDField(blank=True, null=True, unique=True)), + ( + "created_date", + models.DateTimeField(default=django.utils.timezone.now), + ), + ( + "updated_date", + models.DateTimeField(default=django.utils.timezone.now), + ), + ("complete", models.BooleanField(default=False)), + ( + "status", + models.CharField( + choices=[ + ("pending", "Pending"), + ("active", "Active"), + ("complete", "Complete"), + ("stopped", "Stopped"), + ], + default="pending", + max_length=50, + null=True, + ), + ), + ( + "parent_job", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="child_jobs", + to="bookwyrm.parentjob", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/bookwyrm/migrations/0182_merge_20230905_2240.py b/bookwyrm/migrations/0182_merge_20230905_2240.py new file mode 100644 index 000000000..83920a9c7 
class BookwyrmExportJob(ParentJob):
    """entry for a specific request to export a bookwyrm user"""

    # archive produced by the export task; stays null until the task has run
    export_data = FileField(null=True)

    def start_job(self):
        """Queue the export task for this job and return the job itself.

        The task is queued with ``no_children=True``, i.e. it is the only
        task expected to run for this job.
        """
        start_export_task.delay(job_id=self.id, no_children=True)

        return self
def json_export(user):
    """Generate an export for a user

    Collects the user's profile settings, reading goals, books (each with its
    edition, authors, readthroughs, shelves, lists, reviews, comments and
    quotes), saved lists, follows and blocks.

    :param user: the user whose data is exported
    :returns: the export encoded as a JSON string with DjangoJSONEncoder
    """
    # user
    profile_fields = [
        "username",
        "name",
        "summary",
        "manually_approves_followers",
        "hide_follows",
        "show_goal",
        "show_suggested_users",
        "discoverable",
        "preferred_timezone",
        "default_post_privacy",
    ]
    exported_user = {field: getattr(user, field) for field in profile_fields}

    if getattr(user, "avatar", False):
        exported_user["avatar"] = f"https://{DOMAIN}{user.avatar.url}"

    # reading goals
    goals_list = []
    try:
        for goal in models.AnnualGoal.objects.filter(user=user).distinct():
            goals_list.append(
                {"goal": goal.goal, "year": goal.year, "privacy": goal.privacy}
            )
    except Exception:  # pylint: disable=broad-except
        # goals stay best-effort, but a failure must be visible in the logs
        # instead of silently producing an incomplete export
        logger.exception("Could not export reading goals for user %s", user.id)

    # books
    editions, books = get_books_for_user(user)
    final_books = []

    for book in books.values():
        edition = editions.filter(id=book["id"])
        book["edition"] = edition.values()[0]
        # authors
        book["authors"] = list(edition.first().authors.all().values())
        # readthroughs
        book_readthroughs = (
            models.ReadThrough.objects.filter(user=user, book=book["id"])
            .distinct()
            .values()
        )
        book["readthroughs"] = list(book_readthroughs)
        # shelves
        shelf_books = models.ShelfBook.objects.filter(
            user=user, book=book["id"]
        ).distinct()
        shelves_from_books = models.Shelf.objects.filter(
            shelfbook__in=shelf_books, user=user
        )

        book["shelves"] = list(shelves_from_books.values())
        book["shelf_books"] = {}

        for shelf in shelves_from_books:
            shelf_contents = models.ShelfBook.objects.filter(
                user=user, shelf=shelf
            ).distinct()

            book["shelf_books"][shelf.identifier] = list(shelf_contents.values())

        # book lists
        book_lists = models.List.objects.filter(
            books__in=[book["id"]], user=user
        ).distinct()
        book["lists"] = list(book_lists.values())
        book["list_items"] = {}
        for blist in book_lists:
            list_items = models.ListItem.objects.filter(book_list=blist).distinct()
            book["list_items"][blist.name] = list(list_items.values())

        # reviews
        reviews = models.Review.objects.filter(user=user, book=book["id"]).distinct()
        book["reviews"] = list(reviews.values())

        # comments
        comments = models.Comment.objects.filter(user=user, book=book["id"]).distinct()
        book["comments"] = list(comments.values())

        # quotes
        quotes = models.Quotation.objects.filter(user=user, book=book["id"]).distinct()
        book["quotes"] = list(quotes.values())

        # append everything
        final_books.append(book)

    # saved book lists
    saved_lists = models.List.objects.filter(id__in=user.saved_lists.all()).distinct()
    saved_lists = [saved.remote_id for saved in saved_lists]

    # follows
    follows = models.UserFollows.objects.filter(user_subject=user).distinct()
    following = models.User.objects.filter(
        userfollows_user_object__in=follows
    ).distinct()
    follows = [followed.remote_id for followed in following]

    # blocks
    blocks = models.UserBlocks.objects.filter(user_subject=user).distinct()
    blocking = models.User.objects.filter(userblocks_user_object__in=blocks).distinct()
    blocks = [blocked.remote_id for blocked in blocking]

    data = {
        "user": exported_user,
        "goals": goals_list,
        "books": final_books,
        "saved_lists": saved_lists,
        "follows": follows,
        "blocked_users": blocks,
    }

    return DjangoJSONEncoder().encode(data)
@app.task(queue=IMPORTS, base=ParentTask)
def start_import_task(**kwargs):
    """Run a Bookwyrm user import from the job's uploaded archive.

    Reads ``archive.json`` from the gzipped tar archive, applies every section
    named in ``job.required``, imports the books and saves the parsed data on
    the job.

    Keyword Arguments:
        job_id (int): 'id' of the BookwyrmImportJob to run.
    """
    job = BookwyrmImportJob.objects.get(id=kwargs["job_id"])
    archive_file = job.archive_file

    # don't start the job if it was stopped from the UI
    if job.complete:
        return

    archive_file.open("rb")
    try:
        with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar:
            job.import_data = json.loads(tar.read("archive.json").decode("utf-8"))

            if "include_user_profile" in job.required:
                update_user_profile(job.user, tar, job.import_data.get("user"))
            if "include_user_settings" in job.required:
                update_user_settings(job.user, job.import_data.get("user"))
            if "include_goals" in job.required:
                update_goals(job.user, job.import_data.get("goals"))
            if "include_saved_lists" in job.required:
                upsert_saved_lists(job.user, job.import_data.get("saved_lists"))
            if "include_follows" in job.required:
                upsert_follows(job.user, job.import_data.get("follows"))
            if "include_blocks" in job.required:
                upsert_user_blocks(job.user, job.import_data.get("blocked_users"))

            process_books(job, tar)

        job.save()
    finally:
        # release the uploaded file even when one of the import steps raises
        archive_file.close()
We need to merge Book and Edition instances + # and also check whether these books already exist in the DB + books = job.import_data.get("books") + + for data in books: + book = get_or_create_edition(data, tar) + + if "include_shelves" in job.required: + upsert_shelves(book, job.user, data) + + if "include_readthroughs" in job.required: + upsert_readthroughs(data.get("readthroughs"), job.user, book.id) + + if "include_reviews" in job.required: + get_or_create_statuses( + job.user, models.Review, data.get("reviews"), book.id + ) + + if "include_comments" in job.required: + get_or_create_statuses( + job.user, models.Comment, data.get("comments"), book.id + ) + + if "include_quotes" in job.required: + get_or_create_statuses( + job.user, models.Quotation, data.get("quotes"), book.id + ) + if "include_lists" in job.required: + upsert_lists(job.user, data.get("lists"), data.get("list_items"), book.id) + + +def get_or_create_edition(book_data, tar): + """Take a JSON string of book and edition data, + find or create the edition in the database and + return an edition instance""" + + cover_path = book_data.get( + "cover", None + ) # we use this further down but need to assign a var before cleaning + + clean_book = clean_values(book_data) + book = clean_book.copy() # don't mutate the original book data + + # prefer edition values only if they are not null + edition = clean_values(book["edition"]) + for key in edition.keys(): + if key not in book.keys() or ( + key in book.keys() and (edition[key] not in [None, ""]) + ): + book[key] = edition[key] + + existing = find_existing(models.Edition, book, None) + if existing: + return existing + + # the book is not in the local database, so we have to do this the hard way + local_authors = get_or_create_authors(book["authors"]) + + # get rid of everything that's not strictly in a Book + # or is many-to-many so can't be set directly + associated_values = [ + "edition", + "authors", + "readthroughs", + "shelves", + "shelf_books", + 
def clean_values(data):
    """Return a copy of ``data`` without the values we don't want
    when creating new instances.

    The keys removed are identifiers and relations that only make sense in
    the database the data was exported from. The input mapping is left
    untouched, so callers can keep using the original import data.
    """

    unwanted = {
        "id",
        "pk",
        "remote_id",
        "cover",
        "preview_image",
        "last_edited_by",
        "last_edited_by_id",
        "user",
        "book_list",
        "shelf_book",
        "parent_work_id",
    }

    # build a new dict rather than deleting from the caller's data
    return {key: value for key, value in data.items() if key not in unwanted}
def upsert_readthroughs(data, user, book_id):
    """Take a list of readthrough dicts and create the ones that do not
    already exist in the database for this user and book.

    Nothing is returned. An empty or missing list is a no-op.
    """

    if not data:
        return

    def _parse(value):
        """parse a datetime string, passing None through"""
        return parse_datetime(value) if value is not None else None

    # the edition is the same for every readthrough, so look it up once
    book = models.Edition.objects.get(id=book_id)

    for rt in data:
        readthrough = {
            "user": user,
            "book": book,
            "progress": rt["progress"],
            "progress_mode": rt["progress_mode"],
            "start_date": _parse(rt["start_date"]),
            "finish_date": _parse(rt["finish_date"]),
            "stopped_date": _parse(rt["stopped_date"]),
            "is_active": rt["is_active"],
        }

        existing = models.ReadThrough.objects.filter(**readthrough).exists()
        if not existing:
            models.ReadThrough.objects.create(**readthrough)
def upsert_shelves(book, user, book_data):
    """Take shelf and ShelfBooks JSON objects and create
    DB entries if they don't already exist"""

    shelf_books = book_data.get("shelf_books") or {}

    for shelf in book_data["shelves"]:
        book_shelf = models.Shelf.objects.filter(name=shelf["name"], user=user).first()
        if not book_shelf:
            book_shelf = models.Shelf.objects.create(
                name=shelf["name"],
                user=user,
                identifier=shelf["identifier"],
                description=shelf["description"],
                editable=shelf["editable"],
                privacy=shelf["privacy"],
            )

        # shelf_books is keyed by the identifier the shelf had in the export.
        # A shelf that already exists locally under the same name can have a
        # different identifier, so look up with the exported one.
        for shelfbook in shelf_books.get(shelf["identifier"], []):

            shelved_date = parse_datetime(shelfbook["shelved_date"])

            if not models.ShelfBook.objects.filter(
                book=book, shelf=book_shelf, user=user
            ).exists():
                models.ShelfBook.objects.create(
                    book=book,
                    shelf=book_shelf,
                    user=user,
                    shelved_date=shelved_date,
                )
def update_goals(user, data):
    """update the user's goals from import data

    ``data`` is a list of goal dicts. A goal for a year the user already has
    is updated in place, otherwise a new goal is created. The dicts in
    ``data`` are not modified.
    """

    for goal in data:
        # edit the existing goal if there is one instead of making a new one
        existing = models.AnnualGoal.objects.filter(
            year=goal["year"], user=user
        ).first()
        if existing:
            for key, value in goal.items():
                setattr(existing, key, value)
            existing.save()
        else:
            # pass the user alongside a copy: writing it into ``goal`` would
            # leave a User instance inside the job's import_data, which is
            # saved to a JSONField later and cannot be serialised
            models.AnnualGoal.objects.create(**{**goal, "user": user})
def upsert_follows(user, values):
    """Request to follow every user in a list of remote ids.

    Ids that cannot be resolved to a user are skipped.
    """

    for remote_id in values:
        followee = activitypub.resolve_remote_id(remote_id, models.User)
        if not followee:
            continue

        request, is_new = models.UserFollowRequest.objects.get_or_create(
            user_subject=user,
            user_object=followee,
        )
        if is_new:
            continue

        # this request probably failed to connect with the remote
        # that means we should save to trigger a re-broadcast
        request.save()
b/bookwyrm/models/job.py @@ -0,0 +1,290 @@ +"""Everything needed for Celery to multi-thread complex tasks.""" + +from django.db import models +from django.db import transaction +from django.utils.translation import gettext_lazy as _ +from django.utils import timezone +from bookwyrm.models.user import User + +from bookwyrm.tasks import app + + +class Job(models.Model): + """Abstract model to store the state of a Task.""" + + class Status(models.TextChoices): + """Possible job states.""" + + PENDING = "pending", _("Pending") + ACTIVE = "active", _("Active") + COMPLETE = "complete", _("Complete") + STOPPED = "stopped", _("Stopped") + + task_id = models.UUIDField(unique=True, null=True, blank=True) + + created_date = models.DateTimeField(default=timezone.now) + updated_date = models.DateTimeField(default=timezone.now) + complete = models.BooleanField(default=False) + status = models.CharField( + max_length=50, choices=Status.choices, default=Status.PENDING, null=True + ) + + class Meta: + abstract = True + + def complete_job(self): + """Report that the job has completed""" + if self.complete: + return + + self.status = self.Status.COMPLETE + self.complete = True + self.updated_date = timezone.now() + + self.save(update_fields=["status", "complete", "updated_date"]) + + def stop_job(self): + """Stop the job""" + if self.complete: + return + + self.__terminate_job() + + self.status = self.Status.STOPPED + self.complete = True + self.updated_date = timezone.now() + + self.save(update_fields=["status", "complete", "updated_date"]) + + def set_status(self, status): + """Set job status""" + if self.complete: + return + + if self.status == status: + return + + if status == self.Status.COMPLETE: + self.complete_job() + return + + if status == self.Status.STOPPED: + self.stop_job() + return + + self.updated_date = timezone.now() + self.status = status + + self.save(update_fields=["status", "updated_date"]) + + def __terminate_job(self): + """Tell workers to ignore and not 
class ParentJob(Job):
    """Store the state of a Task which can spawn many :model:`ChildJob`s to spread
    resource load.

    Intended to be sub-classed if necessary via proxy or
    multi-table inheritance.
    Extends :model:`Job`.
    """

    user = models.ForeignKey(User, on_delete=models.CASCADE)

    def complete_job(self):
        """Report that the job has completed and stop pending
        children. Extend.
        """
        super().complete_job()
        self.__terminate_pending_child_jobs()

    def stop_job(self):
        """Stop the job and its pending child tasks. Extend.

        Job.stop_job() reaches its own name-mangled terminate helper, which a
        subclass cannot override with another double-underscore method, so the
        children are stopped by extending stop_job() itself.
        """
        if self.complete:
            return

        super().stop_job()
        self.__terminate_pending_child_jobs()

    def notify_child_job_complete(self):
        """let the job know when the items get work done"""
        if self.complete:
            return

        self.updated_date = timezone.now()
        self.save(update_fields=["updated_date"])

        if self.has_completed:
            self.complete_job()

    def __terminate_pending_child_jobs(self):
        """Tell workers to ignore and not execute any pending child tasks."""
        pending = self.pending_child_jobs
        task_ids = pending.filter(task_id__isnull=False).values_list(
            "task_id", flat=True
        )
        app.control.revoke(list(task_ids))

        # update() exists on querysets, not on model instances
        pending.update(status=self.Status.STOPPED)

    @property
    def has_completed(self):
        """has this job finished"""
        return not self.pending_child_jobs.exists()

    @property
    def pending_child_jobs(self):
        """items that haven't been processed yet"""
        return self.child_jobs.filter(complete=False)
class ParentTask(app.Task):
    """Celery base Task for tasks that drive a ParentJob.

    The lifecycle hooks below record the task id on the ParentJob and keep its
    status up to date, so the task body does not have to.
    Usage e.g. @app.task(base=ParentTask)
    """

    def before_start(self, task_id, args, kwargs):
        """Prepare the ParentJob before the task starts. Override.

        Arguments:
            task_id (str): Unique id of the task to execute.
            args (Tuple): Original arguments for the task to execute.
            kwargs (Dict): Original keyword arguments for the task to execute.

        Keyword Arguments:
            job_id (int): Unique 'id' of the ParentJob.
            no_children (bool): If 'True' this is the only Task expected to run
                for the given ParentJob.

        Returns:
            None: The return value of this handler is ignored.
        """
        parent = ParentJob.objects.get(id=kwargs["job_id"])
        parent.task_id = task_id
        parent.save(update_fields=["task_id"])

        if not kwargs["no_children"]:
            return

        # with no child tasks to report back, the job becomes active right away
        parent.set_status(ChildJob.Status.ACTIVE)

    def on_success(self, retval, task_id, args, kwargs):
        """Mark the ParentJob complete when the task succeeds. Override.

        Arguments:
            retval (Any): The return value of the task.
            task_id (str): Unique id of the executed task.
            args (Tuple): Original arguments for the executed task.
            kwargs (Dict): Original keyword arguments for the executed task.

        Keyword Arguments:
            job_id (int): Unique 'id' of the ParentJob.
            no_children (bool): If 'True' this is the only Task expected to run
                for the given ParentJob.

        Returns:
            None: The return value of this handler is ignored.
        """
        if not kwargs["no_children"]:
            return

        parent = ParentJob.objects.get(id=kwargs["job_id"])
        parent.complete_job()
+ """ + subtask = ChildJob.objects.get(id=kwargs["child_id"]) + subtask.complete_job() + + +@transaction.atomic +def create_child_job(parent_job, task_callback): + """Utility method for creating a ChildJob + and running a task to avoid DB race conditions + """ + child_job = ChildJob.objects.create(parent_job=parent_job) + transaction.on_commit( + lambda: task_callback.delay(job_id=parent_job.id, child_id=child_job.id) + ) + + return child_job diff --git a/bookwyrm/templates/import/import_user.html b/bookwyrm/templates/import/import_user.html new file mode 100644 index 000000000..86e99f657 --- /dev/null +++ b/bookwyrm/templates/import/import_user.html @@ -0,0 +1,163 @@ +{% extends 'layout.html' %} +{% load i18n %} +{% load humanize %} + +{% block title %}{% trans "Import User" %}{% endblock %} + +{% block content %} +
+

{% trans "Import User" %}

+ + {% if invalid %} +
+ {% trans "Not a valid JSON file" %} +
+ {% endif %} + + + {% if import_size_limit and import_limit_reset %} +
+

{% blocktrans %}Currently you are allowed to import one user every {{ user_import_limit_reset }} days.{% endblocktrans %}

+

{% blocktrans %}You have {{ allowed_imports }} left.{% endblocktrans %}

+
+ {% endif %} + {% if recent_avg_hours or recent_avg_minutes %} +
+

+ {% if recent_avg_hours %} + {% blocktrans trimmed with hours=recent_avg_hours|floatformat:0|intcomma %} + On average, recent imports have taken {{ hours }} hours. + {% endblocktrans %} + {% else %} + {% blocktrans trimmed with minutes=recent_avg_minutes|floatformat:0|intcomma %} + On average, recent imports have taken {{ minutes }} minutes. + {% endblocktrans %} + {% endif %} +

+
+ {% endif %} + +
+ {% csrf_token %} + +
+
+
+ + {{ import_form.archive_file }} +
+
+

{% trans "Importing this file will overwrite any data you currently have saved." %}

+

{% trans "Deselect any data you do not wish to include in your import. Books will always be imported." %}

+
+
+ +
+
+ + + + + + + + + + + + +
+
+
+ {% if not import_limit_reset and not import_size_limit or allowed_imports > 0 %} + + {% else %} + +

{% trans "You've reached the import limit." %}

+ {% endif%} +
+ +
+ +
+

{% trans "Recent Imports" %}

+
+ + + + + + + {% if not jobs %} + + + + {% endif %} + {% for job in jobs %} + + + + + + {% endfor %} +
+ {% trans "Date Created" %} + + {% trans "Last Updated" %} + + {% trans "Status" %} +
+ {% trans "No recent imports" %} +
+

{{ job.created_date }}

+
{{ job.updated_date }} + + {% if job.status %} + {{ job.status }} + {{ job.status_display }} + {% elif job.complete %} + {% trans "Complete" %} + {% else %} + {% trans "Active" %} + {% endif %} + +
+
+ + {% include 'snippets/pagination.html' with page=jobs path=request.path %} +
+{% endblock %} diff --git a/bookwyrm/templates/preferences/export-user.html b/bookwyrm/templates/preferences/export-user.html new file mode 100644 index 000000000..81f13bc22 --- /dev/null +++ b/bookwyrm/templates/preferences/export-user.html @@ -0,0 +1,89 @@ +{% extends 'preferences/layout.html' %} +{% load i18n %} + +{% block title %}{% trans "User Export" %}{% endblock %} + +{% block header %} +{% trans "User Export" %} +{% endblock %} + +{% block panel %} +
+

+ {% trans "Your exported archive file will include all user data for import into another Bookwyrm server" %} +

+

+

+ {% csrf_token %} + +
+

+
+
+

{% trans "Recent Exports" %}

+

+ {% trans "User export files will show 'complete' once ready. This may take a little while. Click the link to download your file." %} +

+
+ + + + + + + {% if not jobs %} + + + + {% endif %} + {% for job in jobs %} + + + + + + {% endfor %} +
+ {% trans "Date Created" %} + + {% trans "Last Updated" %} + + {% trans "Status" %} +
+ {% trans "No recent exports" %} +
+ {% if job.complete %} +

{{ job.created_date }}

+ {% else %} +

{{ job.created_date }}

+ {% endif %} +
{{ job.updated_date }} + + {% if job.status %} + {{ job.status }} + {{ job.status_display }} + {% elif job.complete %} + {% trans "Complete" %} + {% else %} + {% trans "Active" %} + {% endif %} + +
+
+ + {% include 'snippets/pagination.html' with page=jobs path=request.path %} +
+{% endblock %} diff --git a/bookwyrm/templates/preferences/export.html b/bookwyrm/templates/preferences/export.html index 61933be3e..6976c5e27 100644 --- a/bookwyrm/templates/preferences/export.html +++ b/bookwyrm/templates/preferences/export.html @@ -1,16 +1,16 @@ {% extends 'preferences/layout.html' %} {% load i18n %} -{% block title %}{% trans "CSV Export" %}{% endblock %} +{% block title %}{% trans "Books Export" %}{% endblock %} {% block header %} -{% trans "CSV Export" %} +{% trans "Books Export" %} {% endblock %} {% block panel %}

- {% trans "Your export will include all the books on your shelves, books you have reviewed, and books with reading activity." %} + {% trans "Your CSV export file will include all the books on your shelves, books you have reviewed, and books with reading activity.
Use this to import into a service like Goodreads." %}

diff --git a/bookwyrm/templates/preferences/layout.html b/bookwyrm/templates/preferences/layout.html index ca63ec93d..8a03e7723 100644 --- a/bookwyrm/templates/preferences/layout.html +++ b/bookwyrm/templates/preferences/layout.html @@ -32,11 +32,19 @@ diff --git a/bookwyrm/tests/data/bookwyrm_account_export.json b/bookwyrm/tests/data/bookwyrm_account_export.json new file mode 100644 index 000000000..1652d9e45 --- /dev/null +++ b/bookwyrm/tests/data/bookwyrm_account_export.json @@ -0,0 +1,452 @@ +{ + "user": { + "username": "rat@www.example.com", + "name": "Rat", + "summary": "I love to make soup in Paris and eat pizza in New York", + "manually_approves_followers": true, + "hide_follows": true, + "show_goal": false, + "show_suggested_users": false, + "discoverable": false, + "preferred_timezone": "Australia/Adelaide", + "default_post_privacy": "followers" + }, + "goals": [ + { + "goal": 12, + "year": 2023, + "privacy": "followers" + } + ], + "books": [ + { + "id": 4880, + "created_date": "2023-08-14T02:03:12.509Z", + "updated_date": "2023-08-14T02:04:51.602Z", + "remote_id": "https://www.example.com/book/4880", + "origin_id": "https://bookwyrm.social/book/9389", + "openlibrary_key": "OL680025M", + "inventaire_id": "isbn:9780300070163", + "librarything_key": null, + "goodreads_key": null, + "bnf_id": null, + "viaf": null, + "wikidata": null, + "asin": null, + "aasin": null, + "isfdb": null, + "search_vector": "'c':16C 'certain':6B 'condit':12B 'fail':14B 'human':11B 'improv':9B 'james':15C 'like':2A 'scheme':7B 'scott':17C 'see':1A 'state':4A", + "last_edited_by_id": 243, + "connector_id": null, + "title": "Seeing Like a State", + "sort_title": "seeing like a state", + "subtitle": "how certain schemes to improve the human condition have failed", + "description": "

Examines how (sometimes quasi-) authoritarian high-modernist planning fails to deliver the goods, be they increased resources for the state or a better life for the people.

", + "languages": [ + "English" + ], + "series": "", + "series_number": "", + "subjects": [], + "subject_places": [], + "cover": "covers/d273d638-191d-4ebf-b213-3c60dbf010fe.jpeg", + "preview_image": "", + "first_published_date": null, + "published_date": "1998-03-30T00:00:00Z", + "edition": { + "id": 4880, + "created_date": "2023-08-14T02:03:12.509Z", + "updated_date": "2023-08-14T02:04:51.602Z", + "remote_id": "https://www.example.com/book/4880", + "origin_id": "https://bookwyrm.social/book/9389", + "openlibrary_key": "OL680025M", + "inventaire_id": "isbn:9780300070163", + "librarything_key": null, + "goodreads_key": null, + "bnf_id": null, + "viaf": null, + "wikidata": null, + "asin": null, + "aasin": null, + "isfdb": null, + "search_vector": "'c':16C 'certain':6B 'condit':12B 'fail':14B 'human':11B 'improv':9B 'james':15C 'like':2A 'scheme':7B 'scott':17C 'see':1A 'state':4A", + "last_edited_by_id": 243, + "connector_id": null, + "title": "Seeing Like a State", + "sort_title": "seeing like a state", + "subtitle": "how certain schemes to improve the human condition have failed", + "description": "

Examines how (sometimes quasi-) authoritarian high-modernist planning fails to deliver the goods, be they increased resources for the state or a better life for the people.

", + "languages": [ + "English" + ], + "series": "", + "series_number": "", + "subjects": [], + "subject_places": [], + "cover": "covers/d273d638-191d-4ebf-b213-3c60dbf010fe.jpeg", + "preview_image": "", + "first_published_date": null, + "published_date": "1998-03-30T00:00:00Z", + "book_ptr_id": 4880, + "isbn_10": "0300070160", + "isbn_13": "9780300070163", + "oclc_number": "", + "pages": 445, + "physical_format": "", + "physical_format_detail": "", + "publishers": [], + "parent_work_id": 4877, + "edition_rank": 8 + }, + "authors": [ + { + "id": 1189, + "created_date": "2023-08-14T02:03:11.578Z", + "updated_date": "2023-08-14T02:03:11.578Z", + "remote_id": "https://www.example.com/author/1189", + "origin_id": "https://bookwyrm.social/author/1110", + "openlibrary_key": "OL4398216A", + "inventaire_id": "wd:Q3025403", + "librarything_key": "scottjamesc", + "goodreads_key": "11958", + "bnf_id": "120602158", + "viaf": "47858502", + "wikidata": "Q3025403", + "asin": "B001H9W1D2", + "aasin": null, + "search_vector": null, + "last_edited_by_id": 62, + "wikipedia_link": "https://en.wikipedia.org/wiki/James_C._Scott", + "isni": "0000000108973024", + "gutenberg_id": null, + "isfdb": null, + "website": "", + "born": "1934-12-01T23:00:00Z", + "died": null, + "name": "James C. Scott", + "aliases": [ + "James Campbell Scott", + "\u30b8\u30a7\u30fc\u30e0\u30ba\u30fbC. \u30b9\u30b3\u30c3\u30c8", + "\u30b8\u30a7\u30fc\u30e0\u30ba\u30fbC\u30fb\u30b9\u30b3\u30c3\u30c8", + "\u062c\u06cc\u0645\u0632 \u0633\u06cc. \u0627\u0633\u06a9\u0627\u062a", + "Jim Scott", + "\u062c\u064a\u0645\u0633 \u0633\u0643\u0648\u062a", + "James C. Scott", + "\u0414\u0436\u0435\u0439\u043c\u0441 \u0421\u043a\u043e\u0442\u0442", + "\u30b8\u30a7\u30fc\u30e0\u30b9\u30fbC \u30b9\u30b3\u30c3\u30c8", + "James Cameron Scott" + ], + "bio": "

American political scientist and anthropologist

" + } + ], + "readthroughs": [ + { + "id": 1, + "created_date": "2023-08-14T04:00:27.544Z", + "updated_date": "2023-08-14T04:00:27.546Z", + "remote_id": "https://www.example.com/user/rat/readthrough/1", + "user_id": 1, + "book_id": 4880, + "progress": null, + "progress_mode": "PG", + "start_date": "2018-01-01T00:00:00Z", + "finish_date": "2023-08-13T00:00:00Z", + "stopped_date": null, + "is_active": false + } + ], + "shelves": [ + { + "id": 3, + "created_date": "2023-08-13T05:02:16.554Z", + "updated_date": "2023-08-13T05:02:16.554Z", + "remote_id": "https://www.example.com/user/rat/books/read", + "name": "Read", + "identifier": "read", + "description": null, + "user_id": 1, + "editable": false, + "privacy": "public" + }, + { + "id": 1, + "created_date": "2023-08-13T05:02:16.551Z", + "updated_date": "2023-08-13T05:02:16.552Z", + "remote_id": "https://www.example.com/user/rat/books/to-read", + "name": "To Read", + "identifier": "to-read", + "description": null, + "user_id": 1, + "editable": false, + "privacy": "public" + } + ], + "shelf_books": { + "read": [ + { + "id": 1, + "created_date": "2023-08-14T02:51:09.005Z", + "updated_date": "2023-08-14T02:51:09.015Z", + "remote_id": "https://www.example.com/user/rat/shelfbook/1", + "book_id": 4880, + "shelf_id": 3, + "shelved_date": "2023-08-13T03:52:49.196Z", + "user_id": 1 + } + ], + "to-read": [ + { + "id": 2, + "created_date": "2023-08-14T04:00:27.558Z", + "updated_date": "2023-08-14T04:00:27.564Z", + "remote_id": "https://www.example.com/user/rat/shelfbook/2", + "book_id": 4880, + "shelf_id": 1, + "shelved_date": "2023-08-13T03:51:13.175Z", + "user_id": 1 + } + ] + }, + "lists": [ + { + "id": 2, + "created_date": "2023-08-14T04:00:27.585Z", + "updated_date": "2023-08-14T04:02:54.826Z", + "remote_id": "https://www.example.com/list/2", + "name": "my list of books", + "user_id": 1, + "description": "Here is a description of my list", + "privacy": "followers", + "curation": "closed", + "group_id": null, + "embed_key": 
"6759a53e-3581-4685-b77a-7de765c03480" + } + ], + "list_items": { + "my list of books": [ + { + "id": 1, + "created_date": "2023-08-14T04:02:54.806Z", + "updated_date": "2023-08-14T04:02:54.808Z", + "remote_id": "https://www.example.com/user/rat/listitem/1", + "book_id": 4880, + "book_list_id": 2, + "user_id": 1, + "notes": "It's fun.", + "approved": true, + "order": 1 + } + ] + }, + "reviews": [ + { + "id": 1082, + "created_date": "2023-08-14T04:09:18.354Z", + "updated_date": "2023-08-14T04:09:18.382Z", + "remote_id": "https://www.example.com/user/rat/review/1082", + "user_id": 1, + "content": "

I like it

", + "raw_content": "I like it", + "local": true, + "content_warning": "Here's a spoiler alert", + "privacy": "followers", + "sensitive": true, + "published_date": "2023-08-14T04:09:18.343Z", + "edited_date": null, + "deleted": false, + "deleted_date": null, + "reply_parent_id": null, + "thread_id": 1082, + "ready": true, + "status_ptr_id": 1082, + "book_id": 4880, + "reading_status": null, + "name": "great book", + "rating": "5.00" + } + ], + "comments": [], + "quotes": [] + }, + { + "id": 6190, + "created_date": "2023-08-14T04:48:02.034Z", + "updated_date": "2023-08-14T04:48:02.174Z", + "remote_id": "https://www.example.com/book/6190", + "origin_id": "https://bookrastinating.com/book/330127", + "openlibrary_key": null, + "inventaire_id": "isbn:9780062975645", + "librarything_key": null, + "goodreads_key": null, + "bnf_id": null, + "viaf": null, + "wikidata": null, + "asin": null, + "aasin": null, + "isfdb": null, + "search_vector": "'indigen':4A 'sand':1A 'save':7A 'talk':2A 'think':5A 'tyson':10C 'world':9A 'yunkaporta':11C", + "last_edited_by_id": null, + "connector_id": null, + "title": "Sand Talk: How Indigenous Thinking Can Save the World", + "sort_title": null, + "subtitle": null, + "description": null, + "languages": [ + "English" + ], + "series": "", + "series_number": "", + "subjects": [], + "subject_places": [], + "cover": "covers/6a553a08-2641-42a1-baa4-960df9edbbfc.jpeg", + "preview_image": "", + "first_published_date": null, + "published_date": "2020-11-26T00:00:00Z", + "edition": { + "id": 4265, + "created_date": "2023-08-24T10:18:16.563Z", + "updated_date": "2023-08-24T10:18:16.649Z", + "remote_id": "https://www.example.com/book/4265", + "origin_id": "https://bookwyrm.social/book/65189", + "openlibrary_key": "OL28216445M", + "inventaire_id": null, + "librarything_key": "", + "goodreads_key": null, + "bnf_id": null, + "viaf": null, + "wikidata": null, + "asin": null, + "aasin": null, + "isfdb": null, + "search_vector": "'indigen':4B 'sand':1A 
'save':7B 'talk':2A 'think':5B 'tyson':10C 'world':9B 'yunkaporta':11C", + "last_edited_by_id": 241, + "connector_id": null, + "title": "Sand Talk", + "sort_title": null, + "subtitle": "How Indigenous Thinking Can Save the World", + "description": "

As an indigenous person, Tyson Yunkaporta looks at global systems from a unique perspective, one tied to the natural and spiritual world. In considering how contemporary life diverges from the pattern of creation, he raises important questions. How does this affect us? How can we do things differently?

\n

In this thoughtful, culturally rich, mind-expanding book, he provides answers. Yunkaporta\u2019s writing process begins with images. Honoring indigenous traditions, he makes carvings of what he wants to say, channeling his thoughts through symbols and diagrams rather than words. He yarns with people, looking for ways to connect images and stories with place and relationship to create a coherent world view, and he uses sand talk, the Aboriginal custom of drawing images on the ground to convey knowledge.

\n

In Sand Talk, he provides a new model for our everyday lives. Rich in ideas and inspiration, it explains how lines and symbols and shapes can help us make sense of the world. It\u2019s about how we learn and how we remember. It\u2019s about talking to everyone and listening carefully. It\u2019s about finding different ways to look at things.

\n

Most of all it\u2019s about a very special way of thinking, of learning to see from a native perspective, one that is spiritually and physically tied to the earth around us, and how it can save our world.

\n

Sand Talk include 22 black-and-white illustrations that add depth to the text.

", + "languages": [], + "series": "", + "series_number": "", + "subjects": [], + "subject_places": [], + "cover": "covers/70d90f7d-8b81-431d-9b00-ca2656b06ca0.jpeg", + "preview_image": "", + "first_published_date": null, + "published_date": "2020-05-12T00:00:00Z", + "book_ptr_id": 4265, + "isbn_10": "", + "isbn_13": "", + "oclc_number": "", + "pages": 256, + "physical_format": "", + "physical_format_detail": "hardcover", + "publishers": [ + "HarperOne" + ], + "parent_work_id": 4263, + "edition_rank": 5 + }, + "authors": [ + { + "id": 1390, + "created_date": "2023-08-14T04:48:00.433Z", + "updated_date": "2023-08-14T04:48:00.436Z", + "remote_id": "https://www.example.com/author/1390", + "origin_id": "https://bookrastinating.com/author/52150", + "openlibrary_key": null, + "inventaire_id": null, + "librarything_key": null, + "goodreads_key": null, + "bnf_id": null, + "viaf": null, + "wikidata": null, + "asin": null, + "aasin": null, + "search_vector": null, + "last_edited_by_id": null, + "wikipedia_link": "", + "isni": null, + "gutenberg_id": null, + "isfdb": null, + "website": "", + "born": null, + "died": null, + "name": "Tyson Yunkaporta", + "aliases": [], + "bio": null + } + ], + "readthroughs": [], + "shelves": [], + "shelf_books": {}, + "lists": [], + "list_items": {}, + "reviews": [], + "comments": [ + { + "id": 1083, + "created_date": "2023-08-14T04:48:18.753Z", + "updated_date": "2023-08-14T04:48:18.769Z", + "remote_id": "https://www.example.com/user/rat/comment/1083", + "user_id": 1, + "content": "

this is a comment about an amazing book

", + "raw_content": "this is a comment about an amazing book", + "local": true, + "content_warning": null, + "privacy": "followers", + "sensitive": false, + "published_date": "2023-08-14T04:48:18.746Z", + "edited_date": null, + "deleted": false, + "deleted_date": null, + "reply_parent_id": null, + "thread_id": 1083, + "ready": true, + "status_ptr_id": 1083, + "book_id": 6190, + "reading_status": null, + "progress": null, + "progress_mode": "PG" + } + ], + "quotes": [ + { + "id": 1084, + "created_date": "2023-08-14T04:48:50.216Z", + "updated_date": "2023-08-14T04:48:50.234Z", + "remote_id": "https://www.example.com/user/rat/quotation/1084", + "user_id": 1, + "content": "

not actually from this book lol

", + "raw_content": "not actually from this book lol", + "local": true, + "content_warning": "spoiler ahead!", + "privacy": "followers", + "sensitive": true, + "published_date": "2023-08-14T04:48:50.207Z", + "edited_date": null, + "deleted": false, + "deleted_date": null, + "reply_parent_id": null, + "thread_id": 1084, + "ready": true, + "status_ptr_id": 1084, + "book_id": 6190, + "reading_status": null, + "quote": "

To be or not to be

", + "raw_quote": "To be or not to be", + "position": 1, + "endposition": null, + "position_mode": "PG" + } + ] + } + ], + "saved_lists": [ + "https://local.lists/9999" + ], + "follows": [ + "https://your.domain.here/user/rat" + ], + "blocked_users": ["https://your.domain.here/user/badger"] +} \ No newline at end of file diff --git a/bookwyrm/tests/data/bookwyrm_account_export.tar.gz b/bookwyrm/tests/data/bookwyrm_account_export.tar.gz new file mode 100644 index 000000000..7612db57e Binary files /dev/null and b/bookwyrm/tests/data/bookwyrm_account_export.tar.gz differ diff --git a/bookwyrm/tests/data/simple_user_export.json b/bookwyrm/tests/data/simple_user_export.json new file mode 100644 index 000000000..39d9074ae --- /dev/null +++ b/bookwyrm/tests/data/simple_user_export.json @@ -0,0 +1,26 @@ +{ + "user": { + "username": "hugh@example.com", + "name": "Hugh", + "summary": "just a test account", + "manually_approves_followers": false, + "hide_follows": false, + "show_goal": true, + "show_suggested_users": true, + "discoverable": true, + "preferred_timezone": "Australia/Broken_Hill", + "default_post_privacy": "public", + "avatar": "" + }, + "goals": [ + { + "goal": 12, + "year": 2023, + "privacy": "public" + } + ], + "books": [], + "saved_lists": [], + "follows": [], + "blocked_users": [] +} \ No newline at end of file diff --git a/bookwyrm/tests/models/test_bookwyrm_import_model.py b/bookwyrm/tests/models/test_bookwyrm_import_model.py new file mode 100644 index 000000000..644cbd265 --- /dev/null +++ b/bookwyrm/tests/models/test_bookwyrm_import_model.py @@ -0,0 +1,548 @@ +""" testing models """ + +import json +import pathlib +from unittest.mock import patch + +from django.db.models import Q +from django.utils import timezone +from django.utils.dateparse import parse_datetime +from django.test import TestCase + +from bookwyrm import models +from bookwyrm.settings import DOMAIN +from bookwyrm.utils.tar import BookwyrmTarFile +import 
bookwyrm.models.bookwyrm_import_job as bookwyrm_import_job + + +class BookwyrmImport(TestCase): + """testing user import functions""" + + def setUp(self): + """setting stuff up""" + with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch( + "bookwyrm.activitystreams.populate_stream_task.delay" + ), patch("bookwyrm.lists_stream.populate_lists_task.delay"), patch( + "bookwyrm.suggested_users.rerank_user_task.delay" + ): + + self.local_user = models.User.objects.create_user( + "mouse", + "mouse@mouse.mouse", + "password", + local=True, + localname="mouse", + name="Mouse", + summary="I'm a real bookmouse", + manually_approves_followers=False, + hide_follows=False, + show_goal=True, + show_suggested_users=True, + discoverable=True, + preferred_timezone="America/Los Angeles", + default_post_privacy="public", + ) + + self.rat_user = models.User.objects.create_user( + "rat", "rat@rat.rat", "password", local=True, localname="rat" + ) + + self.badger_user = models.User.objects.create_user( + "badger", + "badger@badger.badger", + "password", + local=True, + localname="badger", + ) + + self.work = models.Work.objects.create(title="Test Book") + + self.book = models.Edition.objects.create( + title="Test Book", + remote_id="https://example.com/book/1234", + openlibrary_key="OL28216445M", + parent_work=self.work, + ) + + archive_file = pathlib.Path(__file__).parent.joinpath( + "../data/bookwyrm_account_export.tar.gz" + ) + self.tarfile = BookwyrmTarFile.open( + mode="r:gz", fileobj=open(archive_file, "rb") + ) + self.import_data = json.loads( + self.tarfile.read("archive.json").decode("utf-8") + ) + + def test_update_user_profile(self): + """Test update the user's profile from import data""" + + # TODO once the tar is set up + pass + + def test_update_user_settings(self): + """Test updating the user's settings from import data""" + + with patch("bookwyrm.suggested_users.remove_user_task.delay"), patch( + 
"bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + + models.bookwyrm_import_job.update_user_settings( + self.local_user, self.import_data.get("user") + ) + self.local_user.refresh_from_db() + + self.assertEqual(self.local_user.manually_approves_followers, True) + self.assertEqual(self.local_user.hide_follows, True) + self.assertEqual(self.local_user.show_goal, False) + self.assertEqual(self.local_user.show_suggested_users, False) + self.assertEqual(self.local_user.discoverable, False) + self.assertEqual(self.local_user.preferred_timezone, "Australia/Adelaide") + self.assertEqual(self.local_user.default_post_privacy, "followers") + + def test_update_goals(self): + """Test update the user's goals from import data""" + + models.AnnualGoal.objects.create( + user=self.local_user, + year=2023, + goal=999, + privacy="public", + ) + + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + + models.bookwyrm_import_job.update_goals( + self.local_user, self.import_data.get("goals") + ) + + self.local_user.refresh_from_db() + goal = models.AnnualGoal.objects.get() + self.assertEqual(goal.year, 2023) + self.assertEqual(goal.goal, 12) + self.assertEqual(goal.privacy, "followers") + + def test_upsert_saved_lists_existing(self): + """Test upserting an existing saved list""" + + with patch("bookwyrm.lists_stream.remove_list_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + book_list = models.List.objects.create( + name="My cool list", + user=self.rat_user, + remote_id="https://local.lists/9999", + ) + + self.assertFalse(self.local_user.saved_lists.filter(id=book_list.id).exists()) + + self.local_user.saved_lists.add(book_list) + + self.assertTrue(self.local_user.saved_lists.filter(id=book_list.id).exists()) + + with patch("bookwyrm.activitypub.base_activity.resolve_remote_id"): + models.bookwyrm_import_job.upsert_saved_lists( + self.local_user, ["https://local.lists/9999"] + ) + saved_lists = 
self.local_user.saved_lists.filter( + remote_id="https://local.lists/9999" + ).all() + self.assertEqual(len(saved_lists), 1) + + def test_upsert_saved_lists_not_existing(self): + """Test upserting a new saved list""" + + with patch("bookwyrm.lists_stream.remove_list_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + book_list = models.List.objects.create( + name="My cool list", + user=self.rat_user, + remote_id="https://local.lists/9999", + ) + + self.assertFalse(self.local_user.saved_lists.filter(id=book_list.id).exists()) + + with patch("bookwyrm.activitypub.base_activity.resolve_remote_id"): + models.bookwyrm_import_job.upsert_saved_lists( + self.local_user, ["https://local.lists/9999"] + ) + + self.assertTrue(self.local_user.saved_lists.filter(id=book_list.id).exists()) + + def test_upsert_follows(self): + """Test take a list of remote ids and add as follows""" + + before_follow = models.UserFollows.objects.filter( + user_subject=self.local_user, user_object=self.rat_user + ).exists() + + self.assertFalse(before_follow) + + with patch("bookwyrm.activitystreams.add_user_statuses_task.delay"), patch( + "bookwyrm.lists_stream.add_user_lists_task.delay" + ), patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + models.bookwyrm_import_job.upsert_follows( + self.local_user, self.import_data.get("follows") + ) + + after_follow = models.UserFollows.objects.filter( + user_subject=self.local_user, user_object=self.rat_user + ).exists() + self.assertTrue(after_follow) + + def test_upsert_user_blocks(self): + """test adding blocked users""" + + blocked_before = models.UserBlocks.objects.filter( + Q( + user_subject=self.local_user, + user_object=self.badger_user, + ) + ).exists() + self.assertFalse(blocked_before) + + with patch("bookwyrm.suggested_users.remove_suggestion_task.delay"), patch( + "bookwyrm.activitystreams.remove_user_statuses_task.delay" + ), 
patch("bookwyrm.lists_stream.remove_user_lists_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + models.bookwyrm_import_job.upsert_user_blocks( + self.local_user, self.import_data.get("blocked_users") + ) + + blocked_after = models.UserBlocks.objects.filter( + Q( + user_subject=self.local_user, + user_object=self.badger_user, + ) + ).exists() + self.assertTrue(blocked_after) + + def test_get_or_create_authors(self): + """Test taking a JSON string of authors find or create the authors + in the database and returning a list of author instances""" + + author_exists = models.Author.objects.filter(isni="0000000108973024").exists() + self.assertFalse(author_exists) + + authors = self.import_data.get("books")[0]["authors"] + bookwyrm_import_job.get_or_create_authors(authors) + + author = models.Author.objects.get(isni="0000000108973024") + self.assertEqual(author.name, "James C. Scott") + + def test_get_or_create_edition_existing(self): + """Test take a JSON string of books and editions, find or create the editions in the database and return a list of edition instances""" + + self.assertEqual(models.Edition.objects.count(), 1) + self.assertEqual(models.Edition.objects.count(), 1) + + bookwyrm_import_job.get_or_create_edition( + self.import_data["books"][1], self.tarfile + ) # Sand Talk + + self.assertEqual(models.Edition.objects.count(), 1) + + def test_get_or_create_edition_not_existing(self): + """Test take a JSON string of books and editions, find or create the editions in the database and return a list of edition instances""" + + self.assertEqual(models.Edition.objects.count(), 1) + + bookwyrm_import_job.get_or_create_edition( + self.import_data["books"][0], self.tarfile + ) # Seeing like a state + + self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists()) + self.assertEqual(models.Edition.objects.count(), 2) + + def test_clean_values(self): + """test clean values we don't want when creating new 
instances""" + + author = self.import_data.get("books")[0]["authors"][0] + edition = self.import_data.get("books")[0]["edition"] + + cleaned_author = bookwyrm_import_job.clean_values(author) + cleaned_edition = bookwyrm_import_job.clean_values(edition) + + self.assertEqual(cleaned_author["name"], "James C. Scott") + self.assertEqual(cleaned_author.get("id"), None) + self.assertEqual(cleaned_author.get("remote_id"), None) + self.assertEqual(cleaned_author.get("last_edited_by"), None) + self.assertEqual(cleaned_author.get("last_edited_by_id"), None) + + self.assertEqual(cleaned_edition.get("title"), "Seeing Like a State") + self.assertEqual(cleaned_edition.get("id"), None) + self.assertEqual(cleaned_edition.get("remote_id"), None) + self.assertEqual(cleaned_edition.get("last_edited_by"), None) + self.assertEqual(cleaned_edition.get("last_edited_by_id"), None) + self.assertEqual(cleaned_edition.get("cover"), None) + self.assertEqual(cleaned_edition.get("preview_image "), None) + self.assertEqual(cleaned_edition.get("user"), None) + self.assertEqual(cleaned_edition.get("book_list"), None) + self.assertEqual(cleaned_edition.get("shelf_book"), None) + + def test_find_existing(self): + """Given a book or author, find any existing model instances""" + + self.assertEqual(models.Book.objects.count(), 2) # includes Work + self.assertEqual(models.Edition.objects.count(), 1) + self.assertEqual(models.Edition.objects.first().title, "Test Book") + self.assertEqual(models.Edition.objects.first().openlibrary_key, "OL28216445M") + + existing = bookwyrm_import_job.find_existing( + models.Edition, {"openlibrary_key": "OL28216445M", "isbn_10": None}, None + ) + self.assertEqual(existing.title, "Test Book") + + def test_upsert_readthroughs(self): + """Test take a JSON string of readthroughs, find or create the + instances in the database and return a list of saved instances""" + + readthroughs = [ + { + "id": 1, + "created_date": "2023-08-24T10:18:45.923Z", + "updated_date": 
"2023-08-24T10:18:45.928Z", + "remote_id": "https://example.com/mouse/readthrough/1", + "user_id": 1, + "book_id": 1234, + "progress": None, + "progress_mode": "PG", + "start_date": "2022-12-31T13:30:00Z", + "finish_date": "2023-08-23T14:30:00Z", + "stopped_date": None, + "is_active": False, + } + ] + + self.assertEqual(models.ReadThrough.objects.count(), 0) + bookwyrm_import_job.upsert_readthroughs( + readthroughs, self.local_user, self.book.id + ) + + self.assertEqual(models.ReadThrough.objects.count(), 1) + self.assertEqual(models.ReadThrough.objects.first().progress_mode, "PG") + self.assertEqual( + models.ReadThrough.objects.first().start_date, + parse_datetime("2022-12-31T13:30:00Z"), + ) + self.assertEqual(models.ReadThrough.objects.first().book_id, self.book.id) + self.assertEqual(models.ReadThrough.objects.first().user, self.local_user) + + def test_get_or_create_review_status(self): + """Test get_or_create_review_status with a review""" + + self.assertEqual(models.Review.objects.filter(user=self.local_user).count(), 0) + reviews = self.import_data["books"][0]["reviews"] + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + bookwyrm_import_job.get_or_create_statuses( + self.local_user, models.Review, reviews, self.book.id + ) + self.assertEqual(models.Review.objects.filter(user=self.local_user).count(), 1) + self.assertEqual( + models.Review.objects.filter(book=self.book).first().raw_content, + "I like it", + ) + self.assertEqual( + models.Review.objects.filter(book=self.book).first().content_warning, + "Here's a spoiler alert", + ) + self.assertEqual( + models.Review.objects.filter(book=self.book).first().sensitive, True + ) + self.assertEqual( + models.Review.objects.filter(book=self.book).first().published_date, + parse_datetime("2023-08-14T04:09:18.343Z"), + ) + self.assertEqual( + models.Review.objects.filter(book=self.book).first().name, "great book" + ) + self.assertEqual( + 
models.Review.objects.filter(book=self.book).first().rating, 5.00 + ) + + def test_get_or_create_comment_status(self): + """Test get_or_create_review_status with a comment""" + + self.assertEqual(models.Comment.objects.filter(user=self.local_user).count(), 0) + comments = self.import_data["books"][1]["comments"] + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + bookwyrm_import_job.get_or_create_statuses( + self.local_user, models.Comment, comments, self.book.id + ) + self.assertEqual(models.Comment.objects.filter(user=self.local_user).count(), 1) + self.assertEqual( + models.Comment.objects.filter(book=self.book).first().raw_content, + "this is a comment about an amazing book", + ) + self.assertEqual( + models.Comment.objects.filter(book=self.book).first().content_warning, None + ) + self.assertEqual( + models.Comment.objects.filter(book=self.book).first().sensitive, False + ) + self.assertEqual( + models.Comment.objects.filter(book=self.book).first().published_date, + parse_datetime("2023-08-14T04:48:18.746Z"), + ) + self.assertEqual( + models.Comment.objects.filter(book=self.book).first().progress_mode, "PG" + ) + + def test_get_or_create_comment_quote(self): + """Test get_or_create_review_status with a quote""" + + self.assertEqual( + models.Quotation.objects.filter(user=self.local_user).count(), 0 + ) + quotes = self.import_data["books"][1]["quotes"] + with patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"): + bookwyrm_import_job.get_or_create_statuses( + self.local_user, models.Quotation, quotes, self.book.id + ) + self.assertEqual( + models.Quotation.objects.filter(user=self.local_user).count(), 1 + ) + self.assertEqual( + models.Quotation.objects.filter(book=self.book).first().raw_content, + "not actually from this book lol", + ) + self.assertEqual( + models.Quotation.objects.filter(book=self.book).first().content_warning, + "spoiler ahead!", + ) + self.assertEqual( + 
models.Quotation.objects.filter(book=self.book).first().raw_quote, + "To be or not to be", + ) + self.assertEqual( + models.Quotation.objects.filter(book=self.book).first().published_date, + parse_datetime("2023-08-14T04:48:50.207Z"), + ) + self.assertEqual( + models.Quotation.objects.filter(book=self.book).first().position_mode, "PG" + ) + self.assertEqual( + models.Quotation.objects.filter(book=self.book).first().position, 1 + ) + + def test_upsert_list_existing(self): + """Take a list and ListItems as JSON and create DB entries if they don't already exist""" + + book_data = self.import_data["books"][0] + + other_book = models.Edition.objects.create( + title="Another Book", remote_id="https://example.com/book/9876" + ) + + with patch("bookwyrm.lists_stream.remove_list_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + book_list = models.List.objects.create( + name="my list of books", user=self.local_user + ) + + list_item = models.ListItem.objects.create( + book=self.book, book_list=book_list, user=self.local_user, order=1 + ) + + self.assertTrue(models.List.objects.filter(id=book_list.id).exists()) + self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1) + self.assertEqual( + models.ListItem.objects.filter( + user=self.local_user, book_list=book_list + ).count(), + 1, + ) + + with patch("bookwyrm.lists_stream.remove_list_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + bookwyrm_import_job.upsert_lists( + self.local_user, + book_data["lists"], + book_data["list_items"], + other_book.id, + ) + + self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1) + self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1) + self.assertEqual( + models.ListItem.objects.filter( + user=self.local_user, book_list=book_list + ).count(), + 2, + ) + + def test_upsert_list_not_existing(self): + """Take a list and ListItems as JSON and 
create DB entries if they don't already exist""" + + book_data = self.import_data["books"][0] + + self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 0) + self.assertFalse(models.ListItem.objects.filter(book=self.book.id).exists()) + + with patch("bookwyrm.lists_stream.remove_list_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + bookwyrm_import_job.upsert_lists( + self.local_user, + book_data["lists"], + book_data["list_items"], + self.book.id, + ) + + self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1) + self.assertEqual( + models.ListItem.objects.filter(user=self.local_user).count(), 1 + ) + + def test_upsert_shelves_existing(self): + """Take shelf and ShelfBooks JSON objects and create + DB entries if they don't already exist""" + + self.assertEqual( + models.ShelfBook.objects.filter(user=self.local_user.id).count(), 0 + ) + + shelf = models.Shelf.objects.get(name="Read", user=self.local_user) + + with patch("bookwyrm.activitystreams.add_book_statuses_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + models.ShelfBook.objects.create( + book=self.book, shelf=shelf, user=self.local_user + ) + + book_data = self.import_data["books"][0] + with patch("bookwyrm.activitystreams.add_book_statuses_task.delay"), patch( + "bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + bookwyrm_import_job.upsert_shelves(self.book, self.local_user, book_data) + + self.assertEqual( + models.ShelfBook.objects.filter(user=self.local_user.id).count(), 2 + ) + + def test_upsert_shelves_not_existing(self): + """Take shelf and ShelfBooks JSON objects and create + DB entries if they don't already exist""" + + self.assertEqual( + models.ShelfBook.objects.filter(user=self.local_user.id).count(), 0 + ) + + book_data = self.import_data["books"][0] + + with patch("bookwyrm.activitystreams.add_book_statuses_task.delay"), patch( + 
"bookwyrm.models.activitypub_mixin.broadcast_task.apply_async" + ): + bookwyrm_import_job.upsert_shelves(self.book, self.local_user, book_data) + + self.assertEqual( + models.ShelfBook.objects.filter(user=self.local_user.id).count(), 2 + ) + self.assertEqual( + models.Shelf.objects.filter(user=self.local_user.id).count(), 2 + ) diff --git a/bookwyrm/tests/utils/test_tar.py b/bookwyrm/tests/utils/test_tar.py new file mode 100644 index 000000000..5989d3bb9 --- /dev/null +++ b/bookwyrm/tests/utils/test_tar.py @@ -0,0 +1,29 @@ +"""test the BookwyrmTarFile archive helper""" +import os +import pytest +from bookwyrm.utils.tar import BookwyrmTarFile + + +@pytest.fixture +def read_tar(): + """open the account export archive in read mode""" + archive_path = "../data/bookwyrm_account_export.tar.gz" + with open(archive_path, "rb") as archive_file: + with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: + yield tar + + +@pytest.fixture +def write_tar(): + """yield a writable gzip archive and remove the temp file afterwards""" + archive_path = "/tmp/test.tar.gz" + with open(archive_path, "wb") as archive_file: + with BookwyrmTarFile.open(mode="w:gz", fileobj=archive_file) as tar: + yield tar + + os.remove(archive_path) + + +def test_write_bytes(write_tar): + """writing a bytestring into the archive should not raise""" + write_tar.write_bytes(b"ABCDEF", filename="example.txt") diff --git a/bookwyrm/tests/views/imports/test_user_import.py b/bookwyrm/tests/views/imports/test_user_import.py new file mode 100644 index 000000000..db5837101 --- /dev/null +++ b/bookwyrm/tests/views/imports/test_user_import.py @@ -0,0 +1,68 @@ +""" test for app action functionality """ +import pathlib +from unittest.mock import patch + +from django.core.files.uploadedfile import SimpleUploadedFile +from django.template.response import TemplateResponse +from django.test import TestCase +from django.test.client import RequestFactory + +from bookwyrm import forms, models, views +from bookwyrm.tests.validate_html import validate_html + + +class ImportUserViews(TestCase): + """user import views""" + + # pylint: disable=invalid-name + def setUp(self): + """we need basic test data and mocks""" + self.factory = RequestFactory() + with 
patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch( + "bookwyrm.activitystreams.populate_stream_task.delay" + ), patch("bookwyrm.lists_stream.populate_lists_task.delay"): + self.local_user = models.User.objects.create_user( + "mouse@local.com", + "mouse@mouse.mouse", + "password", + local=True, + localname="mouse", + ) + models.SiteSettings.objects.create() + + def test_get_user_import_page(self): + """there are so many views, this just makes sure it LOADS""" + view = views.UserImport.as_view() + request = self.factory.get("") + request.user = self.local_user + result = view(request) + self.assertIsInstance(result, TemplateResponse) + validate_html(result.render()) + self.assertEqual(result.status_code, 200) + + def test_user_import_post(self): + """does the import job start?""" + + view = views.UserImport.as_view() + form = forms.ImportUserForm() + archive_file = pathlib.Path(__file__).parent.joinpath( + "../../data/bookwyrm_account_export.tar.gz" + ) + + form.data["archive_file"] = SimpleUploadedFile( + # pylint: disable=consider-using-with + archive_file, + open(archive_file, "rb").read(), + content_type="application/gzip", + ) + + form.data["include_user_settings"] = "" + form.data["include_goals"] = "on" + + request = self.factory.post("", form.data) + request.user = self.local_user + + with patch("bookwyrm.models.bookwyrm_import_job.BookwyrmImportJob.start_job"): + view(request) + job = models.BookwyrmImportJob.objects.get() + self.assertEqual(job.required, ["include_goals"]) diff --git a/bookwyrm/tests/views/preferences/test_export_user.py b/bookwyrm/tests/views/preferences/test_export_user.py new file mode 100644 index 000000000..c7594749b --- /dev/null +++ b/bookwyrm/tests/views/preferences/test_export_user.py @@ -0,0 +1,74 @@ +""" test for app action functionality """ +from collections import namedtuple +from unittest.mock import patch + +from django.http import HttpResponse +from django.test import TestCase +from django.test.client 
import RequestFactory + +from bookwyrm import models, views +from bookwyrm.tests.validate_html import validate_html + + +class ExportUserViews(TestCase): + """exporting user data""" + + def setUp(self): + self.factory = RequestFactory() + models.SiteSettings.objects.create() + with patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), patch( + "bookwyrm.activitystreams.populate_stream_task.delay" + ): + self.local_user = models.User.objects.create_user( + "hugh@example.com", + "hugh@example.com", + "password", + local=True, + localname="Hugh", + summary="just a test account", + remote_id="https://example.com/users/hugh", + preferred_timezone="Australia/Broken_Hill", + ) + + def test_export_user_get(self, *_): + """request export""" + request = self.factory.get("") + request.user = self.local_user + result = views.ExportUser.as_view()(request) + validate_html(result.render()) + + def test_trigger_export_user_file(self, *_): + """simple user export""" + + request = self.factory.post("") + request.user = self.local_user + with patch("bookwyrm.models.bookwyrm_export_job.start_export_task.delay"): + export = views.ExportUser.as_view()(request) + self.assertIsInstance(export, HttpResponse) + self.assertEqual(export.status_code, 302) + + jobs = models.bookwyrm_export_job.BookwyrmExportJob.objects.count() + self.assertEqual(jobs, 1) + + def test_download_export_user_file(self, *_): + """simple user export""" + + # TODO: need some help with this one + job = models.bookwyrm_export_job.BookwyrmExportJob.objects.create( + user=self.local_user + ) + MockTask = namedtuple("Task", ("id")) + with patch( + "bookwyrm.models.bookwyrm_export_job.start_export_task.delay" + ) as mock: + mock.return_value = MockTask(b'{"name": "mouse"}') + job.start_job() + + request = self.factory.get("") + request.user = self.local_user + job.refresh_from_db() + export = views.ExportArchive.as_view()(request, job.id) + self.assertIsInstance(export, HttpResponse) + 
self.assertEqual(export.status_code, 200) + # pylint: disable=line-too-long + self.assertEqual(export.content, b'{"name": "mouse"}') diff --git a/bookwyrm/urls.py b/bookwyrm/urls.py index 05972ee73..8b2893da2 100644 --- a/bookwyrm/urls.py +++ b/bookwyrm/urls.py @@ -397,6 +397,7 @@ urlpatterns = [ re_path(r"^search/?$", views.Search.as_view(), name="search"), # imports re_path(r"^import/?$", views.Import.as_view(), name="import"), + re_path(r"^user-import/?$", views.UserImport.as_view(), name="user-import"), re_path( r"^import/(?P\d+)/?$", views.ImportStatus.as_view(), @@ -594,6 +595,16 @@ urlpatterns = [ name="prompt-2fa", ), re_path(r"^preferences/export/?$", views.Export.as_view(), name="prefs-export"), + re_path( + r"^preferences/user-export/?$", + views.ExportUser.as_view(), + name="prefs-user-export", + ), + path( + "preferences/user-export/<archive_id>", + views.ExportArchive.as_view(), + name="prefs-export-file", + ), re_path(r"^preferences/delete/?$", views.DeleteUser.as_view(), name="prefs-delete"), re_path( r"^preferences/deactivate/?$", diff --git a/bookwyrm/utils/tar.py b/bookwyrm/utils/tar.py new file mode 100644 index 000000000..448df48d9 --- /dev/null +++ b/bookwyrm/utils/tar.py @@ -0,0 +1,40 @@ +from uuid import uuid4 +from django.core.files import File +import tarfile +import io + + +class BookwyrmTarFile(tarfile.TarFile): + def write_bytes(self, data: bytes, filename="archive.json"): + """Add a file containing :data: bytestring with name :filename: to the archive""" + buffer = io.BytesIO(data) + info = tarfile.TarInfo(filename) + info.size = len(data) + self.addfile(info, fileobj=buffer) + + def add_image(self, image, filename=None, directory=""): + """ + Add an image to the tar archive + :param str filename: overrides the file name set by image + :param str directory: the directory in the archive to put the image + """ + if filename is not None: + file_type = image.name.rsplit(".", maxsplit=1)[-1] + filename = f"{directory}{filename}.{file_type}" + 
else: + filename = f"{directory}{image.name}" + + info = tarfile.TarInfo(name=filename) + info.size = image.size + + self.addfile(info, fileobj=image) + + def read(self, filename): + with self.extractfile(filename) as reader: + return reader.read() + + def write_image_to_file(self, filename, file_field): + extension = filename.rsplit(".")[-1] + with self.extractfile(filename) as reader: + filename = f"{str(uuid4())}.{extension}" + file_field.save(filename, File(reader)) diff --git a/bookwyrm/views/__init__.py b/bookwyrm/views/__init__.py index 84060acb7..c044200e3 100644 --- a/bookwyrm/views/__init__.py +++ b/bookwyrm/views/__init__.py @@ -36,7 +36,7 @@ from .admin.user_admin import UserAdmin, UserAdminList, ActivateUserAdmin # user preferences from .preferences.change_password import ChangePassword from .preferences.edit_user import EditUser -from .preferences.export import Export +from .preferences.export import Export, ExportUser, ExportArchive from .preferences.delete_user import DeleteUser, DeactivateUser, ReactivateUser from .preferences.block import Block, unblock from .preferences.two_factor_auth import ( @@ -80,7 +80,7 @@ from .shelf.shelf_actions import create_shelf, delete_shelf from .shelf.shelf_actions import shelve, unshelve # csv import -from .imports.import_data import Import +from .imports.import_data import Import, UserImport from .imports.import_status import ImportStatus, retry_item, stop_import from .imports.troubleshoot import ImportTroubleshoot from .imports.manually_review import ( diff --git a/bookwyrm/views/imports/import_data.py b/bookwyrm/views/imports/import_data.py index 01812e1d5..69a87c0c2 100644 --- a/bookwyrm/views/imports/import_data.py +++ b/bookwyrm/views/imports/import_data.py @@ -15,12 +15,14 @@ from django.views import View from bookwyrm import forms, models from bookwyrm.importers import ( + BookwyrmImporter, CalibreImporter, LibrarythingImporter, GoodreadsImporter, StorygraphImporter, OpenLibraryImporter, ) +from 
bookwyrm.models.bookwyrm_import_job import BookwyrmImportJob from bookwyrm.settings import PAGE_LENGTH from bookwyrm.utils.cache import get_or_set @@ -127,3 +129,47 @@ def get_average_import_time() -> float: if recent_avg: return recent_avg.total_seconds() return None + + +# pylint: disable= no-self-use +@method_decorator(login_required, name="dispatch") +class UserImport(View): + """import user view""" + + def get(self, request, invalid=False): + """load user import page""" + + jobs = BookwyrmImportJob.objects.filter(user=request.user).order_by( + "-created_date" + ) + paginated = Paginator(jobs, PAGE_LENGTH) + page = paginated.get_page(request.GET.get("page")) + data = { + "import_form": forms.ImportUserForm(), + "jobs": page, + "page_range": paginated.get_elided_page_range( + page.number, on_each_side=2, on_ends=1 + ), + "invalid": invalid, + } + + return TemplateResponse(request, "import/import_user.html", data) + + def post(self, request): + """ingest a Bookwyrm json file""" + + importer = BookwyrmImporter() + + form = forms.ImportUserForm(request.POST, request.FILES) + if not form.is_valid(): + return HttpResponseBadRequest() + + job = importer.process_import( + user=request.user, + archive_file=request.FILES["archive_file"], + settings=request.POST, + ) + + job.start_job() + + return redirect("user-import") diff --git a/bookwyrm/views/preferences/export.py b/bookwyrm/views/preferences/export.py index 6880318bc..28e83051e 100644 --- a/bookwyrm/views/preferences/export.py +++ b/bookwyrm/views/preferences/export.py @@ -3,13 +3,17 @@ import csv import io from django.contrib.auth.decorators import login_required +from django.core.paginator import Paginator from django.db.models import Q from django.http import HttpResponse from django.template.response import TemplateResponse from django.views import View from django.utils.decorators import method_decorator +from django.shortcuts import redirect from bookwyrm import models +from 
bookwyrm.models.bookwyrm_export_job import BookwyrmExportJob +from bookwyrm.settings import PAGE_LENGTH # pylint: disable=no-self-use @method_decorator(login_required, name="dispatch") @@ -84,3 +88,49 @@ class Export(View): "Content-Disposition": 'attachment; filename="bookwyrm-export.csv"' }, ) + + +# pylint: disable=no-self-use +@method_decorator(login_required, name="dispatch") +class ExportUser(View): + """Let users export user data to import into another Bookwyrm instance""" + + def get(self, request): + """Request tar file""" + + jobs = BookwyrmExportJob.objects.filter(user=request.user).order_by( + "-created_date" + ) + paginated = Paginator(jobs, PAGE_LENGTH) + page = paginated.get_page(request.GET.get("page")) + data = { + "jobs": page, + "page_range": paginated.get_elided_page_range( + page.number, on_each_side=2, on_ends=1 + ), + } + + return TemplateResponse(request, "preferences/export-user.html", data) + + def post(self, request): + """Download the json file of a user's data""" + + job = BookwyrmExportJob.objects.create(user=request.user) + job.start_job() + + return redirect("prefs-user-export") + + +@method_decorator(login_required, name="dispatch") +class ExportArchive(View): + """Serve the archive file""" + + def get(self, request, archive_id): + export = BookwyrmExportJob.objects.get(task_id=archive_id, user=request.user) + return HttpResponse( + export.export_data, + content_type="application/gzip", + headers={ + "Content-Disposition": 'attachment; filename="bookwyrm-account-export.tar.gz"' + }, + )