From 0b3b5e4c3d0692e9991ce2818013f5a3e193c57a Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Tue, 7 Nov 2023 15:35:58 +1100 Subject: [PATCH 1/8] initial work to use AP json for user export/import --- bookwyrm/models/bookwyrm_export_job.py | 116 +++++++++++++++---------- bookwyrm/models/notification.py | 2 - 2 files changed, 68 insertions(+), 50 deletions(-) diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index 80912b9e3..63f1729b2 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -10,7 +10,7 @@ from django.core.files.base import ContentFile from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, Shelf, List, ListItem from bookwyrm.models import Review, Comment, Quotation -from bookwyrm.models import Edition, Book +from bookwyrm.models import Edition from bookwyrm.models import UserFollows, User, UserBlocks from bookwyrm.models.job import ParentJob, ParentTask from bookwyrm.settings import DOMAIN @@ -63,7 +63,7 @@ def tar_export(json_data: str, user, file): if getattr(user, "avatar", False): tar.add_image(user.avatar, filename="avatar") - editions, books = get_books_for_user(user) # pylint: disable=unused-variable + editions = get_books_for_user(user) for book in editions: if getattr(book, "cover", False): tar.add_image(book.cover) @@ -113,57 +113,79 @@ def json_export(user): # pylint: disable=too-many-locals, too-many-statements readthroughs = [] # books - editions, books = get_books_for_user(user) + editions = get_books_for_user(user) final_books = [] - for book in books.values(): - edition = editions.filter(id=book["id"]) - book["edition"] = edition.values()[0] + # editions + for edition in editions: + book = {} + book[ + "edition" + ] = edition.to_activity() # <== BUG Link field class is unknown here. + # authors - book["authors"] = list(edition.first().authors.all().values()) - # readthroughs + book["authors"] = [] + for author in edition.authors.all(): + obj = author.to_activity() + book["authors"].append(obj) + + # Shelves and shelfbooks + book["shelves"] = [] + user_shelves = Shelf.objects.filter(user=user).all() + + for shelf in user_shelves: + obj = {"shelf_books": []} + obj["shelf_info"] = shelf.to_activity() + shelf_books = ShelfBook.objects.filter(book=edition, shelf=shelf).distinct() + + for shelfbook in shelf_books: + obj["shelf_books"].append(shelfbook.to_activity()) + + book["shelves"].append(obj) + + # List and ListItem + book["lists"] = [] + user_lists = List.objects.filter(user=user).all() + + for booklist in user_lists: + obj = {"list_items": []} + obj["list_info"] = booklist.to_activity() + list_items = ListItem.objects.filter(book_list=booklist).distinct() + for item in list_items: + obj["list_items"].append(item.to_activity()) + + book["lists"].append(obj) + + # Statuses + # Can't use select_subclasses here because + # we need to filter on the "book" value, + # which is not available on an ordinary Status + for x in ["comments", "quotations", "reviews"]: + book[x] = [] + + comments = Comment.objects.filter(user=user, book=edition).all() + for status in comments: + book["comments"].append(status.to_activity()) + + quotes = Quotation.objects.filter(user=user, book=edition).all() + for status in quotes: + book["quotations"].append(status.to_activity()) + + reviews = Review.objects.filter(user=user, book=edition).all() + for status in reviews: + book["reviews"].append(status.to_activity()) + + # readthroughs can't be serialized to activity book_readthroughs = ( - ReadThrough.objects.filter(user=user, book=book["id"]).distinct().values() + ReadThrough.objects.filter(user=user, book=edition).distinct().values() ) book["readthroughs"] = list(book_readthroughs) - # shelves - shelf_books = ShelfBook.objects.filter(user=user, book=book["id"]).distinct() - shelves_from_books = Shelf.objects.filter(shelfbook__in=shelf_books, user=user) - - book["shelves"] = list(shelves_from_books.values()) - book["shelf_books"] = {} - - for shelf in shelves_from_books: - shelf_contents = ShelfBook.objects.filter(user=user, shelf=shelf).distinct() - - book["shelf_books"][shelf.identifier] = list(shelf_contents.values()) - - # book lists - book_lists = List.objects.filter(books__in=[book["id"]], user=user).distinct() - book["lists"] = list(book_lists.values()) - book["list_items"] = {} - for blist in book_lists: - list_items = ListItem.objects.filter(book_list=blist).distinct() - book["list_items"][blist.name] = list(list_items.values()) - - # reviews - reviews = Review.objects.filter(user=user, book=book["id"]).distinct() - - book["reviews"] = list(reviews.values()) - - # comments - comments = Comment.objects.filter(user=user, book=book["id"]).distinct() - - book["comments"] = list(comments.values()) - - # quotes - quotes = Quotation.objects.filter(user=user, book=book["id"]).distinct() - - book["quotes"] = list(quotes.values()) # append everything final_books.append(book) + logger.info(final_books) + # saved book lists saved_lists = List.objects.filter(id__in=user.saved_lists.all()).distinct() saved_lists = [l.remote_id for l in saved_lists] @@ -192,9 +214,7 @@ def json_export(user): # pylint: disable=too-many-locals, too-many-statements def get_books_for_user(user): - """Get all the books and editions related to a user - :returns: tuple of editions, books - """ + """Get all the books and editions related to a user""" editions = Edition.objects.filter( Q(shelves__user=user) @@ -204,5 +224,5 @@ def get_books_for_user(user): | Q(comment__user=user) | Q(quotation__user=user) ).distinct() - books = Book.objects.filter(id__in=editions).distinct() - return editions, books + + return editions diff --git a/bookwyrm/models/notification.py b/bookwyrm/models/notification.py index e0aefea0a..f9cbee8d8 100644 --- a/bookwyrm/models/notification.py +++ b/bookwyrm/models/notification.py @@ -261,9 +261,7 @@ def notify_user_on_user_export_complete( """we exported your user details! aren't you proud of us""" update_fields = update_fields or [] if not instance.complete or "complete" not in update_fields: - print("RETURNING", instance.status) return - print("NOTIFYING") Notification.objects.create( user=instance.user, notification_type=Notification.USER_EXPORT, From 042d16b3609734e4d6a6a04ca48de8837513036d Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Wed, 8 Nov 2023 18:26:38 +1100 Subject: [PATCH 2/8] fix BookData fields wrong for files BookData is inherited by Book and Author Authors do not have a file_links value, and neither of them have a files value. This commit moves 'fileLinks' down to activitypub.Book (inherited in turn by Edition), and removes 'files' --- bookwyrm/activitypub/book.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bookwyrm/activitypub/book.py b/bookwyrm/activitypub/book.py index 5db0dc3ac..a53222053 100644 --- a/bookwyrm/activitypub/book.py +++ b/bookwyrm/activitypub/book.py @@ -22,8 +22,6 @@ class BookData(ActivityObject): aasin: Optional[str] = None isfdb: Optional[str] = None lastEditedBy: Optional[str] = None - links: list[str] = field(default_factory=list) - fileLinks: list[str] = field(default_factory=list) # pylint: disable=invalid-name @@ -45,6 +43,8 @@ class Book(BookData): firstPublishedDate: str = "" publishedDate: str = "" + fileLinks: list[str] = field(default_factory=list) + cover: Optional[Document] = None type: str = "Book" From c5482cb66d113dae7d6c2bfedd188aafe2ac05e5 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Thu, 9 Nov 2023 21:23:51 +1100 Subject: [PATCH 3/8] use ap models in export and import - more testing needed. --- bookwyrm/models/bookwyrm_export_job.py | 134 +++++----- bookwyrm/models/bookwyrm_import_job.py | 347 ++++++++----------------- 2 files changed, 175 insertions(+), 306 deletions(-) diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index 63f1729b2..5802b7c3e 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -1,5 +1,6 @@ """Export user account to tar.gz file for import into another Bookwyrm instance""" +import dataclasses import logging from uuid import uuid4 @@ -13,7 +14,6 @@ from bookwyrm.models import Review, Comment, Quotation from bookwyrm.models import Edition from bookwyrm.models import UserFollows, User, UserBlocks from bookwyrm.models.job import ParentJob, ParentTask -from bookwyrm.settings import DOMAIN from bookwyrm.tasks import app, IMPORTS from bookwyrm.utils.tar import BookwyrmTarFile @@ -71,57 +71,57 @@ def tar_export(json_data: str, user, file): file.close() -def json_export(user): # pylint: disable=too-many-locals, too-many-statements +def json_export( + user, +): # pylint: disable=too-many-locals, too-many-statements, too-many-branches """Generate an export for a user""" - # user + exported_user = {} + + # User as AP object + exported_user = user.to_activity() + # I don't love this but it prevents a JSON encoding error + # when there is no user image + if isinstance(exported_user["icon"], dataclasses._MISSING_TYPE): + exported_user["icon"] = {} + else: + # change the URL to be relative to the JSON file + file_type = exported_user["icon"]["url"].rsplit(".", maxsplit=1)[-1] + filename = f"avatar.{file_type}" + exported_user["icon"]["url"] = filename + + # Additional settings + # can't be serialized as AP vals = [ - "username", - "name", - "summary", - "manually_approves_followers", - "hide_follows", "show_goal", - "show_suggested_users", - "discoverable", "preferred_timezone", "default_post_privacy", + "show_suggested_users", ] + exported_user["settings"] = {} for k in vals: - exported_user[k] = getattr(user, k) + exported_user["settings"][k] = getattr(user, k) - if getattr(user, "avatar", False): - exported_user["avatar"] = f'https://{DOMAIN}{getattr(user, "avatar").url}' - - # reading goals + # Reading goals + # can't be serialized as AP reading_goals = AnnualGoal.objects.filter(user=user).distinct() - goals_list = [] - # TODO: either error checking should be more sophisticated - # or maybe we don't need this try/except - try: - for goal in reading_goals: - goals_list.append( - {"goal": goal.goal, "year": goal.year, "privacy": goal.privacy} - ) - except Exception: # pylint: disable=broad-except - pass + exported_user["goals"] = [] + for goal in reading_goals: + exported_user["goals"].append( + {"goal": goal.goal, "year": goal.year, "privacy": goal.privacy} + ) - try: - readthroughs = ReadThrough.objects.filter(user=user).distinct().values() - readthroughs = list(readthroughs) - except Exception: # pylint: disable=broad-except - readthroughs = [] + # Reading history + # can't be serialized as AP + readthroughs = ReadThrough.objects.filter(user=user).distinct().values() + readthroughs = list(readthroughs) - # books + # Books editions = get_books_for_user(user) - final_books = [] - - # editions + exported_user["books"] = [] for edition in editions: book = {} - book[ - "edition" - ] = edition.to_activity() # <== BUG Link field class is unknown here. + book["edition"] = edition.to_activity() # authors book["authors"] = [] @@ -129,27 +129,35 @@ def json_export(user): # pylint: disable=too-many-locals, too-many-statements obj = author.to_activity() book["authors"].append(obj) - # Shelves and shelfbooks + # Shelves this book is on + # All we want is the shelf identifier and name + # Every ShelfItem is this book so there's no point + # serialising to_activity() + # can be serialized as AP but can't use to_model on import book["shelves"] = [] - user_shelves = Shelf.objects.filter(user=user).all() + shelf_books = ShelfBook.objects.filter(book=edition).distinct() + user_shelves = Shelf.objects.filter(id__in=shelf_books) for shelf in user_shelves: - obj = {"shelf_books": []} - obj["shelf_info"] = shelf.to_activity() - shelf_books = ShelfBook.objects.filter(book=edition, shelf=shelf).distinct() - - for shelfbook in shelf_books: - obj["shelf_books"].append(shelfbook.to_activity()) - + obj = { + "identifier": shelf.identifier, + "name": shelf.name, + "description": shelf.description, + "editable": shelf.editable, + "privacy": shelf.privacy, + } book["shelves"].append(obj) - # List and ListItem + # Lists and ListItems + # ListItems include "notes" and "approved" so we need them + # even though we know it's this book book["lists"] = [] user_lists = List.objects.filter(user=user).all() for booklist in user_lists: obj = {"list_items": []} obj["list_info"] = booklist.to_activity() + obj["list_info"]["privacy"] = booklist.privacy list_items = ListItem.objects.filter(book_list=booklist).distinct() for item in list_items: obj["list_items"].append(item.to_activity()) @@ -160,8 +168,8 @@ def json_export(user): # pylint: disable=too-many-locals, too-many-statements # Can't use select_subclasses here because # we need to filter on the "book" value, # which is not available on an ordinary Status - for x in ["comments", "quotations", "reviews"]: - book[x] = [] + for status in ["comments", "quotations", "reviews"]: + book[status] = [] comments = Comment.objects.filter(user=user, book=edition).all() for status in comments: @@ -176,41 +184,31 @@ def json_export(user): # pylint: disable=too-many-locals, too-many-statements book["reviews"].append(status.to_activity()) # readthroughs can't be serialized to activity + # so we use values() book_readthroughs = ( ReadThrough.objects.filter(user=user, book=edition).distinct().values() ) book["readthroughs"] = list(book_readthroughs) # append everything - final_books.append(book) + exported_user["books"].append(book) - logger.info(final_books) - - # saved book lists + # saved book lists - just the remote id saved_lists = List.objects.filter(id__in=user.saved_lists.all()).distinct() - saved_lists = [l.remote_id for l in saved_lists] + exported_user["saved_lists"] = [l.remote_id for l in saved_lists] - # follows + # follows - just the remote id follows = UserFollows.objects.filter(user_subject=user).distinct() following = User.objects.filter(userfollows_user_object__in=follows).distinct() - follows = [f.remote_id for f in following] + exported_user["follows"] = [f.remote_id for f in following] - # blocks + # blocks - just the remote id blocks = UserBlocks.objects.filter(user_subject=user).distinct() blocking = User.objects.filter(userblocks_user_object__in=blocks).distinct() - blocks = [b.remote_id for b in blocking] + exported_user["blocks"] = [b.remote_id for b in blocking] - data = { - "user": exported_user, - "goals": goals_list, - "books": final_books, - "saved_lists": saved_lists, - "follows": follows, - "blocked_users": blocks, - } - - return DjangoJSONEncoder().encode(data) + return DjangoJSONEncoder().encode(exported_user) def get_books_for_user(user): diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index 16dad1bfc..e31346caf 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -45,11 +45,12 @@ def start_import_task(**kwargs): archive_file.open("rb") with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: job.import_data = json.loads(tar.read("archive.json").decode("utf-8")) + # TODO: option to import "user.json" instead if "include_user_profile" in job.required: - update_user_profile(job.user, tar, job.import_data.get("user")) + update_user_profile(job.user, tar, job.import_data) if "include_user_settings" in job.required: - update_user_settings(job.user, job.import_data.get("user")) + update_user_settings(job.user, job.import_data) if "include_goals" in job.required: update_goals(job.user, job.import_data.get("goals")) if "include_saved_lists" in job.required: @@ -57,7 +58,7 @@ def start_import_task(**kwargs): if "include_follows" in job.required: upsert_follows(job.user, job.import_data.get("follows")) if "include_blocks" in job.required: - upsert_user_blocks(job.user, job.import_data.get("blocked_users")) + upsert_user_blocks(job.user, job.import_data.get("blocks")) process_books(job, tar) @@ -72,8 +73,6 @@ def start_import_task(**kwargs): def process_books(job, tar): """process user import data related to books""" - # create the books. We need to merge Book and Edition instances - # and also check whether these books already exist in the DB books = job.import_data.get("books") for data in books: @@ -85,22 +84,22 @@ def process_books(job, tar): if "include_readthroughs" in job.required: upsert_readthroughs(data.get("readthroughs"), job.user, book.id) - if "include_reviews" in job.required: - get_or_create_statuses( - job.user, models.Review, data.get("reviews"), book.id - ) - if "include_comments" in job.required: - get_or_create_statuses( - job.user, models.Comment, data.get("comments"), book.id + upsert_statuses( + job.user, models.Comment, data.get("comments"), book.remote_id + ) + if "include_quotations" in job.required: + upsert_statuses( + job.user, models.Quotation, data.get("quotations"), book.remote_id ) - if "include_quotes" in job.required: - get_or_create_statuses( - job.user, models.Quotation, data.get("quotes"), book.id + if "include_reviews" in job.required: + upsert_statuses( + job.user, models.Review, data.get("reviews"), book.remote_id ) + if "include_lists" in job.required: - upsert_lists(job.user, data.get("lists"), data.get("list_items"), book.id) + upsert_lists(job.user, data.get("lists"), book.id) def get_or_create_edition(book_data, tar): @@ -108,251 +107,115 @@ def get_or_create_edition(book_data, tar): find or create the edition in the database and return an edition instance""" - cover_path = book_data.get( - "cover", None - ) # we use this further down but need to assign a var before cleaning - - clean_book = clean_values(book_data) - book = clean_book.copy() # don't mutate the original book data - - # prefer edition values only if they are not null - edition = clean_values(book["edition"]) - for key in edition.keys(): - if key not in book.keys() or ( - key in book.keys() and (edition[key] not in [None, ""]) - ): - book[key] = edition[key] - - existing = find_existing(models.Edition, book) + book = book_data.get("edition") + cover = book.get("cover") + cover_path = cover.get("url", None) + existing = models.Edition.find_existing(book) if existing: return existing # the book is not in the local database, so we have to do this the hard way - local_authors = get_or_create_authors(book["authors"]) - - # get rid of everything that's not strictly in a Book - # or is many-to-many so can't be set directly - associated_values = [ - "edition", - "authors", - "readthroughs", - "shelves", - "shelf_books", - "lists", - "list_items", - "reviews", - "comments", - "quotes", - ] - - for val in associated_values: - del book[val] - - # now we can save the book as an Edition - new_book = models.Edition.objects.create(**book) - new_book.authors.set(local_authors) # now we can add authors with set() - - # get cover from original book_data because we lost it in clean_values - if cover_path: - tar.write_image_to_file(cover_path, new_book.cover) - - # NOTE: clean_values removes "last_edited_by" - # because it's a user ID from the old database - # if this is required, bookwyrm_export_job will - # need to bring in the user who edited it. - - # create parent - work = models.Work.objects.create(title=book["title"]) - work.authors.set(local_authors) - new_book.parent_work = work - - new_book.save(broadcast=False) - return new_book - - -def clean_values(data): - """clean values we don't want when creating new instances""" - - values = [ - "id", - "pk", - "remote_id", - "cover", - "preview_image", - "last_edited_by", - "last_edited_by_id", - "user", - "book_list", - "shelf_book", - "parent_work_id", - ] - - common = data.keys() & values - new_data = data - for val in common: - del new_data[val] - return new_data - - -def find_existing(cls, data): - """Given a book or author, find any existing model instances""" - - identifiers = [ - "openlibrary_key", - "inventaire_id", - "librarything_key", - "goodreads_key", - "asin", - "isfdb", - "isbn_10", - "isbn_13", - "oclc_number", - "origin_id", - "viaf", - "wikipedia_link", - "isni", - "gutenberg_id", - ] - - match_fields = [] - for i in identifiers: - if data.get(i) not in [None, ""]: - match_fields.append({i: data.get(i)}) - - if len(match_fields) > 0: - match = cls.objects.filter(reduce(operator.or_, (Q(**f) for f in match_fields))) - return match.first() - return None - - -def get_or_create_authors(data): - """Take a JSON string of authors find or create the authors - in the database and return a list of author instances""" + # make sure we have the authors in the local DB authors = [] - for author in data: - clean = clean_values(author) - existing = find_existing(models.Author, clean) + for author in book_data.get("authors"): + existing = models.Author.find_existing(author) if existing: authors.append(existing) else: - new = models.Author.objects.create(**clean) + new = author.to_model(model=models.Author, save=True) authors.append(new) - return authors + + # don't save the authors from the old server + book["authors"] = [] + # use the cover image from the tar + if cover_path: + tar.write_image_to_file(cover_path, new_book.cover) + new_book = book.to_model(model=models.Edition, save=True) + new_book.authors.set(authors) + + return new_book def upsert_readthroughs(data, user, book_id): """Take a JSON string of readthroughs, find or create the instances in the database and return a list of saved instances""" - for read_thru in data: - start_date = ( - parse_datetime(read_thru["start_date"]) - if read_thru["start_date"] is not None - else None - ) - finish_date = ( - parse_datetime(read_thru["finish_date"]) - if read_thru["finish_date"] is not None - else None - ) - stopped_date = ( - parse_datetime(read_thru["stopped_date"]) - if read_thru["stopped_date"] is not None - else None - ) - readthrough = { - "user": user, - "book": models.Edition.objects.get(id=book_id), - "progress": read_thru["progress"], - "progress_mode": read_thru["progress_mode"], - "start_date": start_date, - "finish_date": finish_date, - "stopped_date": stopped_date, - "is_active": read_thru["is_active"], - } + for read_through in data: + del read_through["id"] + del read_through["remote_id"] + read_through["user_id"] = user.id + read_through["book_id"] = book_id - existing = models.ReadThrough.objects.filter(**readthrough).exists() + existing = models.ReadThrough.objects.filter(**read_through).first() if not existing: - models.ReadThrough.objects.create(**readthrough) + models.ReadThrough.objects.create(**read_through) -def get_or_create_statuses(user, cls, data, book_id): +def upsert_statuses(user, cls, data, book_id): """Take a JSON string of a status and find or create the instances in the database""" - for book_status in data: + for status in data: - keys = [ - "content", - "raw_content", - "content_warning", - "privacy", - "sensitive", - "published_date", - "reading_status", - "name", - "rating", - "quote", - "raw_quote", - "progress", - "progress_mode", - "position", - "position_mode", - ] - common = book_status.keys() & keys - status = {k: book_status[k] for k in common} - status["published_date"] = parse_datetime(book_status["published_date"]) - if "rating" in common: - status["rating"] = float(book_status["rating"]) - book = models.Edition.objects.get(id=book_id) - exists = cls.objects.filter(**status, book=book, user=user).exists() - if not exists: - cls.objects.create(**status, book=book, user=user) + # change user and remove replies + status["attributedTo"] = user.remote_id + status["to"] = [] + status["replies"] = {} + status["inReplyToBook"] = book_id + existing = cls.find_existing(status) + if existing: + existing.save(broadcast=False) + else: + status.to_model(model=cls, save=True) -def upsert_lists(user, lists, items, book_id): +def upsert_lists(user, lists, book_id): """Take a list and ListItems as JSON and create DB entries if they don't already exist""" book = models.Edition.objects.get(id=book_id) - for lst in lists: - book_list = models.List.objects.filter(name=lst["name"], user=user).first() - if not book_list: - book_list = models.List.objects.create( + for book_list in lists: + blist = book_list["list_info"] + booklist = models.List.find_existing(blist) + if not booklist: + booklist = models.List.objects.create( + name=blist["list_info"]["name"], user=user, - name=lst["name"], - description=lst["description"], - curation=lst["curation"], - privacy=lst["privacy"], + description=blist["list_info"]["summary"], + curation=blist["list_info"]["curation"], + privacy=blist["list_info"]["privacy"], ) - # If the list exists but the ListItem doesn't don't try to add it - # with the same order as an existing item - count = models.ListItem.objects.filter(book_list=book_list).count() + # If the list exists but the ListItem doesn't + # we need to re-order the item + count = models.ListItem.objects.filter(book_list=booklist).count() - for i in items[lst["name"]]: + for item in book_list["list_items"]: if not models.ListItem.objects.filter( - book=book, book_list=book_list, user=user + book=book, book_list=booklist, user=user ).exists(): models.ListItem.objects.create( book=book, - book_list=book_list, + book_list=booklist, user=user, - notes=i["notes"], - order=i["order"] + count, + approved=item["approved"], + notes=item["notes"], + order=item["order"] + count, ) def upsert_shelves(book, user, book_data): - """Take shelf and ShelfBooks JSON objects and create + """Take shelf JSON objects and create DB entries if they don't already exist""" shelves = book_data["shelves"] for shelf in shelves: - book_shelf = models.Shelf.objects.filter(name=shelf["name"], user=user).first() + book_shelf = models.Shelf.objects.filter( + identifier=shelf["identifier"], user=user + ).first() if not book_shelf: book_shelf = models.Shelf.objects.create( name=shelf["name"], @@ -363,27 +226,24 @@ def upsert_shelves(book, user, book_data): privacy=shelf["privacy"], ) - for shelfbook in book_data["shelf_books"][book_shelf.identifier]: - - shelved_date = parse_datetime(shelfbook["shelved_date"]) - - if not models.ShelfBook.objects.filter( - book=book, shelf=book_shelf, user=user - ).exists(): - models.ShelfBook.objects.create( - book=book, - shelf=book_shelf, - user=user, - shelved_date=shelved_date, - ) + # add the book as a ShelfBook + if not models.ShelfBook.objects.filter( + book=book, shelf=book_shelf, user=user + ).exists(): + models.ShelfBook.objects.create( + book=book, + shelf=book_shelf, + user=user, + shelved_date=shelved_date, + ) def update_user_profile(user, tar, data): """update the user's profile from import data""" - name = data.get("name") - username = data.get("username").split("@")[0] + name = data.get("name", None) + username = data.get("preferredUsername") user.name = name if name else username - user.summary = data.get("summary") + user.summary = data.get("summary", None) user.save(update_fields=["name", "summary"]) if data.get("avatar") is not None: @@ -394,18 +254,29 @@ def update_user_profile(user, tar, data): def update_user_settings(user, data): """update the user's settings from import data""" - update_fields = [ - "manually_approves_followers", - "hide_follows", - "show_goal", - "show_suggested_users", - "discoverable", - "preferred_timezone", - "default_post_privacy", + update_fields = ["manually_approves_followers", "hide_follows", "discoverable"] + + ap_fields = [ + ("manuallyApprovesFollowers", "manually_approves_followers"), + ("hideFollows", "hide_follows"), + ("discoverable", "discoverable"), ] - for field in update_fields: - setattr(user, field, data[field]) + for (ap_field, bw_field) in ap_fields: + setattr(user, bw_field, data[ap_field]) + + bw_fields = [ + "show_goal", + "show_suggested_users", + "default_post_privacy", + "preferred_timezone", + ] + + for field in bw_fields: + if data["settings"].get(field, False): + update_fields.append(field) + setattr(user, field, data["settings"][field]) + user.save(update_fields=update_fields) From 7f654be927d4a5af1e999b631ba4adc75dd4d49b Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sat, 11 Nov 2023 11:41:45 +1100 Subject: [PATCH 4/8] use to_activity and to_model where possible Currently getting errors on import for BookData objects when creating a new instance --- bookwyrm/models/bookwyrm_export_job.py | 44 +++++++++++-------------- bookwyrm/models/bookwyrm_import_job.py | 45 +++++++++++++++----------- 2 files changed, 46 insertions(+), 43 deletions(-) diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index 5802b7c3e..974ec5665 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -76,13 +76,11 @@ def json_export( ): # pylint: disable=too-many-locals, too-many-statements, too-many-branches """Generate an export for a user""" - exported_user = {} - # User as AP object exported_user = user.to_activity() # I don't love this but it prevents a JSON encoding error # when there is no user image - if isinstance(exported_user["icon"], dataclasses._MISSING_TYPE): + if isinstance(exported_user["icon"], dataclasses._MISSING_TYPE): # pylint: disable=protected-access exported_user["icon"] = {} else: # change the URL to be relative to the JSON file @@ -126,25 +124,22 @@ def json_export( # authors book["authors"] = [] for author in edition.authors.all(): - obj = author.to_activity() + obj = author.to_activity() # <== this doesn't include blank optional fields + book["authors"].append(obj) # Shelves this book is on - # All we want is the shelf identifier and name - # Every ShelfItem is this book so there's no point - # serialising to_activity() - # can be serialized as AP but can't use to_model on import + # Every ShelfItem is this book so there's no point serialising + # Shelves can be serialized as AP but can't use to_model on import book["shelves"] = [] - shelf_books = ShelfBook.objects.filter(book=edition).distinct() - user_shelves = Shelf.objects.filter(id__in=shelf_books) - - for shelf in user_shelves: + shelf_books = ShelfBook.objects.filter(user=user, book=edition).distinct() + for shelfbook in shelf_books: obj = { - "identifier": shelf.identifier, - "name": shelf.name, - "description": shelf.description, - "editable": shelf.editable, - "privacy": shelf.privacy, + "identifier": shelfbook.shelf.identifier, + "name": shelfbook.shelf.name, + "description": shelfbook.shelf.description, + "editable": shelfbook.shelf.editable, + "privacy": shelfbook.shelf.privacy, } book["shelves"].append(obj) @@ -152,15 +147,15 @@ def json_export( # ListItems include "notes" and "approved" so we need them # even though we know it's this book book["lists"] = [] - user_lists = List.objects.filter(user=user).all() + list_items = ListItem.objects.filter(book=edition, user=user).distinct() - for booklist in user_lists: + for item in list_items: obj = {"list_items": []} - obj["list_info"] = booklist.to_activity() - obj["list_info"]["privacy"] = booklist.privacy - list_items = ListItem.objects.filter(book_list=booklist).distinct() - for item in list_items: - obj["list_items"].append(item.to_activity()) + obj["list_items"].append(item.to_activity()) + + list_info = item.book_list.to_activity() + list_info["privacy"] = item.book_list.privacy + obj["list_info"] = list_info book["lists"].append(obj) @@ -184,7 +179,6 @@ def json_export( book["reviews"].append(status.to_activity()) # readthroughs can't be serialized to activity - # so we use values() book_readthroughs = ( ReadThrough.objects.filter(user=user, book=edition).distinct().values() ) diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index e31346caf..ef994ae7b 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -1,18 +1,17 @@ """Import a user from another Bookwyrm instance""" -from functools import reduce import json import logging -import operator from django.db.models import FileField, JSONField, CharField -from django.db.models import Q -from django.utils.dateparse import parse_datetime +from django.utils import timezone +from django.utils.html import strip_tags from django.contrib.postgres.fields import ArrayField as DjangoArrayField from bookwyrm import activitypub from bookwyrm import models from bookwyrm.tasks import app, IMPORTS +from bookwyrm.models.fields import HtmlField from bookwyrm.models.job import ParentJob, ParentTask, SubTask from bookwyrm.utils.tar import BookwyrmTarFile @@ -115,7 +114,6 @@ def get_or_create_edition(book_data, tar): return existing # the book is not in the local database, so we have to do this the hard way - # make sure we have the authors in the local DB authors = [] for author in book_data.get("authors"): @@ -123,16 +121,19 @@ def get_or_create_edition(book_data, tar): if existing: authors.append(existing) else: - new = author.to_model(model=models.Author, save=True) - authors.append(new) + ap_author = activitypub.base_activity.ActivityObject(**author) + instance = ap_author.to_model(model=models.Author, save=True) + authors.append(instance) # don't save the authors from the old server book["authors"] = [] + ap_book = activitypub.base_activity.ActivityObject(**book) + new_book = ap_book.to_model(model=models.Edition, save=True) + # now set the local authors + new_book.authors.set(authors) # use the cover image from the tar if cover_path: tar.write_image_to_file(cover_path, new_book.cover) - new_book = book.to_model(model=models.Edition, save=True) - new_book.authors.set(authors) return new_book @@ -142,8 +143,11 @@ def upsert_readthroughs(data, user, book_id): instances in the database and return a list of saved instances""" for read_through in data: + # don't match to fields that will never match del read_through["id"] del read_through["remote_id"] + del read_through["updated_date"] + # update ids read_through["user_id"] = user.id read_through["book_id"] = book_id @@ -178,14 +182,18 @@ def upsert_lists(user, lists, book_id): for book_list in lists: blist = book_list["list_info"] - booklist = models.List.find_existing(blist) + booklist = models.List.objects.filter( + user=user, + name=blist["name"] + ).first() + if not booklist: booklist = models.List.objects.create( - name=blist["list_info"]["name"], + name=blist["name"], user=user, - description=blist["list_info"]["summary"], - curation=blist["list_info"]["curation"], - privacy=blist["list_info"]["privacy"], + description=blist["summary"], + curation=blist["curation"], + privacy=blist["privacy"], ) # If the list exists but the ListItem doesn't @@ -234,7 +242,7 @@ def upsert_shelves(book, user, book_data): book=book, shelf=book_shelf, user=user, - shelved_date=shelved_date, + shelved_date=timezone.now() ) @@ -243,10 +251,11 @@ def update_user_profile(user, tar, data): name = data.get("name", None) username = data.get("preferredUsername") user.name = name if name else username - user.summary = data.get("summary", None) + user.summary = strip_tags(data.get("summary", None)) + logger.info(f"USER SUMMARY ==> {user.summary}") user.save(update_fields=["name", "summary"]) - if data.get("avatar") is not None: + if data.get("icon") is not None: avatar_filename = next(filter(lambda n: n.startswith("avatar"), tar.getnames())) tar.write_image_to_file(avatar_filename, user.avatar) @@ -292,7 +301,7 @@ def update_goals(user, data): """update the user's goals from import data""" for goal in data: - # edit the existing goal if there is one instead of making a new one + # edit the existing goal if there is one existing = models.AnnualGoal.objects.filter( year=goal["year"], user=user ).first() From 8d52fa92b2c024c799963af3a0791284830f05f8 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 12 Nov 2023 16:16:34 +1100 Subject: [PATCH 5/8] fixes for import and export - use AP JSON where possible - minor template wording updates --- bookwyrm/models/bookwyrm_export_job.py | 78 +++---- bookwyrm/models/bookwyrm_import_job.py | 198 +++++++++--------- bookwyrm/templates/import/import_user.html | 2 +- .../templates/preferences/export-user.html | 1 + 4 files changed, 142 insertions(+), 137 deletions(-) diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index 974ec5665..0294a060f 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -9,7 +9,7 @@ from django.db.models import Q from django.core.serializers.json import DjangoJSONEncoder from django.core.files.base import ContentFile -from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, Shelf, List, ListItem +from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, List, ListItem from bookwyrm.models import Review, Comment, Quotation from bookwyrm.models import Edition from bookwyrm.models import UserFollows, User, UserBlocks @@ -80,7 +80,10 @@ def json_export( exported_user = user.to_activity() # I don't love this but it prevents a JSON encoding error # when there is no user image - if isinstance(exported_user["icon"], dataclasses._MISSING_TYPE): # pylint: disable=protected-access + if isinstance( + exported_user["icon"], + dataclasses._MISSING_TYPE, # pylint: disable=protected-access + ): exported_user["icon"] = {} else: # change the URL to be relative to the JSON file @@ -88,8 +91,7 @@ def json_export( filename = f"avatar.{file_type}" exported_user["icon"]["url"] = filename - # Additional settings - # can't be serialized as AP + # Additional settings - can't be serialized as AP vals = [ "show_goal", "preferred_timezone", @@ -100,8 +102,7 @@ def json_export( for k in vals: exported_user["settings"][k] = getattr(user, k) - # Reading goals - # can't be serialized as AP + # Reading goals - can't be serialized as AP reading_goals = AnnualGoal.objects.filter(user=user).distinct() exported_user["goals"] = [] for goal in reading_goals: @@ -109,39 +110,40 @@ def json_export( {"goal": goal.goal, "year": goal.year, "privacy": goal.privacy} ) - # Reading history - # can't be serialized as AP + # Reading history - can't be serialized as AP readthroughs = ReadThrough.objects.filter(user=user).distinct().values() readthroughs = list(readthroughs) # Books editions = get_books_for_user(user) exported_user["books"] = [] + for edition in editions: book = {} + book["work"] = edition.parent_work.to_activity() book["edition"] = edition.to_activity() + if book["edition"].get("cover"): + # change the URL to be relative to the JSON file + filename = book["edition"]["cover"]["url"].rsplit("/", maxsplit=1)[-1] + book["edition"]["cover"]["url"] = f"covers/{filename}" + # authors book["authors"] = [] for author in edition.authors.all(): - obj = author.to_activity() # <== this doesn't include blank optional fields - - book["authors"].append(obj) + book["authors"].append(author.to_activity()) # Shelves this book is on - # Every ShelfItem is this book so there's no point serialising - # Shelves can be serialized as AP but can't use to_model on import + # Every ShelfItem is this book so we don't other serializing book["shelves"] = [] - shelf_books = ShelfBook.objects.filter(user=user, book=edition).distinct() + shelf_books = ( + ShelfBook.objects.select_related("shelf") + .filter(user=user, book=edition) + .distinct() + ) + for shelfbook in shelf_books: - obj = { - "identifier": shelfbook.shelf.identifier, - "name": shelfbook.shelf.name, - "description": shelfbook.shelf.description, - "editable": shelfbook.shelf.editable, - "privacy": shelfbook.shelf.privacy, - } - book["shelves"].append(obj) + book["shelves"].append(shelfbook.shelf.to_activity()) # Lists and ListItems # ListItems include "notes" and "approved" so we need them @@ -150,14 +152,12 @@ def json_export( list_items = ListItem.objects.filter(book=edition, user=user).distinct() for item in list_items: - obj = {"list_items": []} - obj["list_items"].append(item.to_activity()) - list_info = item.book_list.to_activity() - list_info["privacy"] = item.book_list.privacy - obj["list_info"] = list_info - - book["lists"].append(obj) + list_info[ + "privacy" + ] = item.book_list.privacy # this isn't serialized so we add it + list_info["list_item"] = item.to_activity() + book["lists"].append(list_info) # Statuses # Can't use select_subclasses here because @@ -208,13 +208,17 @@ def json_export( def get_books_for_user(user): """Get all the books and editions related to a user""" - editions = Edition.objects.filter( - Q(shelves__user=user) - | Q(readthrough__user=user) - | Q(review__user=user) - | Q(list__user=user) - | Q(comment__user=user) - | Q(quotation__user=user) - ).distinct() + editions = ( + Edition.objects.select_related("parent_work") + .filter( + Q(shelves__user=user) + | Q(readthrough__user=user) + | Q(review__user=user) + | Q(list__user=user) + | Q(comment__user=user) + | Q(quotation__user=user) + ) + .distinct() + ) return editions diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index ef994ae7b..628c48613 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -11,7 +11,6 @@ from django.contrib.postgres.fields import ArrayField as DjangoArrayField from bookwyrm import activitypub from bookwyrm import models from bookwyrm.tasks import app, IMPORTS -from bookwyrm.models.fields import HtmlField from bookwyrm.models.job import ParentJob, ParentTask, SubTask from bookwyrm.utils.tar import BookwyrmTarFile @@ -44,7 +43,7 @@ def start_import_task(**kwargs): archive_file.open("rb") with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: job.import_data = json.loads(tar.read("archive.json").decode("utf-8")) - # TODO: option to import "user.json" instead + # TODO: option to import "user.json" in unzipped tar (i.e. Mastodon) instead if "include_user_profile" in job.required: update_user_profile(job.user, tar, job.import_data) @@ -70,7 +69,11 @@ def start_import_task(**kwargs): def process_books(job, tar): - """process user import data related to books""" + """ + Process user import data related to books + We always import the books even if not assigning + them to shelves, lists etc + """ books = job.import_data.get("books") @@ -102,147 +105,146 @@ def process_books(job, tar): def get_or_create_edition(book_data, tar): - """Take a JSON string of book and edition data, - find or create the edition in the database and + """Take a JSON string of work and edition data, + find or create the edition and work in the database and return an edition instance""" - book = book_data.get("edition") - cover = book.get("cover") - cover_path = cover.get("url", None) - existing = models.Edition.find_existing(book) + edition = book_data.get("edition") + existing = models.Edition.find_existing(edition) if existing: return existing - # the book is not in the local database, so we have to do this the hard way # make sure we have the authors in the local DB - authors = [] + # replace the old author ids in the edition JSON + edition["authors"] = [] for author in book_data.get("authors"): - existing = models.Author.find_existing(author) - if existing: - authors.append(existing) - else: - ap_author = activitypub.base_activity.ActivityObject(**author) - instance = ap_author.to_model(model=models.Author, save=True) - authors.append(instance) + parsed_author = activitypub.parse(author) + instance = parsed_author.to_model( + model=models.Author, save=True, overwrite=False + ) - # don't save the authors from the old server - book["authors"] = [] - ap_book = activitypub.base_activity.ActivityObject(**book) - new_book = ap_book.to_model(model=models.Edition, save=True) - # now set the local authors - new_book.authors.set(authors) - # use the cover image from the tar + edition["authors"].append(instance.remote_id) + + # we will add the cover later from the tar + # don't try to load it from the old server + cover = edition.get("cover", {}) + cover_path = cover.get("url", None) + edition["cover"] = {} + + # first we need the parent work to exist + work = book_data.get("work") + work["editions"] = [] + parsed_work = activitypub.parse(work) + work_instance = parsed_work.to_model(model=models.Work, save=True, overwrite=False) + + # now we have a work we can add it to the edition + # and create the edition model instance + edition["work"] = work_instance.remote_id + parsed_edition = activitypub.parse(edition) + book = parsed_edition.to_model(model=models.Edition, save=True, overwrite=False) + + # set the cover image from the tar if cover_path: - tar.write_image_to_file(cover_path, new_book.cover) + tar.write_image_to_file(cover_path, book.cover) - return new_book + return book def upsert_readthroughs(data, user, book_id): - """Take a JSON string of readthroughs, find or create the - instances in the database and return a list of saved instances""" + """Take a JSON string of readthroughs and + find or create the instances in the database""" for read_through in data: - # don't match to fields that will never match - del read_through["id"] - del read_through["remote_id"] - del read_through["updated_date"] - # update ids - read_through["user_id"] = user.id - read_through["book_id"] = book_id - existing = models.ReadThrough.objects.filter(**read_through).first() + obj = {} + keys = [ + "progress_mode", + "start_date", + "finish_date", + "stopped_date", + "is_active", + ] + for key in keys: + obj[key] = read_through[key] + obj["user_id"] = user.id + obj["book_id"] = book_id + + existing = models.ReadThrough.objects.filter(**obj).first() if not existing: - models.ReadThrough.objects.create(**read_through) + models.ReadThrough.objects.create(**obj) -def upsert_statuses(user, cls, data, book_id): +def upsert_statuses(user, cls, data, book_remote_id): """Take a JSON string of a status and find or create the instances in the database""" for status in data: - # change user and remove replies + # change ids and remove replies status["attributedTo"] = user.remote_id status["to"] = [] status["replies"] = {} - status["inReplyToBook"] = book_id - existing = cls.find_existing(status) - if existing: - existing.save(broadcast=False) - else: - status.to_model(model=cls, save=True) + status["inReplyToBook"] = book_remote_id + + # save new status or do nothing if it already exists + parsed = activitypub.parse(status) + parsed.to_model(model=cls, save=True, overwrite=False) def upsert_lists(user, lists, book_id): - """Take a list and ListItems as JSON and - create DB entries if they don't already exist""" + """Take a list of objects each containing + a list and list item as AP objects + + Because we are creating new IDs we can't assume the id + will exist or be accurate, so we only use to_model for + adding new items after checking whether they exist . + + """ book = models.Edition.objects.get(id=book_id) - for book_list in lists: - blist = book_list["list_info"] - booklist = models.List.objects.filter( - user=user, - name=blist["name"] - ).first() - + for blist in lists: + booklist = models.List.objects.filter(name=blist["name"], user=user).first() if not booklist: - booklist = models.List.objects.create( - name=blist["name"], + + blist["owner"] = user.remote_id + parsed = activitypub.parse(blist) + booklist = parsed.to_model(model=models.List, save=True, overwrite=False) + + booklist.privacy = blist["privacy"] + booklist.save() + + item = models.ListItem.objects.filter(book=book, book_list=booklist).exists() + if not item: + count = booklist.books.count() + models.ListItem.objects.create( + book=book, + book_list=booklist, user=user, - description=blist["summary"], - curation=blist["curation"], - privacy=blist["privacy"], + notes=blist["list_item"]["notes"], + approved=blist["list_item"]["approved"], + order=count + 1, ) - # If the list exists but the ListItem doesn't - # we need to re-order the item - count = models.ListItem.objects.filter(book_list=booklist).count() - - for item in book_list["list_items"]: - if not models.ListItem.objects.filter( - book=book, book_list=booklist, user=user - ).exists(): - models.ListItem.objects.create( - book=book, - book_list=booklist, - user=user, - approved=item["approved"], - notes=item["notes"], - order=item["order"] + count, - ) - def upsert_shelves(book, user, book_data): """Take shelf JSON objects and create DB entries if they don't already exist""" shelves = book_data["shelves"] - for shelf in shelves: - book_shelf = models.Shelf.objects.filter( - identifier=shelf["identifier"], user=user - ).first() - if not book_shelf: - book_shelf = models.Shelf.objects.create( - name=shelf["name"], - user=user, - identifier=shelf["identifier"], - description=shelf["description"], - editable=shelf["editable"], - privacy=shelf["privacy"], - ) - # add the book as a ShelfBook + book_shelf = models.Shelf.objects.filter(name=shelf["name"], user=user).first() + + if not book_shelf: + book_shelf = models.Shelf.objects.create(name=shelf["name"], user=user) + + # add the book as a ShelfBook if needed if not models.ShelfBook.objects.filter( book=book, shelf=book_shelf, user=user ).exists(): models.ShelfBook.objects.create( - book=book, - shelf=book_shelf, - user=user, - shelved_date=timezone.now() + book=book, shelf=book_shelf, user=user, shelved_date=timezone.now() ) @@ -251,11 +253,9 @@ def update_user_profile(user, tar, data): name = data.get("name", None) username = data.get("preferredUsername") user.name = name if name else username - user.summary = strip_tags(data.get("summary", None)) - logger.info(f"USER SUMMARY ==> {user.summary}") + user.summary = strip_tags(data.get("summary", None)) user.save(update_fields=["name", "summary"]) - - if data.get("icon") is not None: + if data["icon"].get("url"): avatar_filename = next(filter(lambda n: n.startswith("avatar"), tar.getnames())) tar.write_image_to_file(avatar_filename, user.avatar) diff --git a/bookwyrm/templates/import/import_user.html b/bookwyrm/templates/import/import_user.html index 29081df00..681ed6756 100644 --- a/bookwyrm/templates/import/import_user.html +++ b/bookwyrm/templates/import/import_user.html @@ -132,7 +132,7 @@ {% trans "Book reviews" %}