mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-06-13 10:49:23 +00:00
9685ae5a0a
Creating the export JSON and export TAR are now the only two tasks.
320 lines
10 KiB
Python
320 lines
10 KiB
Python
"""Export user account to tar.gz file for import into another Bookwyrm instance"""
|
|
|
|
import logging
|
|
from urllib.parse import urlparse, unquote
|
|
|
|
from boto3.session import Session as BotoSession
|
|
from s3_tar import S3Tar
|
|
from storages.backends.s3boto3 import S3Boto3Storage
|
|
|
|
from django.db.models import BooleanField, FileField, JSONField
|
|
from django.db.models import Q
|
|
from django.core.serializers.json import DjangoJSONEncoder
|
|
from django.core.files.base import ContentFile
|
|
from django.utils.module_loading import import_string
|
|
|
|
from bookwyrm import settings
|
|
|
|
from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, ListItem
|
|
from bookwyrm.models import Review, Comment, Quotation
|
|
from bookwyrm.models import Edition
|
|
from bookwyrm.models import UserFollows, User, UserBlocks
|
|
from bookwyrm.models.job import ParentJob
|
|
from bookwyrm.tasks import app, IMPORTS
|
|
from bookwyrm.utils.tar import BookwyrmTarFile
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class BookwyrmAwsSession(BotoSession):
    """a boto session that always uses settings.AWS_S3_ENDPOINT_URL"""

    def client(self, *args, **kwargs):  # pylint: disable=arguments-differ
        """Return an S3 client, forcing the configured endpoint URL."""
        # override whatever endpoint the caller supplied with the one
        # configured for this instance
        s3_kwargs = dict(kwargs, endpoint_url=settings.AWS_S3_ENDPOINT_URL)
        return super().client("s3", *args, **s3_kwargs)
|
|
|
|
|
|
def select_exports_storage():
    """callable to allow for dependency on runtime configuration"""
    # resolve the dotted path at call time, not at import time, so the
    # storage backend can depend on live settings
    storage_class = import_string(settings.EXPORTS_STORAGE)
    return storage_class()
|
|
|
|
|
|
class BookwyrmExportJob(ParentJob):
    """entry for a specific request to export a bookwyrm user"""

    # the finished tar.gz archive; storage backend is resolved at runtime
    # via select_exports_storage
    export_data = FileField(null=True, storage=select_exports_storage)
    # the JSON payload built by create_export_json_task
    export_json = JSONField(null=True, encoder=DjangoJSONEncoder)
    # NOTE(review): flag for the JSON step; not written anywhere in this
    # file — confirm which task sets it
    json_completed = BooleanField(default=False)

    def start_job(self):
        """schedule the first task"""

        # create_export_json_task chains create_archive_task when it succeeds
        task = create_export_json_task.delay(job_id=self.id)
        self.task_id = task.id
        self.save(update_fields=["task_id"])
|
|
|
|
|
|
def url2relativepath(url: str) -> str:
    """turn an absolute URL into a relative filesystem path"""
    # keep only the URL path, drop its leading "/", and decode
    # percent-escapes (e.g. "%20" -> " ")
    parts = urlparse(url)
    return unquote(parts.path[1:])
|
|
|
|
|
|
@app.task(queue=IMPORTS)
def create_export_json_task(job_id):
    """create the JSON data for the export"""

    job = BookwyrmExportJob.objects.get(id=job_id)

    # bail out if the job was stopped from the UI before the worker ran
    if job.complete:
        return

    try:
        job.set_status("active")

        # build the JSON payload and persist it on the job row
        job.export_json = export_json(job.user)
        job.save(update_fields=["export_json"])

        # hand the (slow) archive creation off to its own task
        create_archive_task.delay(job_id=job.id)
    except Exception as err:  # pylint: disable=broad-except
        logger.exception(
            "create_export_json_task for %s failed with error: %s", job, err
        )
        job.set_status("failed")
|
|
|
|
|
|
@app.task(queue=IMPORTS)
def create_archive_task(job_id):
    """create the archive containing the JSON file and additional files

    Encodes the previously generated ``export_json``, bundles it with the
    user's avatar and book cover images into a tar.gz (on S3 or on local
    storage depending on settings.USE_S3), and records the archive path in
    ``job.export_data``.
    """

    job = BookwyrmExportJob.objects.get(id=job_id)

    # don't start the job if it was stopped from the UI
    if job.complete:
        return

    try:
        export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8")

        user = job.user
        editions = get_books_for_user(user)

        if settings.USE_S3:
            _create_s3_archive(job, export_json_bytes, user, editions)
        else:
            _create_local_archive(job, export_json_bytes, user, editions)

        job.set_status("completed")

    except Exception as err:  # pylint: disable=broad-except
        logger.exception("create_archive_task for %s failed with error: %s", job, err)
        job.set_status("failed")


def _create_s3_archive(job, export_json_bytes, user, editions):
    """Build the export tar.gz on S3 with s3-tar.

    Writes the JSON to a temporary S3 object, tars it together with the
    avatar and cover images, stores the archive path on the job, then
    deletes the temporary object.
    """
    export_task_id = job.task_id

    # Connection for writing temporary files
    storage = S3Boto3Storage()

    # Handle for creating the final archive
    s3_archive_path = f"exports/{export_task_id}.tar.gz"
    s3_tar = S3Tar(
        settings.AWS_STORAGE_BUCKET_NAME,
        s3_archive_path,
        session=BookwyrmAwsSession(),
    )

    # Save JSON file to a temporary location
    export_json_tmp_file = f"exports/{export_task_id}/archive.json"
    S3Boto3Storage.save(
        storage,
        export_json_tmp_file,
        ContentFile(export_json_bytes),
    )
    s3_tar.add_file(export_json_tmp_file)

    # Add avatar image if present
    if user.avatar:
        s3_tar.add_file(f"images/{user.avatar.name}")

    for edition in editions:
        if edition.cover:
            s3_tar.add_file(f"images/{edition.cover.name}")

    # Create archive and store file name
    s3_tar.tar()
    job.export_data = s3_archive_path
    job.save(update_fields=["export_data"])

    # Delete temporary files
    S3Boto3Storage.delete(storage, export_json_tmp_file)


def _create_local_archive(job, export_json_bytes, user, editions):
    """Build the export tar.gz on local storage with BookwyrmTarFile."""
    job.export_data = f"{job.task_id}.tar.gz"
    with job.export_data.open("wb") as tar_file:
        with BookwyrmTarFile.open(mode="w:gz", fileobj=tar_file) as tar:
            # save json file
            tar.write_bytes(export_json_bytes)

            # Add avatar image if present
            if user.avatar:
                tar.add_image(user.avatar, directory="images/")

            for edition in editions:
                if edition.cover:
                    tar.add_image(edition.cover, directory="images/")
    job.save(update_fields=["export_data"])
|
|
|
|
|
|
def export_json(user: User):
    """create export JSON"""
    # the serialized user forms the root of the JSON structure; the other
    # sections hang off named keys
    data = export_user(user)
    data.update(
        settings=export_settings(user),
        goals=export_goals(user),
        books=export_books(user),
        saved_lists=export_saved_lists(user),
        follows=export_follows(user),
        blocks=export_blocks(user),
    )
    return data
|
|
|
|
|
|
def export_user(user: User):
    """export user data

    Serializes the user as an ActivityPub document and rewrites the avatar
    ("icon") URL into a path relative to the archive root, so the importing
    instance can locate the bundled image file.
    """
    data = user.to_activity()
    # the original ternary assigned to data["icon"]["url"] even when "icon"
    # was missing, raising KeyError for users without an avatar; only
    # rewrite the URL when an icon is actually present
    if data.get("icon"):
        data["icon"]["url"] = url2relativepath(data["icon"]["url"])
    return data
|
|
|
|
|
|
def export_settings(user: User):
    """Additional settings - can't be serialized as AP"""
    # plain user preferences copied straight off the model
    return {
        field: getattr(user, field)
        for field in (
            "show_goal",
            "preferred_timezone",
            "default_post_privacy",
            "show_suggested_users",
        )
    }
|
|
|
|
|
|
def export_saved_lists(user: User):
    """add user saved lists to export JSON"""
    # saved lists are referenced by remote_id only, not serialized in full
    return [saved_list.remote_id for saved_list in user.saved_lists.all()]
|
|
|
|
|
|
def export_follows(user: User):
    """add user follows to export JSON"""
    # users this user follows, referenced by remote_id
    follow_rows = UserFollows.objects.filter(user_subject=user).distinct()
    followed_users = User.objects.filter(
        userfollows_user_object__in=follow_rows
    ).distinct()
    return [followed.remote_id for followed in followed_users]
|
|
|
|
|
|
def export_blocks(user: User):
    """add user blocks to export JSON"""
    # users this user has blocked, referenced by remote_id
    block_rows = UserBlocks.objects.filter(user_subject=user).distinct()
    blocked_users = User.objects.filter(
        userblocks_user_object__in=block_rows
    ).distinct()
    return [blocked.remote_id for blocked in blocked_users]
|
|
|
|
|
|
def export_goals(user: User):
    """add user reading goals to export JSON"""
    # goals aren't ActivityPub objects, so serialize the fields by hand
    return [
        {"goal": annual.goal, "year": annual.year, "privacy": annual.privacy}
        for annual in AnnualGoal.objects.filter(user=user).distinct()
    ]
|
|
|
|
|
|
def export_books(user: User):
    """add books to export JSON"""
    # every edition the user has interacted with, serialized one by one
    return [
        export_book(user, edition) for edition in get_books_for_user(user)
    ]
|
|
|
|
|
|
def export_book(user: User, edition: Edition):
    """add book to export JSON

    Serializes the work and edition as ActivityPub documents, plus the
    user's shelves, lists, statuses, and readthroughs for this edition.
    """
    data = {}
    data["work"] = edition.parent_work.to_activity()
    data["edition"] = edition.to_activity()

    # rewrite the cover URL into a path relative to the archive root so
    # the importing instance can find the bundled image file
    if data["edition"].get("cover"):
        data["edition"]["cover"]["url"] = url2relativepath(
            data["edition"]["cover"]["url"]
        )

    # authors
    data["authors"] = [author.to_activity() for author in edition.authors.all()]

    data["shelves"] = _export_book_shelves(user, edition)
    data["lists"] = _export_book_lists(user, edition)

    # Statuses
    # Can't use select_subclasses here because
    # we need to filter on the "book" value,
    # which is not available on an ordinary Status
    _export_book_statuses(data, user, edition)

    # readthroughs can't be serialized to activity
    data["readthroughs"] = _export_book_readthroughs(user, edition)
    return data


def _export_book_shelves(user: User, edition: Edition):
    """Serialize the shelves this user keeps the edition on.

    Every ShelfBook here is this book, so we don't bother serializing
    the ShelfBook rows themselves — only the shelves.
    """
    shelf_books = (
        ShelfBook.objects.select_related("shelf")
        .filter(user=user, book=edition)
        .distinct()
    )
    return [shelfbook.shelf.to_activity() for shelfbook in shelf_books]


def _export_book_lists(user: User, edition: Edition):
    """Serialize the user's lists containing this edition.

    ListItems include "notes" and "approved" so we need them even though
    we know every item is this book.
    """
    list_items = ListItem.objects.filter(book=edition, user=user).distinct()

    lists = []
    for item in list_items:
        list_info = item.book_list.to_activity()
        # privacy isn't serialized by to_activity so we add it
        list_info["privacy"] = item.book_list.privacy
        list_info["list_item"] = item.to_activity()
        lists.append(list_info)
    return lists


def _export_book_statuses(data, user: User, edition: Edition):
    """Add the user's comments, quotations and reviews for this edition to
    *data*, preserving fields that to_activity leaves out."""
    comments = []
    for comment in Comment.objects.filter(user=user, book=edition).all():
        obj = comment.to_activity()
        obj["progress"] = comment.progress
        obj["progress_mode"] = comment.progress_mode
        comments.append(obj)
    data["comments"] = comments

    quotations = []
    for quote in Quotation.objects.filter(user=user, book=edition).all():
        obj = quote.to_activity()
        obj["position"] = quote.position
        obj["endposition"] = quote.endposition
        obj["position_mode"] = quote.position_mode
        quotations.append(obj)
    data["quotations"] = quotations

    reviews = Review.objects.filter(user=user, book=edition).all()
    data["reviews"] = [review.to_activity() for review in reviews]


def _export_book_readthroughs(user: User, edition: Edition):
    """Export raw readthrough values; they can't be serialized to activity."""
    book_readthroughs = (
        ReadThrough.objects.filter(user=user, book=edition).distinct().values()
    )
    return list(book_readthroughs)
|
|
|
|
|
|
def get_books_for_user(user):
    """Get all the books and editions related to a user"""

    # an edition is "related" if the user shelved, read, reviewed, listed,
    # commented on, or quoted it
    related_to_user = (
        Q(shelves__user=user)
        | Q(readthrough__user=user)
        | Q(review__user=user)
        | Q(list__user=user)
        | Q(comment__user=user)
        | Q(quotation__user=user)
    )

    return (
        Edition.objects.select_related("parent_work")
        .filter(related_to_user)
        .distinct()
    )
|