Refactor creation of user export archive

Bart Schuurmans 2024-03-25 18:14:00 +01:00
parent e0decbfd1d
commit a51402241b

@@ -1,7 +1,6 @@
 """Export user account to tar.gz file for import into another Bookwyrm instance"""
 import logging
-from uuid import uuid4
 from urllib.parse import urlparse, unquote
 from boto3.session import Session as BotoSession
@@ -225,64 +224,68 @@ class AddFileToTar(ChildJob):
         # Using a series of jobs in a loop would be better
         try:
-            export_data = self.parent_export_job.export_data
-            export_json = self.parent_export_job.export_json
-            json_data = DjangoJSONEncoder().encode(export_json)
-            user = self.parent_export_job.user
+            export_job = self.parent_export_job
+            export_task_id = str(export_job.task_id)
+
+            export_json_bytes = (
+                DjangoJSONEncoder().encode(export_job.export_json).encode("utf-8")
+            )
+
+            user = export_job.user
             editions = get_books_for_user(user)
 
-            # filenames for later
-            export_data_original = str(export_data)
-            filename = str(self.parent_export_job.task_id)
-
             if settings.USE_S3:
-                s3_job = S3Tar(
+                # Connection for writing temporary files
+                s3 = S3Boto3Storage()
+
+                # Handle for creating the final archive
+                s3_archive_path = f"exports/{export_task_id}.tar.gz"
+                s3_tar = S3Tar(
                     settings.AWS_STORAGE_BUCKET_NAME,
-                    f"exports/{filename}.tar.gz",
+                    s3_archive_path,
                     session=BookwyrmAwsSession(),
                 )
 
-                # save json file
-                export_data.save(
-                    f"archive_{filename}.json", ContentFile(json_data.encode("utf-8"))
+                # Save JSON file to a temporary location
+                export_json_tmp_file = f"exports/{export_task_id}/archive.json"
+                S3Boto3Storage.save(
+                    s3,
+                    export_json_tmp_file,
+                    ContentFile(export_json_bytes),
                 )
-                s3_job.add_file(f"exports/{export_data.name}")
+                s3_tar.add_file(export_json_tmp_file)
 
-                # save image file
-                file_type = user.avatar.name.rsplit(".", maxsplit=1)[-1]
-                export_data.save(f"avatar_{filename}.{file_type}", user.avatar)
-                s3_job.add_file(f"exports/{export_data.name}")
+                # Add avatar image if present
+                if user.avatar:
+                    s3_tar.add_file(f"images/{user.avatar.name}")
 
-                for book in editions:
-                    if getattr(book, "cover", False):
-                        cover_name = f"images/{book.cover.name}"
-                        s3_job.add_file(cover_name, folder="covers")
+                for edition in editions:
+                    if edition.cover:
+                        s3_tar.add_file(f"images/{edition.cover.name}")
 
-                s3_job.tar()
+                # Create archive and store file name
+                s3_tar.tar()
+                export_job.export_data_s3 = s3_archive_path
+                export_job.save()
 
-                # delete child files - we don't need them any more
-                s3_storage = S3Boto3Storage(querystring_auth=True, custom_domain=None)
-                S3Boto3Storage.delete(s3_storage, f"exports/{export_data_original}")
-                S3Boto3Storage.delete(s3_storage, f"exports/archive_{filename}.json")
-                S3Boto3Storage.delete(
-                    s3_storage, f"exports/avatar_{filename}.{file_type}"
-                )
+                # Delete temporary files
+                S3Boto3Storage.delete(s3, export_json_tmp_file)
 
             else:
-                export_data.open("wb")
-                with BookwyrmTarFile.open(mode="w:gz", fileobj=export_data) as tar:
-
-                    tar.write_bytes(json_data.encode("utf-8"))
-
-                    # Add avatar image if present
-                    if getattr(user, "avatar", False):
-                        tar.add_image(user.avatar, filename="avatar")
-
-                    for book in editions:
-                        if getattr(book, "cover", False):
-                            tar.add_image(book.cover)
-
-                export_data.close()
+                export_job.export_data_file = f"{export_task_id}.tar.gz"
+                with export_job.export_data_file.open("wb") as f:
+                    with BookwyrmTarFile.open(mode="w:gz", fileobj=f) as tar:
+                        # save json file
+                        tar.write_bytes(export_json_bytes)
+
+                        # Add avatar image if present
+                        if user.avatar:
+                            tar.add_image(user.avatar, directory="images/")
+
+                        for edition in editions:
+                            if edition.cover:
+                                tar.add_image(edition.cover, directory="images/")
+
+                export_job.save()
 
             self.complete_job()
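
Aside on the S3 branch above: s3-tar assembles the tar.gz from keys that already exist in the bucket, which is why the refactor can drop the per-export temporary copies of avatars and covers and only stage the JSON. A minimal sketch of that flow, with illustrative bucket and key names rather than BookWyrm's:

    from s3_tar import S3Tar

    # Target archive is written back into the same bucket.
    tar_job = S3Tar(
        "my-bucket",               # source bucket (illustrative)
        "exports/archive.tar.gz",  # key for the finished archive (illustrative)
    )

    # Members are referenced by key; they must already exist in the bucket.
    tar_job.add_file("exports/tmp/archive.json")
    tar_job.add_file("images/avatars/mouse.png")

    tar_job.tar()  # build the tar.gz on S3

This matches the diff: upload only the JSON to a temporary key, point S3Tar at the existing image keys, and delete just the temporary JSON afterwards.
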
@@ -304,9 +307,8 @@ def start_export_task(**kwargs):
     try:
         # prepare the initial file and base json
-        job.export_data = ContentFile(b"", str(uuid4()))
         job.export_json = job.user.to_activity()
-        job.save(update_fields=["export_data_file", "export_data_s3", "export_json"])
+        job.save(update_fields=["export_json"])
 
         # let's go
         json_export.delay(job_id=job.id, job_user=job.user.id, no_children=False)
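
For reference, job.export_json holds the ActivityPub actor document returned by job.user.to_activity(); the hunk below rewrites the avatar URL inside its icon field. A trimmed, illustrative shape (field values are made up):

    export_json = {
        "id": "https://example.net/user/mouse",
        "type": "Person",
        "icon": {
            "type": "Image",
            "url": "https://example.net/images/avatars/mouse.png",
        },
    }
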
@@ -374,10 +376,9 @@ def json_export(**kwargs):
         if not job.export_json.get("icon"):
             job.export_json["icon"] = {}
         else:
-            # change the URL to be relative to the JSON file
-            file_type = job.export_json["icon"]["url"].rsplit(".", maxsplit=1)[-1]
-            filename = f"avatar.{file_type}"
-            job.export_json["icon"]["url"] = filename
+            job.export_json["icon"]["url"] = url2relativepath(
+                job.export_json["icon"]["url"]
+            )
 
         # Additional settings - can't be serialized as AP
         vals = [
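
The url2relativepath helper this hunk starts calling is defined elsewhere in the file; its definition is not in the hunks shown. Given the urlparse and unquote imports kept at the top of the file, a plausible sketch of its behavior (an assumption, not the verified implementation):

    from urllib.parse import urlparse, unquote

    def url2relativepath(url: str) -> str:
        """Strip scheme and host from a media URL, leaving a path relative
        to the archive root, e.g.
        https://example.net/images/avatars/mouse.png -> images/avatars/mouse.png
        """
        return unquote(urlparse(url).path).lstrip("/")

Read this way, the icon URL ends up matching the images/ paths that AddFileToTar writes into the archive above.
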