User export testing fixes

Bart Schuurmans 2024-03-28 13:09:21 +01:00
parent cdbc1d172c
commit dabf7c6e10
2 changed files with 53 additions and 36 deletions

bookwyrm/models/bookwyrm_export_job.py

@@ -1,11 +1,10 @@
 """Export user account to tar.gz file for import into another Bookwyrm instance"""

 import logging
-from urllib.parse import urlparse, unquote
+import os

 from boto3.session import Session as BotoSession
 from s3_tar import S3Tar
-from storages.backends.s3boto3 import S3Boto3Storage

 from django.db.models import BooleanField, FileField, JSONField
 from django.db.models import Q
@@ -13,7 +12,7 @@ from django.core.serializers.json import DjangoJSONEncoder
 from django.core.files.base import ContentFile
 from django.utils.module_loading import import_string

-from bookwyrm import settings
+from bookwyrm import settings, storage_backends
 from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, ListItem
 from bookwyrm.models import Review, Comment, Quotation
@@ -55,12 +54,6 @@ class BookwyrmExportJob(ParentJob):
         self.save(update_fields=["task_id"])


-def url2relativepath(url: str) -> str:
-    """turn an absolute URL into a relative filesystem path"""
-    parsed = urlparse(url)
-    return unquote(parsed.path[1:])
-
-
 @app.task(queue=IMPORTS)
 def create_export_json_task(job_id):
     """create the JSON data for the export"""
@@ -87,6 +80,22 @@ def create_export_json_task(job_id):
         job.set_status("failed")


+def archive_file_location(file, directory="") -> str:
+    """get the relative location of a file inside the archive"""
+    return os.path.join(directory, file.name)
+
+
+def add_file_to_s3_tar(s3_tar: S3Tar, storage, file, directory=""):
+    """
+    add file to S3Tar inside directory, keeping any directories under its
+    storage location
+    """
+    s3_tar.add_file(
+        os.path.join(storage.location, file.name),
+        folder=os.path.dirname(archive_file_location(file, directory=directory)),
+    )
+
+
 @app.task(queue=IMPORTS)
 def create_archive_task(job_id):
     """create the archive containing the JSON file and additional files"""
@@ -98,7 +107,7 @@ def create_archive_task(job_id):
         return

     try:
-        export_task_id = job.task_id
+        export_task_id = str(job.task_id)
         archive_filename = f"{export_task_id}.tar.gz"
         export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8")
@@ -106,32 +115,39 @@ def create_archive_task(job_id):
         editions = get_books_for_user(user)

         if settings.USE_S3:
-            # Connection for writing temporary files
-            storage = S3Boto3Storage()
+            # Storage for writing temporary files
+            exports_storage = storage_backends.ExportsS3Storage()

             # Handle for creating the final archive
             s3_tar = S3Tar(
-                settings.AWS_STORAGE_BUCKET_NAME,
-                f"exports/{archive_filename}",
+                exports_storage.bucket_name,
+                os.path.join(exports_storage.location, archive_filename),
                 session=BookwyrmAwsSession(),
             )

             # Save JSON file to a temporary location
-            export_json_tmp_file = f"exports/{export_task_id}/archive.json"
-            S3Boto3Storage.save(
-                storage,
+            export_json_tmp_file = os.path.join(export_task_id, "archive.json")
+            exports_storage.save(
                 export_json_tmp_file,
                 ContentFile(export_json_bytes),
             )
-            s3_tar.add_file(export_json_tmp_file)
+            s3_tar.add_file(
+                os.path.join(exports_storage.location, export_json_tmp_file)
+            )
+
+            # Add images to TAR
+            images_storage = storage_backends.ImagesStorage()

+            # Add avatar image if present
             if user.avatar:
-                s3_tar.add_file(f"images/{user.avatar.name}")
+                add_file_to_s3_tar(
+                    s3_tar, images_storage, user.avatar, directory="images"
+                )

             for edition in editions:
                 if edition.cover:
-                    s3_tar.add_file(f"images/{edition.cover.name}")
+                    add_file_to_s3_tar(
+                        s3_tar, images_storage, edition.cover, directory="images"
+                    )

             # Create archive and store file name
             s3_tar.tar()
@@ -139,7 +155,7 @@ def create_archive_task(job_id):
             job.save(update_fields=["export_data"])

             # Delete temporary files
-            S3Boto3Storage.delete(storage, export_json_tmp_file)
+            exports_storage.delete(export_json_tmp_file)

         else:
             job.export_data = archive_filename
@@ -150,11 +166,11 @@ def create_archive_task(job_id):
                     # Add avatar image if present
                     if user.avatar:
-                        tar.add_image(user.avatar, directory="images/")
+                        tar.add_image(user.avatar, directory="images")

                     for edition in editions:
                         if edition.cover:
-                            tar.add_image(edition.cover, directory="images/")
+                            tar.add_image(edition.cover, directory="images")

         job.save(update_fields=["export_data"])
         job.set_status("completed")
@@ -179,8 +195,8 @@ def export_json(user: User):
 def export_user(user: User):
     """export user data"""
     data = user.to_activity()
-    if data.get("icon", False):
-        data["icon"]["url"] = url2relativepath(data["icon"]["url"])
+    if user.avatar:
+        data["icon"]["url"] = archive_file_location(user.avatar, directory="images")
     else:
         data["icon"] = {}
     return data
@@ -237,9 +253,9 @@ def export_book(user: User, edition: Edition):
     data["work"] = edition.parent_work.to_activity()
     data["edition"] = edition.to_activity()

-    if data["edition"].get("cover", False):
-        data["edition"]["cover"]["url"] = url2relativepath(
-            data["edition"]["cover"]["url"]
+    if edition.cover:
+        data["edition"]["cover"]["url"] = archive_file_location(
+            edition.cover, directory="images"
         )

     # authors

bookwyrm/utils/tar.py

@@ -1,5 +1,6 @@
 """manage tar files for user exports"""
 import io
+import os
 import tarfile
 from typing import Any, Optional
 from uuid import uuid4
@@ -24,13 +25,13 @@ class BookwyrmTarFile(tarfile.TarFile):
         :param str filename: overrides the file name set by image
         :param str directory: the directory in the archive to put the image
         """
-        if filename is not None:
-            file_type = image.name.rsplit(".", maxsplit=1)[-1]
-            filename = f"{directory}{filename}.{file_type}"
+        if filename is None:
+            filename = image.name
         else:
-            filename = f"{directory}{image.name}"
+            filename += os.path.splitext(image.name)[1]
+
+        path = os.path.join(directory, filename)

-        info = tarfile.TarInfo(name=filename)
+        info = tarfile.TarInfo(name=path)
         info.size = image.size
         self.addfile(info, fileobj=image)
@@ -43,7 +44,7 @@ class BookwyrmTarFile(tarfile.TarFile):

     def write_image_to_file(self, filename: str, file_field: Any) -> None:
         """add an image to the tar"""
-        extension = filename.rsplit(".")[-1]
+        extension = os.path.splitext(filename)[1]
         if buf := self.extractfile(filename):
-            filename = f"{str(uuid4())}.{extension}"
+            filename = str(uuid4()) + extension
             file_field.save(filename, File(buf))
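Note: a minimal sketch (plain Python, file names made up) of the two path-handling changes above. os.path.splitext keeps the leading dot, so no extra "." is concatenated, and os.path.join supplies the directory separator, which is why callers now pass "images" instead of "images/":

    import os
    from uuid import uuid4

    # extension handling: splitext returns the suffix with its dot
    extension = os.path.splitext("covers/1234-abcd.jpg")[1]
    print(extension)                    # ".jpg"
    print(str(uuid4()) + extension)     # e.g. "9b1d...-....jpg"
    # the old rsplit(".")[-1] returned "jpg" without the dot,
    # so the f-string had to insert one

    # directory handling: os.path.join adds the slash itself
    print(os.path.join("images", "avatars/badger.png"))   # images/avatars/badger.png
    print("images/" + "avatars/badger.png")               # old-style concatenation, same result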