mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-25 02:51:13 +00:00
User export testing fixes
This commit is contained in:
parent
cdbc1d172c
commit
dabf7c6e10
2 changed files with 53 additions and 36 deletions
|
@ -1,11 +1,10 @@
|
|||
"""Export user account to tar.gz file for import into another Bookwyrm instance"""
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse, unquote
|
||||
import os
|
||||
|
||||
from boto3.session import Session as BotoSession
|
||||
from s3_tar import S3Tar
|
||||
from storages.backends.s3boto3 import S3Boto3Storage
|
||||
|
||||
from django.db.models import BooleanField, FileField, JSONField
|
||||
from django.db.models import Q
|
||||
|
@ -13,7 +12,7 @@ from django.core.serializers.json import DjangoJSONEncoder
|
|||
from django.core.files.base import ContentFile
|
||||
from django.utils.module_loading import import_string
|
||||
|
||||
from bookwyrm import settings
|
||||
from bookwyrm import settings, storage_backends
|
||||
|
||||
from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, ListItem
|
||||
from bookwyrm.models import Review, Comment, Quotation
|
||||
|
@ -55,12 +54,6 @@ class BookwyrmExportJob(ParentJob):
|
|||
self.save(update_fields=["task_id"])
|
||||
|
||||
|
||||
def url2relativepath(url: str) -> str:
    """turn an absolute URL into a relative filesystem path

    :param str url: the URL whose path component should be extracted
    :return: the percent-decoded URL path without its leading slash
    """
    path = urlparse(url).path
    # only drop a leading slash that is really there: blindly slicing off the
    # first character would corrupt a path that does not start with one
    if path.startswith("/"):
        path = path[1:]
    return unquote(path)
|
||||
|
||||
|
||||
@app.task(queue=IMPORTS)
|
||||
def create_export_json_task(job_id):
|
||||
"""create the JSON data for the export"""
|
||||
|
@ -87,6 +80,22 @@ def create_export_json_task(job_id):
|
|||
job.set_status("failed")
|
||||
|
||||
|
||||
def archive_file_location(file, directory="") -> str:
    """get the relative location of a file inside the archive

    :param file: object whose ``name`` attribute holds the file's path
        (callers pass image fields such as a user avatar or an edition cover)
    :param str directory: directory inside the archive to place the file in
    :return: ``directory`` joined with the file's name
    """
    # os.path.join throws away every component before an absolute one, so a
    # name with a leading slash would silently escape ``directory``
    return os.path.join(directory, file.name.lstrip("/"))
|
||||
|
||||
|
||||
def add_file_to_s3_tar(s3_tar: S3Tar, storage, file, directory=""):
    """
    add file to S3Tar inside directory, keeping any directories under its
    storage location

    :param s3_tar: the S3Tar handle the file is added to
    :param storage: storage backend; its ``location`` is prefixed to the
        file name to build the key passed to ``s3_tar.add_file``
    :param file: object whose ``name`` attribute holds the file's path
    :param str directory: directory inside the archive to place the file in
    """
    # key of the file to add: storage location + file name
    source_key = os.path.join(storage.location, file.name)
    # the archive folder is the directory part of the file's archive location
    archive_path = archive_file_location(file, directory=directory)
    s3_tar.add_file(source_key, folder=os.path.dirname(archive_path))
|
||||
|
||||
|
||||
@app.task(queue=IMPORTS)
|
||||
def create_archive_task(job_id):
|
||||
"""create the archive containing the JSON file and additional files"""
|
||||
|
@ -98,7 +107,7 @@ def create_archive_task(job_id):
|
|||
return
|
||||
|
||||
try:
|
||||
export_task_id = job.task_id
|
||||
export_task_id = str(job.task_id)
|
||||
archive_filename = f"{export_task_id}.tar.gz"
|
||||
export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8")
|
||||
|
||||
|
@ -106,32 +115,39 @@ def create_archive_task(job_id):
|
|||
editions = get_books_for_user(user)
|
||||
|
||||
if settings.USE_S3:
|
||||
# Connection for writing temporary files
|
||||
storage = S3Boto3Storage()
|
||||
# Storage for writing temporary files
|
||||
exports_storage = storage_backends.ExportsS3Storage()
|
||||
|
||||
# Handle for creating the final archive
|
||||
s3_tar = S3Tar(
|
||||
settings.AWS_STORAGE_BUCKET_NAME,
|
||||
f"exports/{archive_filename}",
|
||||
exports_storage.bucket_name,
|
||||
os.path.join(exports_storage.location, archive_filename),
|
||||
session=BookwyrmAwsSession(),
|
||||
)
|
||||
|
||||
# Save JSON file to a temporary location
|
||||
export_json_tmp_file = f"exports/{export_task_id}/archive.json"
|
||||
S3Boto3Storage.save(
|
||||
storage,
|
||||
export_json_tmp_file = os.path.join(export_task_id, "archive.json")
|
||||
exports_storage.save(
|
||||
export_json_tmp_file,
|
||||
ContentFile(export_json_bytes),
|
||||
)
|
||||
s3_tar.add_file(export_json_tmp_file)
|
||||
s3_tar.add_file(
|
||||
os.path.join(exports_storage.location, export_json_tmp_file)
|
||||
)
|
||||
|
||||
# Add images to TAR
|
||||
images_storage = storage_backends.ImagesStorage()
|
||||
|
||||
# Add avatar image if present
|
||||
if user.avatar:
|
||||
s3_tar.add_file(f"images/{user.avatar.name}")
|
||||
add_file_to_s3_tar(
|
||||
s3_tar, images_storage, user.avatar, directory="images"
|
||||
)
|
||||
|
||||
for edition in editions:
|
||||
if edition.cover:
|
||||
s3_tar.add_file(f"images/{edition.cover.name}")
|
||||
add_file_to_s3_tar(
|
||||
s3_tar, images_storage, edition.cover, directory="images"
|
||||
)
|
||||
|
||||
# Create archive and store file name
|
||||
s3_tar.tar()
|
||||
|
@ -139,7 +155,7 @@ def create_archive_task(job_id):
|
|||
job.save(update_fields=["export_data"])
|
||||
|
||||
# Delete temporary files
|
||||
S3Boto3Storage.delete(storage, export_json_tmp_file)
|
||||
exports_storage.delete(export_json_tmp_file)
|
||||
|
||||
else:
|
||||
job.export_data = archive_filename
|
||||
|
@ -150,11 +166,11 @@ def create_archive_task(job_id):
|
|||
|
||||
# Add avatar image if present
|
||||
if user.avatar:
|
||||
tar.add_image(user.avatar, directory="images/")
|
||||
tar.add_image(user.avatar, directory="images")
|
||||
|
||||
for edition in editions:
|
||||
if edition.cover:
|
||||
tar.add_image(edition.cover, directory="images/")
|
||||
tar.add_image(edition.cover, directory="images")
|
||||
job.save(update_fields=["export_data"])
|
||||
|
||||
job.set_status("completed")
|
||||
|
@ -179,8 +195,8 @@ def export_json(user: User):
|
|||
def export_user(user: User):
|
||||
"""export user data"""
|
||||
data = user.to_activity()
|
||||
if data.get("icon", False):
|
||||
data["icon"]["url"] = url2relativepath(data["icon"]["url"])
|
||||
if user.avatar:
|
||||
data["icon"]["url"] = archive_file_location(user.avatar, directory="images")
|
||||
else:
|
||||
data["icon"] = {}
|
||||
return data
|
||||
|
@ -237,9 +253,9 @@ def export_book(user: User, edition: Edition):
|
|||
data["work"] = edition.parent_work.to_activity()
|
||||
data["edition"] = edition.to_activity()
|
||||
|
||||
if data["edition"].get("cover", False):
|
||||
data["edition"]["cover"]["url"] = url2relativepath(
|
||||
data["edition"]["cover"]["url"]
|
||||
if edition.cover:
|
||||
data["edition"]["cover"]["url"] = archive_file_location(
|
||||
edition.cover, directory="images"
|
||||
)
|
||||
|
||||
# authors
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
"""manage tar files for user exports"""
|
||||
import io
|
||||
import os
|
||||
import tarfile
|
||||
from typing import Any, Optional
|
||||
from uuid import uuid4
|
||||
|
@ -24,13 +25,13 @@ class BookwyrmTarFile(tarfile.TarFile):
|
|||
:param str filename: overrides the file name set by image
|
||||
:param str directory: the directory in the archive to put the image
|
||||
"""
|
||||
if filename is not None:
|
||||
file_type = image.name.rsplit(".", maxsplit=1)[-1]
|
||||
filename = f"{directory}{filename}.{file_type}"
|
||||
if filename is None:
|
||||
filename = image.name
|
||||
else:
|
||||
filename = f"{directory}{image.name}"
|
||||
filename += os.path.splitext(image.name)[1]
|
||||
path = os.path.join(directory, filename)
|
||||
|
||||
info = tarfile.TarInfo(name=filename)
|
||||
info = tarfile.TarInfo(name=path)
|
||||
info.size = image.size
|
||||
|
||||
self.addfile(info, fileobj=image)
|
||||
|
@ -43,7 +44,7 @@ class BookwyrmTarFile(tarfile.TarFile):
|
|||
|
||||
def write_image_to_file(self, filename: str, file_field: Any) -> None:
|
||||
"""add an image to the tar"""
|
||||
extension = filename.rsplit(".")[-1]
|
||||
extension = os.path.splitext(filename)[1]
|
||||
if buf := self.extractfile(filename):
|
||||
filename = f"{str(uuid4())}.{extension}"
|
||||
filename = str(uuid4()) + extension
|
||||
file_field.save(filename, File(buf))
|
||||
|
|
Loading…
Reference in a new issue