Merge pull request #3228 from hughrun/user-export

Fix user exports to deal with s3 storage
This commit is contained in:
Bart Schuurmans 2024-04-13 22:53:58 +02:00 committed by GitHub
commit 21a39f8170
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 730 additions and 303 deletions

View file

@ -71,6 +71,9 @@ ENABLE_THUMBNAIL_GENERATION=true
USE_S3=false USE_S3=false
AWS_ACCESS_KEY_ID= AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY= AWS_SECRET_ACCESS_KEY=
# Number of seconds before signed S3 URLs expire.
# This is currently only used for user export files.
S3_SIGNED_URL_EXPIRY=900
# Commented are example values if you use a non-AWS, S3-compatible service # Commented are example values if you use a non-AWS, S3-compatible service
# AWS S3 should work with only AWS_STORAGE_BUCKET_NAME and AWS_S3_REGION_NAME # AWS S3 should work with only AWS_STORAGE_BUCKET_NAME and AWS_S3_REGION_NAME

1
.gitignore vendored
View file

@ -16,6 +16,7 @@
# BookWyrm # BookWyrm
.env .env
/images/ /images/
/exports/
/static/ /static/
bookwyrm/static/css/bookwyrm.css bookwyrm/static/css/bookwyrm.css
bookwyrm/static/css/themes/ bookwyrm/static/css/themes/

View file

@ -0,0 +1,92 @@
# Generated by Django 3.2.23 on 2024-01-28 02:49
import bookwyrm.storage_backends
import django.core.serializers.json
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Add JSON export fields to BookwyrmExportJob and create two child job models.

    Adds ``export_json`` and ``json_completed`` to ``bookwyrmexportjob``,
    changes the storage used by ``export_data``, and creates the
    ``AddFileToTar`` and ``AddBookToUserExportJob`` models, both of which
    are based on ``bookwyrm.childjob``.
    """

    dependencies = [
        ("bookwyrm", "0192_sitesettings_user_exports_enabled"),
    ]
    operations = [
        # Nullable JSON field, encoded with DjangoJSONEncoder.
        migrations.AddField(
            model_name="bookwyrmexportjob",
            name="export_json",
            field=models.JSONField(
                encoder=django.core.serializers.json.DjangoJSONEncoder, null=True
            ),
        ),
        # Boolean flag that defaults to False for existing and new rows.
        migrations.AddField(
            model_name="bookwyrmexportjob",
            name="json_completed",
            field=models.BooleanField(default=False),
        ),
        # The export file field stays nullable but now uses ExportsFileStorage.
        migrations.AlterField(
            model_name="bookwyrmexportjob",
            name="export_data",
            field=models.FileField(
                null=True,
                storage=bookwyrm.storage_backends.ExportsFileStorage,
                upload_to="",
            ),
        ),
        # Child job with a foreign key to its parent BookwyrmExportJob.
        migrations.CreateModel(
            name="AddFileToTar",
            fields=[
                (
                    # parent link to the bookwyrm.childjob base row
                    "childjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.childjob",
                    ),
                ),
                (
                    # deleting the export job cascades to these rows
                    "parent_export_job",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="child_edition_export_jobs",
                        to="bookwyrm.bookwyrmexportjob",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.childjob",),
        ),
        # Child job with a foreign key to a single edition.
        migrations.CreateModel(
            name="AddBookToUserExportJob",
            fields=[
                (
                    # parent link to the bookwyrm.childjob base row
                    "childjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.childjob",
                    ),
                ),
                (
                    # deleting the edition cascades to these rows
                    "edition",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="bookwyrm.edition",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.childjob",),
        ),
    ]

View file

@ -0,0 +1,13 @@
# Generated by Django 3.2.23 on 2024-03-18 17:37
from django.db import migrations
class Migration(migrations.Migration):
    """Merge migration: depends on two bookwyrm migrations and applies no operations."""

    # the two migrations this one depends on
    dependencies = [
        ("bookwyrm", "0193_auto_20240128_0249"),
        ("bookwyrm", "0195_alter_user_preferred_language"),
    ]
    # intentionally empty: nothing is changed in the schema
    operations = []

View file

@ -0,0 +1,13 @@
# Generated by Django 3.2.25 on 2024-03-24 02:35
from django.db import migrations
class Migration(migrations.Migration):
    """Merge migration: depends on two 0196 migrations and applies no operations."""

    # the two migrations this one depends on
    dependencies = [
        ("bookwyrm", "0196_merge_20240318_1737"),
        ("bookwyrm", "0196_merge_pr3134_into_main"),
    ]
    # intentionally empty: nothing is changed in the schema
    operations = []

View file

@ -0,0 +1,23 @@
# Generated by Django 3.2.25 on 2024-03-26 11:37
import bookwyrm.models.bookwyrm_export_job
from django.db import migrations, models
class Migration(migrations.Migration):
    """Change the storage of BookwyrmExportJob.export_data to a callable."""

    dependencies = [
        ("bookwyrm", "0197_merge_20240324_0235"),
    ]
    operations = [
        # The storage argument is the select_exports_storage function itself,
        # not an instantiated storage class.
        migrations.AlterField(
            model_name="bookwyrmexportjob",
            name="export_data",
            field=models.FileField(
                null=True,
                storage=bookwyrm.models.bookwyrm_export_job.select_exports_storage,
                upload_to="",
            ),
        ),
    ]

View file

@ -0,0 +1,13 @@
# Generated by Django 3.2.25 on 2024-03-26 12:17
from django.db import migrations
class Migration(migrations.Migration):
    """Merge migration: depends on two 0198 migrations and applies no operations."""

    # the two migrations this one depends on
    dependencies = [
        ("bookwyrm", "0198_alter_bookwyrmexportjob_export_data"),
        ("bookwyrm", "0198_book_search_vector_author_aliases"),
    ]
    # intentionally empty: nothing is changed in the schema
    operations = []

View file

@ -0,0 +1,27 @@
# Generated by Django 3.2.25 on 2024-03-27 19:14
from django.db import migrations
class Migration(migrations.Migration):
    """Delete the AddFileToTar and AddBookToUserExportJob models."""

    dependencies = [
        ("bookwyrm", "0199_merge_20240326_1217"),
    ]
    operations = [
        # Remove both fields of AddFileToTar before the model is deleted below.
        migrations.RemoveField(
            model_name="addfiletotar",
            name="childjob_ptr",
        ),
        migrations.RemoveField(
            model_name="addfiletotar",
            name="parent_export_job",
        ),
        # Delete the two models themselves.
        migrations.DeleteModel(
            name="AddBookToUserExportJob",
        ),
        migrations.DeleteModel(
            name="AddFileToTar",
        ),
    ]

View file

@ -0,0 +1,13 @@
# Generated by Django 3.2.25 on 2024-04-13 02:32
from django.db import migrations
class Migration(migrations.Migration):
    """Merge migration: depends on 0200 and 0204 and applies no operations."""

    # the two migrations this one depends on
    dependencies = [
        ("bookwyrm", "0200_auto_20240327_1914"),
        ("bookwyrm", "0204_merge_20240409_1042"),
    ]
    # intentionally empty: nothing is changed in the schema
    operations = []

View file

@ -1,213 +1,318 @@
"""Export user account to tar.gz file for import into another Bookwyrm instance""" """Export user account to tar.gz file for import into another Bookwyrm instance"""
import dataclasses
import logging import logging
from uuid import uuid4 import os
from django.db.models import FileField from boto3.session import Session as BotoSession
from s3_tar import S3Tar
from django.db.models import BooleanField, FileField, JSONField
from django.db.models import Q from django.db.models import Q
from django.core.serializers.json import DjangoJSONEncoder from django.core.serializers.json import DjangoJSONEncoder
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.utils.module_loading import import_string
from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, List, ListItem from bookwyrm import settings, storage_backends
from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, ListItem
from bookwyrm.models import Review, Comment, Quotation from bookwyrm.models import Review, Comment, Quotation
from bookwyrm.models import Edition from bookwyrm.models import Edition
from bookwyrm.models import UserFollows, User, UserBlocks from bookwyrm.models import UserFollows, User, UserBlocks
from bookwyrm.models.job import ParentJob, ParentTask from bookwyrm.models.job import ParentJob
from bookwyrm.tasks import app, IMPORTS from bookwyrm.tasks import app, IMPORTS
from bookwyrm.utils.tar import BookwyrmTarFile from bookwyrm.utils.tar import BookwyrmTarFile
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class BookwyrmAwsSession(BotoSession):
    """Boto session whose clients always talk to settings.AWS_S3_ENDPOINT_URL"""

    def client(self, *args, **kwargs):  # pylint: disable=arguments-differ
        """Create an "s3" client with the endpoint forced to the configured URL.

        Any ``endpoint_url`` supplied by the caller is replaced. Remaining
        positional and keyword arguments are forwarded after the "s3"
        service name.
        """
        forwarded = {**kwargs, "endpoint_url": settings.AWS_S3_ENDPOINT_URL}
        return super().client("s3", *args, **forwarded)
def select_exports_storage():
    """Return a new instance of the storage class named by settings.EXPORTS_STORAGE.

    The dotted path is imported when this function is called, so the
    backend follows the configuration in effect at that time.
    """
    return import_string(settings.EXPORTS_STORAGE)()
class BookwyrmExportJob(ParentJob): class BookwyrmExportJob(ParentJob):
"""entry for a specific request to export a bookwyrm user""" """entry for a specific request to export a bookwyrm user"""
export_data = FileField(null=True) export_data = FileField(null=True, storage=select_exports_storage)
export_json = JSONField(null=True, encoder=DjangoJSONEncoder)
json_completed = BooleanField(default=False)
def start_job(self): def start_job(self):
"""Start the job""" """schedule the first task"""
start_export_task.delay(job_id=self.id, no_children=True)
return self task = create_export_json_task.delay(job_id=self.id)
self.task_id = task.id
self.save(update_fields=["task_id"])
@app.task(queue=IMPORTS, base=ParentTask) @app.task(queue=IMPORTS)
def start_export_task(**kwargs): def create_export_json_task(job_id):
"""trigger the child tasks for each row""" """create the JSON data for the export"""
job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])
job = BookwyrmExportJob.objects.get(id=job_id)
# don't start the job if it was stopped from the UI # don't start the job if it was stopped from the UI
if job.complete: if job.complete:
return return
try: try:
# This is where ChildJobs get made job.set_status("active")
job.export_data = ContentFile(b"", str(uuid4()))
json_data = json_export(job.user) # generate JSON structure
tar_export(json_data, job.user, job.export_data) job.export_json = export_json(job.user)
job.save(update_fields=["export_data"]) job.save(update_fields=["export_json"])
# create archive in separate task
create_archive_task.delay(job_id=job.id)
except Exception as err: # pylint: disable=broad-except except Exception as err: # pylint: disable=broad-except
logger.exception("User Export Job %s Failed with error: %s", job.id, err) logger.exception(
"create_export_json_task for %s failed with error: %s", job, err
)
job.set_status("failed") job.set_status("failed")
job.set_status("complete")
def archive_file_location(file, directory="") -> str:
    """Return where `file` lives inside the archive, relative to its root.

    The file's ``name`` is joined onto `directory`; with the default empty
    directory the name is returned as-is.
    """
    relative_name = file.name
    return os.path.join(directory, relative_name)
def tar_export(json_data: str, user, file): def add_file_to_s3_tar(s3_tar: S3Tar, storage, file, directory=""):
"""wrap the export information in a tar file""" """
file.open("wb") add file to S3Tar inside directory, keeping any directories under its
with BookwyrmTarFile.open(mode="w:gz", fileobj=file) as tar: storage location
tar.write_bytes(json_data.encode("utf-8")) """
s3_tar.add_file(
os.path.join(storage.location, file.name),
folder=os.path.dirname(archive_file_location(file, directory=directory)),
)
# Add avatar image if present
if getattr(user, "avatar", False):
tar.add_image(user.avatar, filename="avatar")
@app.task(queue=IMPORTS)
def create_archive_task(job_id):
"""create the archive containing the JSON file and additional files"""
job = BookwyrmExportJob.objects.get(id=job_id)
# don't start the job if it was stopped from the UI
if job.complete:
return
try:
export_task_id = str(job.task_id)
archive_filename = f"{export_task_id}.tar.gz"
export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8")
user = job.user
editions = get_books_for_user(user) editions = get_books_for_user(user)
for book in editions:
if getattr(book, "cover", False):
tar.add_image(book.cover)
file.close() if settings.USE_S3:
# Storage for writing temporary files
exports_storage = storage_backends.ExportsS3Storage()
# Handle for creating the final archive
s3_tar = S3Tar(
exports_storage.bucket_name,
os.path.join(exports_storage.location, archive_filename),
session=BookwyrmAwsSession(),
)
# Save JSON file to a temporary location
export_json_tmp_file = os.path.join(export_task_id, "archive.json")
exports_storage.save(
export_json_tmp_file,
ContentFile(export_json_bytes),
)
s3_tar.add_file(
os.path.join(exports_storage.location, export_json_tmp_file)
)
# Add images to TAR
images_storage = storage_backends.ImagesStorage()
if user.avatar:
add_file_to_s3_tar(s3_tar, images_storage, user.avatar)
for edition in editions:
if edition.cover:
add_file_to_s3_tar(
s3_tar, images_storage, edition.cover, directory="images"
)
# Create archive and store file name
s3_tar.tar()
job.export_data = archive_filename
job.save(update_fields=["export_data"])
# Delete temporary files
exports_storage.delete(export_json_tmp_file)
else:
job.export_data = archive_filename
with job.export_data.open("wb") as tar_file:
with BookwyrmTarFile.open(mode="w:gz", fileobj=tar_file) as tar:
# save json file
tar.write_bytes(export_json_bytes)
# Add avatar image if present
if user.avatar:
tar.add_image(user.avatar)
for edition in editions:
if edition.cover:
tar.add_image(edition.cover, directory="images")
job.save(update_fields=["export_data"])
job.set_status("completed")
except Exception as err: # pylint: disable=broad-except
logger.exception("create_archive_task for %s failed with error: %s", job, err)
job.set_status("failed")
def json_export( def export_json(user: User):
user, """create export JSON"""
): # pylint: disable=too-many-locals, too-many-statements, too-many-branches data = export_user(user) # in the root of the JSON structure
"""Generate an export for a user""" data["settings"] = export_settings(user)
data["goals"] = export_goals(user)
data["books"] = export_books(user)
data["saved_lists"] = export_saved_lists(user)
data["follows"] = export_follows(user)
data["blocks"] = export_blocks(user)
return data
# User as AP object
exported_user = user.to_activity() def export_user(user: User):
# I don't love this but it prevents a JSON encoding error """export user data"""
# when there is no user image data = user.to_activity()
if exported_user.get("icon") in (None, dataclasses.MISSING): if user.avatar:
exported_user["icon"] = {} data["icon"]["url"] = archive_file_location(user.avatar)
else: else:
# change the URL to be relative to the JSON file data["icon"] = {}
file_type = exported_user["icon"]["url"].rsplit(".", maxsplit=1)[-1] return data
filename = f"avatar.{file_type}"
exported_user["icon"]["url"] = filename
# Additional settings - can't be serialized as AP
def export_settings(user: User):
"""Additional settings - can't be serialized as AP"""
vals = [ vals = [
"show_goal", "show_goal",
"preferred_timezone", "preferred_timezone",
"default_post_privacy", "default_post_privacy",
"show_suggested_users", "show_suggested_users",
] ]
exported_user["settings"] = {} return {k: getattr(user, k) for k in vals}
for k in vals:
exported_user["settings"][k] = getattr(user, k)
# Reading goals - can't be serialized as AP
reading_goals = AnnualGoal.objects.filter(user=user).distinct()
exported_user["goals"] = []
for goal in reading_goals:
exported_user["goals"].append(
{"goal": goal.goal, "year": goal.year, "privacy": goal.privacy}
)
# Reading history - can't be serialized as AP def export_saved_lists(user: User):
readthroughs = ReadThrough.objects.filter(user=user).distinct().values() """add user saved lists to export JSON"""
readthroughs = list(readthroughs) return [l.remote_id for l in user.saved_lists.all()]
# Books
editions = get_books_for_user(user)
exported_user["books"] = []
for edition in editions: def export_follows(user: User):
book = {} """add user follows to export JSON"""
book["work"] = edition.parent_work.to_activity()
book["edition"] = edition.to_activity()
if book["edition"].get("cover"):
# change the URL to be relative to the JSON file
filename = book["edition"]["cover"]["url"].rsplit("/", maxsplit=1)[-1]
book["edition"]["cover"]["url"] = f"covers/{filename}"
# authors
book["authors"] = []
for author in edition.authors.all():
book["authors"].append(author.to_activity())
# Shelves this book is on
# Every ShelfItem is this book so we don't other serializing
book["shelves"] = []
shelf_books = (
ShelfBook.objects.select_related("shelf")
.filter(user=user, book=edition)
.distinct()
)
for shelfbook in shelf_books:
book["shelves"].append(shelfbook.shelf.to_activity())
# Lists and ListItems
# ListItems include "notes" and "approved" so we need them
# even though we know it's this book
book["lists"] = []
list_items = ListItem.objects.filter(book=edition, user=user).distinct()
for item in list_items:
list_info = item.book_list.to_activity()
list_info[
"privacy"
] = item.book_list.privacy # this isn't serialized so we add it
list_info["list_item"] = item.to_activity()
book["lists"].append(list_info)
# Statuses
# Can't use select_subclasses here because
# we need to filter on the "book" value,
# which is not available on an ordinary Status
for status in ["comments", "quotations", "reviews"]:
book[status] = []
comments = Comment.objects.filter(user=user, book=edition).all()
for status in comments:
obj = status.to_activity()
obj["progress"] = status.progress
obj["progress_mode"] = status.progress_mode
book["comments"].append(obj)
quotes = Quotation.objects.filter(user=user, book=edition).all()
for status in quotes:
obj = status.to_activity()
obj["position"] = status.position
obj["endposition"] = status.endposition
obj["position_mode"] = status.position_mode
book["quotations"].append(obj)
reviews = Review.objects.filter(user=user, book=edition).all()
for status in reviews:
obj = status.to_activity()
book["reviews"].append(obj)
# readthroughs can't be serialized to activity
book_readthroughs = (
ReadThrough.objects.filter(user=user, book=edition).distinct().values()
)
book["readthroughs"] = list(book_readthroughs)
# append everything
exported_user["books"].append(book)
# saved book lists - just the remote id
saved_lists = List.objects.filter(id__in=user.saved_lists.all()).distinct()
exported_user["saved_lists"] = [l.remote_id for l in saved_lists]
# follows - just the remote id
follows = UserFollows.objects.filter(user_subject=user).distinct() follows = UserFollows.objects.filter(user_subject=user).distinct()
following = User.objects.filter(userfollows_user_object__in=follows).distinct() following = User.objects.filter(userfollows_user_object__in=follows).distinct()
exported_user["follows"] = [f.remote_id for f in following] return [f.remote_id for f in following]
# blocks - just the remote id
def export_blocks(user: User):
"""add user blocks to export JSON"""
blocks = UserBlocks.objects.filter(user_subject=user).distinct() blocks = UserBlocks.objects.filter(user_subject=user).distinct()
blocking = User.objects.filter(userblocks_user_object__in=blocks).distinct() blocking = User.objects.filter(userblocks_user_object__in=blocks).distinct()
return [b.remote_id for b in blocking]
exported_user["blocks"] = [b.remote_id for b in blocking]
return DjangoJSONEncoder().encode(exported_user) def export_goals(user: User):
"""add user reading goals to export JSON"""
reading_goals = AnnualGoal.objects.filter(user=user).distinct()
return [
{"goal": goal.goal, "year": goal.year, "privacy": goal.privacy}
for goal in reading_goals
]
def export_books(user: User):
    """Build the list of book entries for the export JSON.

    Produces one export_book() result for each edition returned by
    get_books_for_user(), in the same order.
    """
    exported = []
    for edition in get_books_for_user(user):
        exported.append(export_book(user, edition))
    return exported
def export_book(user: User, edition: Edition):
    """Build the export JSON entry for one edition and the user's data about it.

    The result holds the serialized work, edition and authors, followed by
    the user's shelves, lists, comments, quotations, reviews and
    readthroughs for that edition.
    """
    book = {
        "work": edition.parent_work.to_activity(),
        "edition": edition.to_activity(),
    }
    if edition.cover:
        # point the cover at its location inside the archive
        book["edition"]["cover"]["url"] = archive_file_location(
            edition.cover, directory="images"
        )

    # authors
    book["authors"] = [author.to_activity() for author in edition.authors.all()]

    # Shelves this book is on.
    # Every ShelfBook here is for this book, so only the shelf is serialized.
    shelved = (
        ShelfBook.objects.select_related("shelf")
        .filter(user=user, book=edition)
        .distinct()
    )
    book["shelves"] = [entry.shelf.to_activity() for entry in shelved]

    # Lists and ListItems.
    # The list item itself is kept as well, even though the book is already
    # known, because it carries extra data such as "notes" and "approved".
    book["lists"] = []
    for item in ListItem.objects.filter(book=edition, user=user).distinct():
        list_info = item.book_list.to_activity()
        # privacy isn't serialized so we add it
        list_info["privacy"] = item.book_list.privacy
        list_info["list_item"] = item.to_activity()
        book["lists"].append(list_info)

    # Statuses.
    # Each status type is queried separately because the filter is on
    # "book", which is not available on an ordinary Status.
    book["comments"] = []
    book["quotations"] = []
    book["reviews"] = []

    for comment in Comment.objects.filter(user=user, book=edition).all():
        activity = comment.to_activity()
        activity["progress"] = comment.progress
        activity["progress_mode"] = comment.progress_mode
        book["comments"].append(activity)

    for quote in Quotation.objects.filter(user=user, book=edition).all():
        activity = quote.to_activity()
        activity["position"] = quote.position
        activity["endposition"] = quote.endposition
        activity["position_mode"] = quote.position_mode
        book["quotations"].append(activity)

    book["reviews"] = [
        review.to_activity()
        for review in Review.objects.filter(user=user, book=edition).all()
    ]

    # readthroughs can't be serialized to activity, so raw values are exported
    book["readthroughs"] = list(
        ReadThrough.objects.filter(user=user, book=edition).distinct().values()
    )
    return book
def get_books_for_user(user): def get_books_for_user(user):

View file

@ -42,20 +42,23 @@ def start_import_task(**kwargs):
try: try:
archive_file.open("rb") archive_file.open("rb")
with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar:
job.import_data = json.loads(tar.read("archive.json").decode("utf-8")) json_filename = next(
filter(lambda n: n.startswith("archive"), tar.getnames())
)
job.import_data = json.loads(tar.read(json_filename).decode("utf-8"))
if "include_user_profile" in job.required: if "include_user_profile" in job.required:
update_user_profile(job.user, tar, job.import_data) update_user_profile(job.user, tar, job.import_data)
if "include_user_settings" in job.required: if "include_user_settings" in job.required:
update_user_settings(job.user, job.import_data) update_user_settings(job.user, job.import_data)
if "include_goals" in job.required: if "include_goals" in job.required:
update_goals(job.user, job.import_data.get("goals")) update_goals(job.user, job.import_data.get("goals", []))
if "include_saved_lists" in job.required: if "include_saved_lists" in job.required:
upsert_saved_lists(job.user, job.import_data.get("saved_lists")) upsert_saved_lists(job.user, job.import_data.get("saved_lists", []))
if "include_follows" in job.required: if "include_follows" in job.required:
upsert_follows(job.user, job.import_data.get("follows")) upsert_follows(job.user, job.import_data.get("follows", []))
if "include_blocks" in job.required: if "include_blocks" in job.required:
upsert_user_blocks(job.user, job.import_data.get("blocks")) upsert_user_blocks(job.user, job.import_data.get("blocks", []))
process_books(job, tar) process_books(job, tar)
@ -212,7 +215,7 @@ def upsert_statuses(user, cls, data, book_remote_id):
instance.save() # save and broadcast instance.save() # save and broadcast
else: else:
logger.info("User does not have permission to import statuses") logger.warning("User does not have permission to import statuses")
def upsert_lists(user, lists, book_id): def upsert_lists(user, lists, book_id):

View file

@ -135,8 +135,7 @@ class ParentJob(Job):
) )
app.control.revoke(list(tasks)) app.control.revoke(list(tasks))
for task in self.pending_child_jobs: self.pending_child_jobs.update(status=self.Status.STOPPED)
task.update(status=self.Status.STOPPED)
@property @property
def has_completed(self): def has_completed(self):
@ -248,7 +247,7 @@ class SubTask(app.Task):
""" """
def before_start( def before_start(
self, task_id, args, kwargs self, task_id, *args, **kwargs
): # pylint: disable=no-self-use, unused-argument ): # pylint: disable=no-self-use, unused-argument
"""Handler called before the task starts. Override. """Handler called before the task starts. Override.
@ -272,7 +271,7 @@ class SubTask(app.Task):
child_job.set_status(ChildJob.Status.ACTIVE) child_job.set_status(ChildJob.Status.ACTIVE)
def on_success( def on_success(
self, retval, task_id, args, kwargs self, retval, task_id, *args, **kwargs
): # pylint: disable=no-self-use, unused-argument ): # pylint: disable=no-self-use, unused-argument
"""Run by the worker if the task executes successfully. Override. """Run by the worker if the task executes successfully. Override.

View file

@ -374,6 +374,7 @@ if USE_HTTPS:
USE_S3 = env.bool("USE_S3", False) USE_S3 = env.bool("USE_S3", False)
USE_AZURE = env.bool("USE_AZURE", False) USE_AZURE = env.bool("USE_AZURE", False)
S3_SIGNED_URL_EXPIRY = env.int("S3_SIGNED_URL_EXPIRY", 900)
if USE_S3: if USE_S3:
# AWS settings # AWS settings
@ -388,16 +389,20 @@ if USE_S3:
# S3 Static settings # S3 Static settings
STATIC_LOCATION = "static" STATIC_LOCATION = "static"
STATIC_URL = f"{PROTOCOL}://{AWS_S3_CUSTOM_DOMAIN}/{STATIC_LOCATION}/" STATIC_URL = f"{PROTOCOL}://{AWS_S3_CUSTOM_DOMAIN}/{STATIC_LOCATION}/"
STATIC_FULL_URL = STATIC_URL
STATICFILES_STORAGE = "bookwyrm.storage_backends.StaticStorage" STATICFILES_STORAGE = "bookwyrm.storage_backends.StaticStorage"
# S3 Media settings # S3 Media settings
MEDIA_LOCATION = "images" MEDIA_LOCATION = "images"
MEDIA_URL = f"{PROTOCOL}://{AWS_S3_CUSTOM_DOMAIN}/{MEDIA_LOCATION}/" MEDIA_URL = f"{PROTOCOL}://{AWS_S3_CUSTOM_DOMAIN}/{MEDIA_LOCATION}/"
MEDIA_FULL_URL = MEDIA_URL MEDIA_FULL_URL = MEDIA_URL
STATIC_FULL_URL = STATIC_URL
DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.ImagesStorage" DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.ImagesStorage"
# S3 Exports settings
EXPORTS_STORAGE = "bookwyrm.storage_backends.ExportsS3Storage"
# Content Security Policy
CSP_DEFAULT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS CSP_DEFAULT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
CSP_SCRIPT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS CSP_SCRIPT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
elif USE_AZURE: elif USE_AZURE:
# Azure settings
AZURE_ACCOUNT_NAME = env("AZURE_ACCOUNT_NAME") AZURE_ACCOUNT_NAME = env("AZURE_ACCOUNT_NAME")
AZURE_ACCOUNT_KEY = env("AZURE_ACCOUNT_KEY") AZURE_ACCOUNT_KEY = env("AZURE_ACCOUNT_KEY")
AZURE_CONTAINER = env("AZURE_CONTAINER") AZURE_CONTAINER = env("AZURE_CONTAINER")
@ -407,6 +412,7 @@ elif USE_AZURE:
STATIC_URL = ( STATIC_URL = (
f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{STATIC_LOCATION}/" f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{STATIC_LOCATION}/"
) )
STATIC_FULL_URL = STATIC_URL
STATICFILES_STORAGE = "bookwyrm.storage_backends.AzureStaticStorage" STATICFILES_STORAGE = "bookwyrm.storage_backends.AzureStaticStorage"
# Azure Media settings # Azure Media settings
MEDIA_LOCATION = "images" MEDIA_LOCATION = "images"
@ -414,15 +420,24 @@ elif USE_AZURE:
f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{MEDIA_LOCATION}/" f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{MEDIA_LOCATION}/"
) )
MEDIA_FULL_URL = MEDIA_URL MEDIA_FULL_URL = MEDIA_URL
STATIC_FULL_URL = STATIC_URL
DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.AzureImagesStorage" DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.AzureImagesStorage"
# Azure Exports settings
EXPORTS_STORAGE = None # not implemented yet
# Content Security Policy
CSP_DEFAULT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS CSP_DEFAULT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
CSP_SCRIPT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS CSP_SCRIPT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
else: else:
# Static settings
STATIC_URL = "/static/" STATIC_URL = "/static/"
STATIC_FULL_URL = f"{PROTOCOL}://{DOMAIN}{STATIC_URL}"
STATICFILES_STORAGE = "django.contrib.staticfiles.storage.StaticFilesStorage"
# Media settings
MEDIA_URL = "/images/" MEDIA_URL = "/images/"
MEDIA_FULL_URL = f"{PROTOCOL}://{DOMAIN}{MEDIA_URL}" MEDIA_FULL_URL = f"{PROTOCOL}://{DOMAIN}{MEDIA_URL}"
STATIC_FULL_URL = f"{PROTOCOL}://{DOMAIN}{STATIC_URL}" DEFAULT_FILE_STORAGE = "django.core.files.storage.FileSystemStorage"
# Exports settings
EXPORTS_STORAGE = "bookwyrm.storage_backends.ExportsFileStorage"
# Content Security Policy
CSP_DEFAULT_SRC = ["'self'"] + CSP_ADDITIONAL_HOSTS CSP_DEFAULT_SRC = ["'self'"] + CSP_ADDITIONAL_HOSTS
CSP_SCRIPT_SRC = ["'self'"] + CSP_ADDITIONAL_HOSTS CSP_SCRIPT_SRC = ["'self'"] + CSP_ADDITIONAL_HOSTS

View file

@ -1,6 +1,7 @@
"""Handles backends for storages""" """Handles backends for storages"""
import os import os
from tempfile import SpooledTemporaryFile from tempfile import SpooledTemporaryFile
from django.core.files.storage import FileSystemStorage
from storages.backends.s3boto3 import S3Boto3Storage from storages.backends.s3boto3 import S3Boto3Storage
from storages.backends.azure_storage import AzureStorage from storages.backends.azure_storage import AzureStorage
@ -61,3 +62,18 @@ class AzureImagesStorage(AzureStorage): # pylint: disable=abstract-method
location = "images" location = "images"
overwrite_files = False overwrite_files = False
class ExportsFileStorage(FileSystemStorage):  # pylint: disable=abstract-method
    """Storage class for user export files kept on the local filesystem"""

    # base location for stored export files
    location = "exports"
    # NOTE(review): presumably mirrors the other storage classes in this
    # module; confirm that FileSystemStorage actually reads this attribute.
    overwrite_files = False
class ExportsS3Storage(S3Boto3Storage):  # pylint: disable=abstract-method
    """Storage class for user export files kept in S3"""

    # key prefix for stored export files
    location = "exports"
    # no ACL is set explicitly on uploaded objects
    default_acl = None
    # NOTE(review): django-storages documents `file_overwrite` for the S3
    # backend; confirm that `overwrite_files` is honoured here.
    overwrite_files = False

View file

@ -97,25 +97,25 @@
</td> </td>
</tr> </tr>
{% endif %} {% endif %}
{% for job in jobs %} {% for export in jobs %}
<tr> <tr>
<td>{{ job.updated_date }}</td> <td>{{ export.job.updated_date }}</td>
<td> <td>
<span <span
{% if job.status == "stopped" or job.status == "failed" %} {% if export.job.status == "stopped" or export.job.status == "failed" %}
class="tag is-danger" class="tag is-danger"
{% elif job.status == "pending" %} {% elif export.job.status == "pending" %}
class="tag is-warning" class="tag is-warning"
{% elif job.complete %} {% elif export.job.complete %}
class="tag" class="tag"
{% else %} {% else %}
class="tag is-success" class="tag is-success"
{% endif %} {% endif %}
> >
{% if job.status %} {% if export.job.status %}
{{ job.status }} {{ export.job.status }}
{{ job.status_display }} {{ export.job.status_display }}
{% elif job.complete %} {% elif export.job.complete %}
{% trans "Complete" %} {% trans "Complete" %}
{% else %} {% else %}
{% trans "Active" %} {% trans "Active" %}
@ -123,18 +123,20 @@
</span> </span>
</td> </td>
<td> <td>
<span>{{ job.export_data|get_file_size }}</span> {% if export.size %}
<span>{{ export.size|get_file_size }}</span>
{% endif %}
</td> </td>
<td> <td>
{% if job.complete and not job.status == "stopped" and not job.status == "failed" %} {% if export.url %}
<p> <a href="{{ export.url }}">
<a download="" href="/preferences/user-export/{{ job.task_id }}"> <span class="icon icon-download" aria-hidden="true"></span>
<span class="icon icon-download" aria-hidden="true"></span> <span class="is-hidden-mobile">
<span class="is-hidden-mobile"> {% trans "Download your export" %}
{% trans "Download your export" %} </span>
</span> </a>
</a> {% elif export.unavailable %}
</p> {% trans "Archive is no longer available" %}
{% endif %} {% endif %}
</td> </td>
</tr> </tr>

View file

@ -157,13 +157,13 @@
> >
<div class="notification is-danger is-light"> <div class="notification is-danger is-light">
<p class="my-2">{% trans "Users are currently unable to start new user exports. This is the default setting." %}</p> <p class="my-2">{% trans "Users are currently unable to start new user exports. This is the default setting." %}</p>
{% if use_s3 %} {% if use_azure %}
<p>{% trans "It is not currently possible to provide user exports when using s3 storage. The BookWyrm development team are working on a fix for this." %}</p> <p>{% trans "It is not currently possible to provide user exports when using Azure storage." %}</p>
{% endif %} {% endif %}
</div> </div>
{% csrf_token %} {% csrf_token %}
<div class="control"> <div class="control">
<button type="submit" class="button is-success" {% if use_s3 %}disabled{% endif %}> <button type="submit" class="button is-success" {% if use_azure %}disabled{% endif %}>
{% trans "Enable user exports" %} {% trans "Enable user exports" %}
</button> </button>
</div> </div>

View file

@ -130,11 +130,14 @@ def id_to_username(user_id):
@register.filter(name="get_file_size") @register.filter(name="get_file_size")
def get_file_size(file): def get_file_size(nbytes):
"""display the size of a file in human readable terms""" """display the size of a file in human readable terms"""
try: try:
raw_size = os.stat(file.path).st_size raw_size = float(nbytes)
except (ValueError, TypeError):
return repr(nbytes)
else:
if raw_size < 1024: if raw_size < 1024:
return f"{raw_size} bytes" return f"{raw_size} bytes"
if raw_size < 1024**2: if raw_size < 1024**2:
@ -142,8 +145,6 @@ def get_file_size(file):
if raw_size < 1024**3: if raw_size < 1024**3:
return f"{raw_size/1024**2:.2f} MB" return f"{raw_size/1024**2:.2f} MB"
return f"{raw_size/1024**3:.2f} GB" return f"{raw_size/1024**3:.2f} GB"
except Exception: # pylint: disable=broad-except
return ""
@register.filter(name="get_user_permission") @register.filter(name="get_user_permission")

View file

@ -1,17 +1,18 @@
"""test bookwyrm user export functions""" """test bookwyrm user export functions"""
import datetime import datetime
import json import json
import pathlib
from unittest.mock import patch from unittest.mock import patch
from django.core.serializers.json import DjangoJSONEncoder
from django.test import TestCase
from django.utils import timezone from django.utils import timezone
from django.test import TestCase
from bookwyrm import models from bookwyrm import models
import bookwyrm.models.bookwyrm_export_job as export_job from bookwyrm.utils.tar import BookwyrmTarFile
class BookwyrmExport(TestCase): class BookwyrmExportJob(TestCase):
"""testing user export functions""" """testing user export functions"""
def setUp(self): def setUp(self):
@ -42,6 +43,11 @@ class BookwyrmExport(TestCase):
preferred_timezone="America/Los Angeles", preferred_timezone="America/Los Angeles",
default_post_privacy="followers", default_post_privacy="followers",
) )
avatar_path = pathlib.Path(__file__).parent.joinpath(
"../../static/images/default_avi.jpg"
)
with open(avatar_path, "rb") as avatar_file:
self.local_user.avatar.save("mouse-avatar.jpg", avatar_file)
self.rat_user = models.User.objects.create_user( self.rat_user = models.User.objects.create_user(
"rat", "rat@rat.rat", "ratword", local=True, localname="rat" "rat", "rat@rat.rat", "ratword", local=True, localname="rat"
@ -87,6 +93,13 @@ class BookwyrmExport(TestCase):
title="Example Edition", parent_work=self.work title="Example Edition", parent_work=self.work
) )
# edition cover
cover_path = pathlib.Path(__file__).parent.joinpath(
"../../static/images/default_avi.jpg"
)
with open(cover_path, "rb") as cover_file:
self.edition.cover.save("tèst.jpg", cover_file)
self.edition.authors.add(self.author) self.edition.authors.add(self.author)
# readthrough # readthrough
@ -139,91 +152,105 @@ class BookwyrmExport(TestCase):
book=self.edition, book=self.edition,
) )
def test_json_export_user_settings(self): self.job = models.BookwyrmExportJob.objects.create(user=self.local_user)
"""Test the json export function for basic user info"""
data = export_job.json_export(self.local_user) # run the first stage of the export
user_data = json.loads(data) with patch("bookwyrm.models.bookwyrm_export_job.create_archive_task.delay"):
self.assertEqual(user_data["preferredUsername"], "mouse") models.bookwyrm_export_job.create_export_json_task(job_id=self.job.id)
self.assertEqual(user_data["name"], "Mouse") self.job.refresh_from_db()
self.assertEqual(user_data["summary"], "<p>I'm a real bookmouse</p>")
self.assertEqual(user_data["manuallyApprovesFollowers"], False) def test_add_book_to_user_export_job(self):
self.assertEqual(user_data["hideFollows"], False) """does AddBookToUserExportJob ...add the book to the export?"""
self.assertEqual(user_data["discoverable"], True) self.assertIsNotNone(self.job.export_json["books"])
self.assertEqual(user_data["settings"]["show_goal"], False) self.assertEqual(len(self.job.export_json["books"]), 1)
self.assertEqual(user_data["settings"]["show_suggested_users"], False) book = self.job.export_json["books"][0]
self.assertEqual(book["work"]["id"], self.work.remote_id)
self.assertEqual(len(book["authors"]), 1)
self.assertEqual(len(book["shelves"]), 1)
self.assertEqual(len(book["lists"]), 1)
self.assertEqual(len(book["comments"]), 1)
self.assertEqual(len(book["reviews"]), 1)
self.assertEqual(len(book["quotations"]), 1)
self.assertEqual(len(book["readthroughs"]), 1)
self.assertEqual(book["edition"]["id"], self.edition.remote_id)
self.assertEqual( self.assertEqual(
user_data["settings"]["preferred_timezone"], "America/Los Angeles" book["edition"]["cover"]["url"], f"images/{self.edition.cover.name}"
)
self.assertEqual(user_data["settings"]["default_post_privacy"], "followers")
def test_json_export_extended_user_data(self):
"""Test the json export function for other non-book user info"""
data = export_job.json_export(self.local_user)
json_data = json.loads(data)
# goal
self.assertEqual(len(json_data["goals"]), 1)
self.assertEqual(json_data["goals"][0]["goal"], 128937123)
self.assertEqual(json_data["goals"][0]["year"], timezone.now().year)
self.assertEqual(json_data["goals"][0]["privacy"], "followers")
# saved lists
self.assertEqual(len(json_data["saved_lists"]), 1)
self.assertEqual(json_data["saved_lists"][0], "https://local.lists/9999")
# follows
self.assertEqual(len(json_data["follows"]), 1)
self.assertEqual(json_data["follows"][0], "https://your.domain.here/user/rat")
# blocked users
self.assertEqual(len(json_data["blocks"]), 1)
self.assertEqual(json_data["blocks"][0], "https://your.domain.here/user/badger")
def test_json_export_books(self):
"""Test the json export function for extended user info"""
data = export_job.json_export(self.local_user)
json_data = json.loads(data)
start_date = json_data["books"][0]["readthroughs"][0]["start_date"]
self.assertEqual(len(json_data["books"]), 1)
self.assertEqual(json_data["books"][0]["edition"]["title"], "Example Edition")
self.assertEqual(len(json_data["books"][0]["authors"]), 1)
self.assertEqual(json_data["books"][0]["authors"][0]["name"], "Sam Zhu")
self.assertEqual(
f'"{start_date}"', DjangoJSONEncoder().encode(self.readthrough_start)
) )
self.assertEqual(json_data["books"][0]["shelves"][0]["name"], "Read") def test_start_export_task(self):
"""test saved list task saves initial json and data"""
self.assertIsNotNone(self.job.export_data)
self.assertIsNotNone(self.job.export_json)
self.assertEqual(self.job.export_json["name"], self.local_user.name)
self.assertEqual(len(json_data["books"][0]["lists"]), 1) def test_export_saved_lists_task(self):
self.assertEqual(json_data["books"][0]["lists"][0]["name"], "My excellent list") """test export_saved_lists_task adds the saved lists"""
self.assertIsNotNone(self.job.export_json["saved_lists"])
self.assertEqual( self.assertEqual(
json_data["books"][0]["lists"][0]["list_item"]["book"], self.job.export_json["saved_lists"][0], self.saved_list.remote_id
self.edition.remote_id,
self.edition.id,
) )
self.assertEqual(len(json_data["books"][0]["reviews"]), 1) def test_export_follows_task(self):
self.assertEqual(len(json_data["books"][0]["comments"]), 1) """test export_follows_task adds the follows"""
self.assertEqual(len(json_data["books"][0]["quotations"]), 1) self.assertIsNotNone(self.job.export_json["follows"])
self.assertEqual(self.job.export_json["follows"][0], self.rat_user.remote_id)
self.assertEqual(json_data["books"][0]["reviews"][0]["name"], "my review") def test_export_blocks_task(self):
self.assertEqual( """test export_blocks_task adds the blocks"""
json_data["books"][0]["reviews"][0]["content"], "<p>awesome</p>" self.assertIsNotNone(self.job.export_json["blocks"])
) self.assertEqual(self.job.export_json["blocks"][0], self.badger_user.remote_id)
self.assertEqual(json_data["books"][0]["reviews"][0]["rating"], 5.0)
self.assertEqual( def test_export_reading_goals_task(self):
json_data["books"][0]["comments"][0]["content"], "<p>ok so far</p>" """test export_reading_goals_task adds the goals"""
) self.assertIsNotNone(self.job.export_json["goals"])
self.assertEqual(json_data["books"][0]["comments"][0]["progress"], 15) self.assertEqual(self.job.export_json["goals"][0]["goal"], 128937123)
self.assertEqual(json_data["books"][0]["comments"][0]["progress_mode"], "PG")
def test_json_export(self):
"""test json_export job adds settings"""
self.assertIsNotNone(self.job.export_json["settings"])
self.assertFalse(self.job.export_json["settings"]["show_goal"])
self.assertEqual( self.assertEqual(
json_data["books"][0]["quotations"][0]["content"], "<p>check this out</p>" self.job.export_json["settings"]["preferred_timezone"],
"America/Los Angeles",
) )
self.assertEqual( self.assertEqual(
json_data["books"][0]["quotations"][0]["quote"], self.job.export_json["settings"]["default_post_privacy"], "followers"
"<p>A rose by any other name</p>",
) )
self.assertFalse(self.job.export_json["settings"]["show_suggested_users"])
def test_get_books_for_user(self):
"""does get_books_for_user get all the books"""
data = models.bookwyrm_export_job.get_books_for_user(self.local_user)
self.assertEqual(len(data), 1)
self.assertEqual(data[0].title, "Example Edition")
def test_archive(self):
"""actually create the TAR file"""
models.bookwyrm_export_job.create_archive_task(job_id=self.job.id)
self.job.refresh_from_db()
with (
self.job.export_data.open("rb") as tar_file,
BookwyrmTarFile.open(mode="r", fileobj=tar_file) as tar,
):
archive_json_file = tar.extractfile("archive.json")
data = json.load(archive_json_file)
# JSON from the archive should be what we want it to be
self.assertEqual(data, self.job.export_json)
# User avatar should be present in archive
with self.local_user.avatar.open() as expected_avatar:
archive_avatar = tar.extractfile(data["icon"]["url"])
self.assertEqual(expected_avatar.read(), archive_avatar.read())
# Edition cover should be present in archive
with self.edition.cover.open() as expected_cover:
archive_cover = tar.extractfile(
data["books"][0]["edition"]["cover"]["url"]
)
self.assertEqual(expected_cover.read(), archive_cover.read())

View file

@ -42,7 +42,7 @@ class ExportUserViews(TestCase):
request = self.factory.post("") request = self.factory.post("")
request.user = self.local_user request.user = self.local_user
with patch("bookwyrm.models.bookwyrm_export_job.start_export_task.delay"): with patch("bookwyrm.models.bookwyrm_export_job.BookwyrmExportJob.start_job"):
export = views.ExportUser.as_view()(request) export = views.ExportUser.as_view()(request)
self.assertIsInstance(export, HttpResponse) self.assertIsInstance(export, HttpResponse)
self.assertEqual(export.status_code, 302) self.assertEqual(export.status_code, 302)

View file

@ -1,5 +1,6 @@
"""manage tar files for user exports""" """manage tar files for user exports"""
import io import io
import os
import tarfile import tarfile
from typing import Any, Optional from typing import Any, Optional
from uuid import uuid4 from uuid import uuid4
@ -17,20 +18,20 @@ class BookwyrmTarFile(tarfile.TarFile):
self.addfile(info, fileobj=buffer) self.addfile(info, fileobj=buffer)
def add_image( def add_image(
self, image: Any, filename: Optional[str] = None, directory: Any = "" self, image: Any, filename: Optional[str] = None, directory: str = ""
) -> None: ) -> None:
""" """
Add an image to the tar archive Add an image to the tar archive
:param str filename: overrides the file name set by image :param str filename: overrides the file name set by image
:param str directory: the directory in the archive to put the image :param str directory: the directory in the archive to put the image
""" """
if filename is not None: if filename is None:
file_type = image.name.rsplit(".", maxsplit=1)[-1] dst_filename = image.name
filename = f"{directory}{filename}.{file_type}"
else: else:
filename = f"{directory}{image.name}" dst_filename = filename + os.path.splitext(image.name)[1]
dst_path = os.path.join(directory, dst_filename)
info = tarfile.TarInfo(name=filename) info = tarfile.TarInfo(name=dst_path)
info.size = image.size info.size = image.size
self.addfile(info, fileobj=image) self.addfile(info, fileobj=image)
@ -43,7 +44,7 @@ class BookwyrmTarFile(tarfile.TarFile):
def write_image_to_file(self, filename: str, file_field: Any) -> None: def write_image_to_file(self, filename: str, file_field: Any) -> None:
"""add an image to the tar""" """add an image to the tar"""
extension = filename.rsplit(".")[-1] extension = os.path.splitext(filename)[1]
if buf := self.extractfile(filename): if buf := self.extractfile(filename):
filename = f"{str(uuid4())}.{extension}" filename = str(uuid4()) + extension
file_field.save(filename, File(buf)) file_field.save(filename, File(buf))

View file

@ -9,7 +9,7 @@ from django.views.decorators.http import require_POST
from bookwyrm import models from bookwyrm import models
from bookwyrm.views.helpers import redirect_to_referer from bookwyrm.views.helpers import redirect_to_referer
from bookwyrm.settings import PAGE_LENGTH, USE_S3 from bookwyrm.settings import PAGE_LENGTH, USE_AZURE
# pylint: disable=no-self-use # pylint: disable=no-self-use
@ -59,7 +59,7 @@ class ImportList(View):
"import_size_limit": site_settings.import_size_limit, "import_size_limit": site_settings.import_size_limit,
"import_limit_reset": site_settings.import_limit_reset, "import_limit_reset": site_settings.import_limit_reset,
"user_import_time_limit": site_settings.user_import_time_limit, "user_import_time_limit": site_settings.user_import_time_limit,
"use_s3": USE_S3, "use_azure": USE_AZURE,
} }
return TemplateResponse(request, "settings/imports/imports.html", data) return TemplateResponse(request, "settings/imports/imports.html", data)

View file

@ -6,16 +6,19 @@ import io
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
from django.core.paginator import Paginator from django.core.paginator import Paginator
from django.db.models import Q from django.db.models import Q
from django.http import HttpResponse from django.http import HttpResponse, HttpResponseServerError, Http404
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
from django.utils import timezone from django.utils import timezone
from django.views import View from django.views import View
from django.urls import reverse
from django.utils.decorators import method_decorator from django.utils.decorators import method_decorator
from django.shortcuts import redirect from django.shortcuts import redirect
from bookwyrm import models from storages.backends.s3boto3 import S3Boto3Storage
from bookwyrm import models, storage_backends
from bookwyrm.models.bookwyrm_export_job import BookwyrmExportJob from bookwyrm.models.bookwyrm_export_job import BookwyrmExportJob
from bookwyrm.settings import PAGE_LENGTH from bookwyrm import settings
# pylint: disable=no-self-use,too-many-locals # pylint: disable=no-self-use,too-many-locals
@ -144,25 +147,53 @@ class Export(View):
# pylint: disable=no-self-use # pylint: disable=no-self-use
@method_decorator(login_required, name="dispatch") @method_decorator(login_required, name="dispatch")
class ExportUser(View): class ExportUser(View):
"""Let users export user data to import into another Bookwyrm instance""" """
Let users request and download an archive of user data to import into
another Bookwyrm instance.
"""
user_jobs = None
def setup(self, request, *args, **kwargs):
super().setup(request, *args, **kwargs)
self.user_jobs = BookwyrmExportJob.objects.filter(user=request.user).order_by(
"-created_date"
)
def new_export_blocked_until(self):
"""whether the user is allowed to request a new export"""
last_job = self.user_jobs.first()
if not last_job:
return None
site = models.SiteSettings.objects.get()
blocked_until = last_job.created_date + timedelta(
hours=site.user_import_time_limit
)
return blocked_until if blocked_until > timezone.now() else None
def get(self, request): def get(self, request):
"""Request tar file""" """Request tar file"""
jobs = BookwyrmExportJob.objects.filter(user=request.user).order_by( exports = []
"-created_date" for job in self.user_jobs:
) export = {"job": job}
site = models.SiteSettings.objects.get()
hours = site.user_import_time_limit if job.export_data:
allowed = ( try:
jobs.first().created_date < timezone.now() - timedelta(hours=hours) export["size"] = job.export_data.size
if jobs.first() export["url"] = reverse("prefs-export-file", args=[job.task_id])
else True except FileNotFoundError:
) # file no longer exists locally
next_available = ( export["unavailable"] = True
jobs.first().created_date + timedelta(hours=hours) if not allowed else False except Exception: # pylint: disable=broad-except
) # file no longer exists on storage backend
paginated = Paginator(jobs, PAGE_LENGTH) export["unavailable"] = True
exports.append(export)
next_available = self.new_export_blocked_until()
paginated = Paginator(exports, settings.PAGE_LENGTH)
page = paginated.get_page(request.GET.get("page")) page = paginated.get_page(request.GET.get("page"))
data = { data = {
"jobs": page, "jobs": page,
@ -175,7 +206,9 @@ class ExportUser(View):
return TemplateResponse(request, "preferences/export-user.html", data) return TemplateResponse(request, "preferences/export-user.html", data)
def post(self, request): def post(self, request):
"""Download the json file of a user's data""" """Trigger processing of a new user export file"""
if self.new_export_blocked_until() is not None:
return HttpResponse(status=429) # Too Many Requests
job = BookwyrmExportJob.objects.create(user=request.user) job = BookwyrmExportJob.objects.create(user=request.user)
job.start_job() job.start_job()
@ -187,13 +220,35 @@ class ExportUser(View):
class ExportArchive(View): class ExportArchive(View):
"""Serve the archive file""" """Serve the archive file"""
# TODO: how do we serve s3 files?
def get(self, request, archive_id): def get(self, request, archive_id):
"""download user export file""" """download user export file"""
export = BookwyrmExportJob.objects.get(task_id=archive_id, user=request.user) export = BookwyrmExportJob.objects.get(task_id=archive_id, user=request.user)
return HttpResponse(
export.export_data, if isinstance(export.export_data.storage, storage_backends.ExportsS3Storage):
content_type="application/gzip", # make custom_domain None so we can sign the url
headers={ # see https://github.com/jschneier/django-storages/issues/944
"Content-Disposition": 'attachment; filename="bookwyrm-account-export.tar.gz"' # pylint: disable=line-too-long storage = S3Boto3Storage(querystring_auth=True, custom_domain=None)
}, try:
) url = S3Boto3Storage.url(
storage,
f"/exports/{export.task_id}.tar.gz",
expire=settings.S3_SIGNED_URL_EXPIRY,
)
except Exception:
raise Http404()
return redirect(url)
if isinstance(export.export_data.storage, storage_backends.ExportsFileStorage):
try:
return HttpResponse(
export.export_data,
content_type="application/gzip",
headers={
"Content-Disposition": 'attachment; filename="bookwyrm-account-export.tar.gz"' # pylint: disable=line-too-long
},
)
except FileNotFoundError:
raise Http404()
return HttpResponseServerError()

View file

@ -29,6 +29,7 @@ services:
- .:/app - .:/app
- static_volume:/app/static - static_volume:/app/static
- media_volume:/app/images - media_volume:/app/images
- exports_volume:/app/exports
depends_on: depends_on:
- db - db
- celery_worker - celery_worker
@ -67,6 +68,7 @@ services:
- .:/app - .:/app
- static_volume:/app/static - static_volume:/app/static
- media_volume:/app/images - media_volume:/app/images
- exports_volume:/app/exports
depends_on: depends_on:
- db - db
- redis_broker - redis_broker
@ -81,6 +83,7 @@ services:
- .:/app - .:/app
- static_volume:/app/static - static_volume:/app/static
- media_volume:/app/images - media_volume:/app/images
- exports_volume:/app/exports
depends_on: depends_on:
- celery_worker - celery_worker
restart: on-failure restart: on-failure
@ -109,6 +112,7 @@ volumes:
pgdata: pgdata:
static_volume: static_volume:
media_volume: media_volume:
exports_volume:
redis_broker_data: redis_broker_data:
redis_activity_data: redis_activity_data:
networks: networks:

0
exports/.gitkeep Normal file
View file

View file

@ -38,6 +38,7 @@ qrcode==7.3.1
redis==4.5.4 redis==4.5.4
requests==2.31.0 requests==2.31.0
responses==0.22.0 responses==0.22.0
s3-tar==0.1.13
setuptools>=65.5.1 # Not a direct dependency, pinned to get a security fix setuptools>=65.5.1 # Not a direct dependency, pinned to get a security fix
tornado==6.3.3 # Not a direct dependency, pinned to get a security fix tornado==6.3.3 # Not a direct dependency, pinned to get a security fix