mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-23 08:36:32 +00:00
Merge pull request #3228 from hughrun/user-export
Fix user exports to deal with s3 storage
This commit is contained in:
commit
21a39f8170
25 changed files with 730 additions and 303 deletions
|
@ -71,6 +71,9 @@ ENABLE_THUMBNAIL_GENERATION=true
|
|||
USE_S3=false
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
# seconds for signed S3 urls to expire
|
||||
# this is currently only used for user export files
|
||||
S3_SIGNED_URL_EXPIRY=900
|
||||
|
||||
# Commented are example values if you use a non-AWS, S3-compatible service
|
||||
# AWS S3 should work with only AWS_STORAGE_BUCKET_NAME and AWS_S3_REGION_NAME
|
||||
|
|
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -16,6 +16,7 @@
|
|||
# BookWyrm
|
||||
.env
|
||||
/images/
|
||||
/exports/
|
||||
/static/
|
||||
bookwyrm/static/css/bookwyrm.css
|
||||
bookwyrm/static/css/themes/
|
||||
|
|
92
bookwyrm/migrations/0193_auto_20240128_0249.py
Normal file
92
bookwyrm/migrations/0193_auto_20240128_0249.py
Normal file
|
@ -0,0 +1,92 @@
|
|||
# Generated by Django 3.2.23 on 2024-01-28 02:49
|
||||
|
||||
import bookwyrm.storage_backends
|
||||
import django.core.serializers.json
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Prepare ``BookwyrmExportJob`` for a staged user export.

    Adds the ``export_json`` and ``json_completed`` fields, points
    ``export_data`` at ``ExportsFileStorage`` and creates the
    ``AddFileToTar`` and ``AddBookToUserExportJob`` child-job models.
    Both child-job models are removed again by ``0200_auto_20240327_1914``.
    """

    dependencies = [
        ("bookwyrm", "0192_sitesettings_user_exports_enabled"),
    ]

    operations = [
        # nullable JSON payload, encoded with DjangoJSONEncoder
        migrations.AddField(
            model_name="bookwyrmexportjob",
            name="export_json",
            field=models.JSONField(
                encoder=django.core.serializers.json.DjangoJSONEncoder, null=True
            ),
        ),
        # flag defaulting to False
        migrations.AddField(
            model_name="bookwyrmexportjob",
            name="json_completed",
            field=models.BooleanField(default=False),
        ),
        # store the export file through the dedicated exports storage class
        migrations.AlterField(
            model_name="bookwyrmexportjob",
            name="export_data",
            field=models.FileField(
                null=True,
                storage=bookwyrm.storage_backends.ExportsFileStorage,
                upload_to="",
            ),
        ),
        # multi-table child of ChildJob, linked to its parent export job
        migrations.CreateModel(
            name="AddFileToTar",
            fields=[
                (
                    "childjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.childjob",
                    ),
                ),
                (
                    "parent_export_job",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="child_edition_export_jobs",
                        to="bookwyrm.bookwyrmexportjob",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.childjob",),
        ),
        # multi-table child of ChildJob, linked to a single edition
        migrations.CreateModel(
            name="AddBookToUserExportJob",
            fields=[
                (
                    "childjob_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="bookwyrm.childjob",
                    ),
                ),
                (
                    "edition",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="bookwyrm.edition",
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
            bases=("bookwyrm.childjob",),
        ),
    ]
|
13
bookwyrm/migrations/0196_merge_20240318_1737.py
Normal file
13
bookwyrm/migrations/0196_merge_20240318_1737.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Generated by Django 3.2.23 on 2024-03-18 17:37
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Merge migration: joins the 0193 and 0195 branches, no schema changes."""

    dependencies = [
        ("bookwyrm", "0193_auto_20240128_0249"),
        ("bookwyrm", "0195_alter_user_preferred_language"),
    ]

    # nothing to apply; this migration only joins the two dependency branches
    operations = []
|
13
bookwyrm/migrations/0197_merge_20240324_0235.py
Normal file
13
bookwyrm/migrations/0197_merge_20240324_0235.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Generated by Django 3.2.25 on 2024-03-24 02:35
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Merge migration: joins the two 0196 merge branches, no schema changes."""

    dependencies = [
        ("bookwyrm", "0196_merge_20240318_1737"),
        ("bookwyrm", "0196_merge_pr3134_into_main"),
    ]

    # nothing to apply; this migration only joins the two dependency branches
    operations = []
|
|
@ -0,0 +1,23 @@
|
|||
# Generated by Django 3.2.25 on 2024-03-26 11:37
|
||||
|
||||
import bookwyrm.models.bookwyrm_export_job
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Switch ``BookwyrmExportJob.export_data`` to a callable storage.

    The field's storage becomes the ``select_exports_storage`` callable from
    ``bookwyrm.models.bookwyrm_export_job`` instead of a fixed storage class.
    """

    dependencies = [
        ("bookwyrm", "0197_merge_20240324_0235"),
    ]

    operations = [
        migrations.AlterField(
            model_name="bookwyrmexportjob",
            name="export_data",
            field=models.FileField(
                null=True,
                # a callable, so the backend is not hard-coded in the migration
                storage=bookwyrm.models.bookwyrm_export_job.select_exports_storage,
                upload_to="",
            ),
        ),
    ]
|
13
bookwyrm/migrations/0199_merge_20240326_1217.py
Normal file
13
bookwyrm/migrations/0199_merge_20240326_1217.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Generated by Django 3.2.25 on 2024-03-26 12:17
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Merge migration: joins the two 0198 branches, no schema changes."""

    dependencies = [
        ("bookwyrm", "0198_alter_bookwyrmexportjob_export_data"),
        ("bookwyrm", "0198_book_search_vector_author_aliases"),
    ]

    # nothing to apply; this migration only joins the two dependency branches
    operations = []
|
27
bookwyrm/migrations/0200_auto_20240327_1914.py
Normal file
27
bookwyrm/migrations/0200_auto_20240327_1914.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Generated by Django 3.2.25 on 2024-03-27 19:14
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Drop the ``AddFileToTar`` and ``AddBookToUserExportJob`` models.

    The two fields of ``AddFileToTar`` are removed first, then both models
    are deleted.
    """

    dependencies = [
        ("bookwyrm", "0199_merge_20240326_1217"),
    ]

    operations = [
        # remove AddFileToTar's fields before the model itself is deleted
        migrations.RemoveField(
            model_name="addfiletotar",
            name="childjob_ptr",
        ),
        migrations.RemoveField(
            model_name="addfiletotar",
            name="parent_export_job",
        ),
        migrations.DeleteModel(
            name="AddBookToUserExportJob",
        ),
        migrations.DeleteModel(
            name="AddFileToTar",
        ),
    ]
|
13
bookwyrm/migrations/0205_merge_20240413_0232.py
Normal file
13
bookwyrm/migrations/0205_merge_20240413_0232.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Generated by Django 3.2.25 on 2024-04-13 02:32
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Merge migration: joins the 0200 and 0204 branches, no schema changes."""

    dependencies = [
        ("bookwyrm", "0200_auto_20240327_1914"),
        ("bookwyrm", "0204_merge_20240409_1042"),
    ]

    # nothing to apply; this migration only joins the two dependency branches
    operations = []
|
|
@ -1,213 +1,318 @@
|
|||
"""Export user account to tar.gz file for import into another Bookwyrm instance"""
|
||||
|
||||
import dataclasses
|
||||
import logging
|
||||
from uuid import uuid4
|
||||
import os
|
||||
|
||||
from django.db.models import FileField
|
||||
from boto3.session import Session as BotoSession
|
||||
from s3_tar import S3Tar
|
||||
|
||||
from django.db.models import BooleanField, FileField, JSONField
|
||||
from django.db.models import Q
|
||||
from django.core.serializers.json import DjangoJSONEncoder
|
||||
from django.core.files.base import ContentFile
|
||||
from django.utils.module_loading import import_string
|
||||
|
||||
from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, List, ListItem
|
||||
from bookwyrm import settings, storage_backends
|
||||
|
||||
from bookwyrm.models import AnnualGoal, ReadThrough, ShelfBook, ListItem
|
||||
from bookwyrm.models import Review, Comment, Quotation
|
||||
from bookwyrm.models import Edition
|
||||
from bookwyrm.models import UserFollows, User, UserBlocks
|
||||
from bookwyrm.models.job import ParentJob, ParentTask
|
||||
from bookwyrm.models.job import ParentJob
|
||||
from bookwyrm.tasks import app, IMPORTS
|
||||
from bookwyrm.utils.tar import BookwyrmTarFile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BookwyrmAwsSession(BotoSession):
    """Boto session whose clients always target settings.AWS_S3_ENDPOINT_URL"""

    def client(self, *args, **kwargs):  # pylint: disable=arguments-differ
        """Create an "s3" client bound to the configured endpoint.

        Any ``endpoint_url`` supplied by the caller is replaced with
        ``settings.AWS_S3_ENDPOINT_URL``; every other argument is forwarded
        to the parent implementation unchanged.
        """
        # NOTE(review): positional arguments are forwarded *after* the
        # hard-coded "s3", so a caller that passes the service name
        # positionally hands it on as the second positional argument.
        # Confirm against how S3Tar calls ``session.client``.
        client_options = {**kwargs, "endpoint_url": settings.AWS_S3_ENDPOINT_URL}
        return super().client("s3", *args, **client_options)
|
||||
|
||||
|
||||
def select_exports_storage():
    """Return the storage backend instance used for user export files.

    Passed to ``FileField(storage=...)`` as a callable so that the backend
    follows ``settings.EXPORTS_STORAGE`` at runtime.

    ``settings.EXPORTS_STORAGE`` is ``None`` when Azure storage is configured
    (exports are not implemented there). ``import_string(None)`` raises, and
    because this callable backs a model field the error would surface as soon
    as the model is loaded, so an unset value falls back to the local
    ``ExportsFileStorage`` instead.
    """
    storage_path = settings.EXPORTS_STORAGE
    if not storage_path:
        # no exports backend configured: keep the model importable by using
        # the local "exports" file storage
        return storage_backends.ExportsFileStorage()

    storage_class = import_string(storage_path)
    return storage_class()
|
||||
|
||||
|
||||
class BookwyrmExportJob(ParentJob):
|
||||
"""entry for a specific request to export a bookwyrm user"""
|
||||
|
||||
export_data = FileField(null=True)
|
||||
export_data = FileField(null=True, storage=select_exports_storage)
|
||||
export_json = JSONField(null=True, encoder=DjangoJSONEncoder)
|
||||
json_completed = BooleanField(default=False)
|
||||
|
||||
def start_job(self):
|
||||
"""Start the job"""
|
||||
start_export_task.delay(job_id=self.id, no_children=True)
|
||||
"""schedule the first task"""
|
||||
|
||||
return self
|
||||
task = create_export_json_task.delay(job_id=self.id)
|
||||
self.task_id = task.id
|
||||
self.save(update_fields=["task_id"])
|
||||
|
||||
|
||||
@app.task(queue=IMPORTS, base=ParentTask)
|
||||
def start_export_task(**kwargs):
|
||||
"""trigger the child tasks for each row"""
|
||||
job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])
|
||||
@app.task(queue=IMPORTS)
|
||||
def create_export_json_task(job_id):
|
||||
"""create the JSON data for the export"""
|
||||
|
||||
job = BookwyrmExportJob.objects.get(id=job_id)
|
||||
|
||||
# don't start the job if it was stopped from the UI
|
||||
if job.complete:
|
||||
return
|
||||
|
||||
try:
|
||||
# This is where ChildJobs get made
|
||||
job.export_data = ContentFile(b"", str(uuid4()))
|
||||
json_data = json_export(job.user)
|
||||
tar_export(json_data, job.user, job.export_data)
|
||||
job.save(update_fields=["export_data"])
|
||||
job.set_status("active")
|
||||
|
||||
# generate JSON structure
|
||||
job.export_json = export_json(job.user)
|
||||
job.save(update_fields=["export_json"])
|
||||
|
||||
# create archive in separate task
|
||||
create_archive_task.delay(job_id=job.id)
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
logger.exception("User Export Job %s Failed with error: %s", job.id, err)
|
||||
logger.exception(
|
||||
"create_export_json_task for %s failed with error: %s", job, err
|
||||
)
|
||||
job.set_status("failed")
|
||||
|
||||
job.set_status("complete")
|
||||
|
||||
def archive_file_location(file, directory="") -> str:
    """Return where ``file`` is placed inside the export archive.

    The result is ``file.name`` joined onto ``directory``; with the default
    empty ``directory`` that is just ``file.name``.
    """
    name_in_storage = file.name
    return os.path.join(directory, name_in_storage)
|
||||
|
||||
|
||||
def tar_export(json_data: str, user, file):
|
||||
"""wrap the export information in a tar file"""
|
||||
file.open("wb")
|
||||
with BookwyrmTarFile.open(mode="w:gz", fileobj=file) as tar:
|
||||
tar.write_bytes(json_data.encode("utf-8"))
|
||||
def add_file_to_s3_tar(s3_tar: S3Tar, storage, file, directory=""):
|
||||
"""
|
||||
add file to S3Tar inside directory, keeping any directories under its
|
||||
storage location
|
||||
"""
|
||||
s3_tar.add_file(
|
||||
os.path.join(storage.location, file.name),
|
||||
folder=os.path.dirname(archive_file_location(file, directory=directory)),
|
||||
)
|
||||
|
||||
# Add avatar image if present
|
||||
if getattr(user, "avatar", False):
|
||||
tar.add_image(user.avatar, filename="avatar")
|
||||
|
||||
@app.task(queue=IMPORTS)
|
||||
def create_archive_task(job_id):
|
||||
"""create the archive containing the JSON file and additional files"""
|
||||
|
||||
job = BookwyrmExportJob.objects.get(id=job_id)
|
||||
|
||||
# don't start the job if it was stopped from the UI
|
||||
if job.complete:
|
||||
return
|
||||
|
||||
try:
|
||||
export_task_id = str(job.task_id)
|
||||
archive_filename = f"{export_task_id}.tar.gz"
|
||||
export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8")
|
||||
|
||||
user = job.user
|
||||
editions = get_books_for_user(user)
|
||||
for book in editions:
|
||||
if getattr(book, "cover", False):
|
||||
tar.add_image(book.cover)
|
||||
|
||||
file.close()
|
||||
if settings.USE_S3:
|
||||
# Storage for writing temporary files
|
||||
exports_storage = storage_backends.ExportsS3Storage()
|
||||
|
||||
# Handle for creating the final archive
|
||||
s3_tar = S3Tar(
|
||||
exports_storage.bucket_name,
|
||||
os.path.join(exports_storage.location, archive_filename),
|
||||
session=BookwyrmAwsSession(),
|
||||
)
|
||||
|
||||
# Save JSON file to a temporary location
|
||||
export_json_tmp_file = os.path.join(export_task_id, "archive.json")
|
||||
exports_storage.save(
|
||||
export_json_tmp_file,
|
||||
ContentFile(export_json_bytes),
|
||||
)
|
||||
s3_tar.add_file(
|
||||
os.path.join(exports_storage.location, export_json_tmp_file)
|
||||
)
|
||||
|
||||
# Add images to TAR
|
||||
images_storage = storage_backends.ImagesStorage()
|
||||
|
||||
if user.avatar:
|
||||
add_file_to_s3_tar(s3_tar, images_storage, user.avatar)
|
||||
|
||||
for edition in editions:
|
||||
if edition.cover:
|
||||
add_file_to_s3_tar(
|
||||
s3_tar, images_storage, edition.cover, directory="images"
|
||||
)
|
||||
|
||||
# Create archive and store file name
|
||||
s3_tar.tar()
|
||||
job.export_data = archive_filename
|
||||
job.save(update_fields=["export_data"])
|
||||
|
||||
# Delete temporary files
|
||||
exports_storage.delete(export_json_tmp_file)
|
||||
|
||||
else:
|
||||
job.export_data = archive_filename
|
||||
with job.export_data.open("wb") as tar_file:
|
||||
with BookwyrmTarFile.open(mode="w:gz", fileobj=tar_file) as tar:
|
||||
# save json file
|
||||
tar.write_bytes(export_json_bytes)
|
||||
|
||||
# Add avatar image if present
|
||||
if user.avatar:
|
||||
tar.add_image(user.avatar)
|
||||
|
||||
for edition in editions:
|
||||
if edition.cover:
|
||||
tar.add_image(edition.cover, directory="images")
|
||||
job.save(update_fields=["export_data"])
|
||||
|
||||
job.set_status("completed")
|
||||
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
logger.exception("create_archive_task for %s failed with error: %s", job, err)
|
||||
job.set_status("failed")
|
||||
|
||||
|
||||
def json_export(
|
||||
user,
|
||||
): # pylint: disable=too-many-locals, too-many-statements, too-many-branches
|
||||
"""Generate an export for a user"""
|
||||
def export_json(user: User):
|
||||
"""create export JSON"""
|
||||
data = export_user(user) # in the root of the JSON structure
|
||||
data["settings"] = export_settings(user)
|
||||
data["goals"] = export_goals(user)
|
||||
data["books"] = export_books(user)
|
||||
data["saved_lists"] = export_saved_lists(user)
|
||||
data["follows"] = export_follows(user)
|
||||
data["blocks"] = export_blocks(user)
|
||||
return data
|
||||
|
||||
# User as AP object
|
||||
exported_user = user.to_activity()
|
||||
# I don't love this but it prevents a JSON encoding error
|
||||
# when there is no user image
|
||||
if exported_user.get("icon") in (None, dataclasses.MISSING):
|
||||
exported_user["icon"] = {}
|
||||
|
||||
def export_user(user: User):
|
||||
"""export user data"""
|
||||
data = user.to_activity()
|
||||
if user.avatar:
|
||||
data["icon"]["url"] = archive_file_location(user.avatar)
|
||||
else:
|
||||
# change the URL to be relative to the JSON file
|
||||
file_type = exported_user["icon"]["url"].rsplit(".", maxsplit=1)[-1]
|
||||
filename = f"avatar.{file_type}"
|
||||
exported_user["icon"]["url"] = filename
|
||||
data["icon"] = {}
|
||||
return data
|
||||
|
||||
# Additional settings - can't be serialized as AP
|
||||
|
||||
def export_settings(user: User):
|
||||
"""Additional settings - can't be serialized as AP"""
|
||||
vals = [
|
||||
"show_goal",
|
||||
"preferred_timezone",
|
||||
"default_post_privacy",
|
||||
"show_suggested_users",
|
||||
]
|
||||
exported_user["settings"] = {}
|
||||
for k in vals:
|
||||
exported_user["settings"][k] = getattr(user, k)
|
||||
return {k: getattr(user, k) for k in vals}
|
||||
|
||||
# Reading goals - can't be serialized as AP
|
||||
reading_goals = AnnualGoal.objects.filter(user=user).distinct()
|
||||
exported_user["goals"] = []
|
||||
for goal in reading_goals:
|
||||
exported_user["goals"].append(
|
||||
{"goal": goal.goal, "year": goal.year, "privacy": goal.privacy}
|
||||
)
|
||||
|
||||
# Reading history - can't be serialized as AP
|
||||
readthroughs = ReadThrough.objects.filter(user=user).distinct().values()
|
||||
readthroughs = list(readthroughs)
|
||||
def export_saved_lists(user: User):
    """Return the remote id of every list in ``user.saved_lists``."""
    remote_ids = []
    for saved_list in user.saved_lists.all():
        remote_ids.append(saved_list.remote_id)
    return remote_ids
|
||||
|
||||
# Books
|
||||
editions = get_books_for_user(user)
|
||||
exported_user["books"] = []
|
||||
|
||||
for edition in editions:
|
||||
book = {}
|
||||
book["work"] = edition.parent_work.to_activity()
|
||||
book["edition"] = edition.to_activity()
|
||||
|
||||
if book["edition"].get("cover"):
|
||||
# change the URL to be relative to the JSON file
|
||||
filename = book["edition"]["cover"]["url"].rsplit("/", maxsplit=1)[-1]
|
||||
book["edition"]["cover"]["url"] = f"covers/{filename}"
|
||||
|
||||
# authors
|
||||
book["authors"] = []
|
||||
for author in edition.authors.all():
|
||||
book["authors"].append(author.to_activity())
|
||||
|
||||
# Shelves this book is on
|
||||
# Every ShelfItem is this book so we don't other serializing
|
||||
book["shelves"] = []
|
||||
shelf_books = (
|
||||
ShelfBook.objects.select_related("shelf")
|
||||
.filter(user=user, book=edition)
|
||||
.distinct()
|
||||
)
|
||||
|
||||
for shelfbook in shelf_books:
|
||||
book["shelves"].append(shelfbook.shelf.to_activity())
|
||||
|
||||
# Lists and ListItems
|
||||
# ListItems include "notes" and "approved" so we need them
|
||||
# even though we know it's this book
|
||||
book["lists"] = []
|
||||
list_items = ListItem.objects.filter(book=edition, user=user).distinct()
|
||||
|
||||
for item in list_items:
|
||||
list_info = item.book_list.to_activity()
|
||||
list_info[
|
||||
"privacy"
|
||||
] = item.book_list.privacy # this isn't serialized so we add it
|
||||
list_info["list_item"] = item.to_activity()
|
||||
book["lists"].append(list_info)
|
||||
|
||||
# Statuses
|
||||
# Can't use select_subclasses here because
|
||||
# we need to filter on the "book" value,
|
||||
# which is not available on an ordinary Status
|
||||
for status in ["comments", "quotations", "reviews"]:
|
||||
book[status] = []
|
||||
|
||||
comments = Comment.objects.filter(user=user, book=edition).all()
|
||||
for status in comments:
|
||||
obj = status.to_activity()
|
||||
obj["progress"] = status.progress
|
||||
obj["progress_mode"] = status.progress_mode
|
||||
book["comments"].append(obj)
|
||||
|
||||
quotes = Quotation.objects.filter(user=user, book=edition).all()
|
||||
for status in quotes:
|
||||
obj = status.to_activity()
|
||||
obj["position"] = status.position
|
||||
obj["endposition"] = status.endposition
|
||||
obj["position_mode"] = status.position_mode
|
||||
book["quotations"].append(obj)
|
||||
|
||||
reviews = Review.objects.filter(user=user, book=edition).all()
|
||||
for status in reviews:
|
||||
obj = status.to_activity()
|
||||
book["reviews"].append(obj)
|
||||
|
||||
# readthroughs can't be serialized to activity
|
||||
book_readthroughs = (
|
||||
ReadThrough.objects.filter(user=user, book=edition).distinct().values()
|
||||
)
|
||||
book["readthroughs"] = list(book_readthroughs)
|
||||
|
||||
# append everything
|
||||
exported_user["books"].append(book)
|
||||
|
||||
# saved book lists - just the remote id
|
||||
saved_lists = List.objects.filter(id__in=user.saved_lists.all()).distinct()
|
||||
exported_user["saved_lists"] = [l.remote_id for l in saved_lists]
|
||||
|
||||
# follows - just the remote id
|
||||
def export_follows(user: User):
|
||||
"""add user follows to export JSON"""
|
||||
follows = UserFollows.objects.filter(user_subject=user).distinct()
|
||||
following = User.objects.filter(userfollows_user_object__in=follows).distinct()
|
||||
exported_user["follows"] = [f.remote_id for f in following]
|
||||
return [f.remote_id for f in following]
|
||||
|
||||
# blocks - just the remote id
|
||||
|
||||
def export_blocks(user: User):
    """Return the remote ids of the users that ``user`` blocks."""
    # block relations in which ``user`` is the subject
    block_relations = UserBlocks.objects.filter(user_subject=user).distinct()
    # the users on the object side of those relations
    blocked_users = User.objects.filter(
        userblocks_user_object__in=block_relations
    ).distinct()
    return [blocked.remote_id for blocked in blocked_users]
|
||||
|
||||
exported_user["blocks"] = [b.remote_id for b in blocking]
|
||||
|
||||
return DjangoJSONEncoder().encode(exported_user)
|
||||
def export_goals(user: User):
    """Return the user's annual reading goals as plain dicts.

    Each entry carries the ``goal``, ``year`` and ``privacy`` of one
    ``AnnualGoal`` belonging to ``user``.
    """
    goals = []
    for annual_goal in AnnualGoal.objects.filter(user=user).distinct():
        goals.append(
            {
                "goal": annual_goal.goal,
                "year": annual_goal.year,
                "privacy": annual_goal.privacy,
            }
        )
    return goals
|
||||
|
||||
|
||||
def export_books(user: User):
    """Return one export entry per edition yielded by get_books_for_user."""
    books = []
    for edition in get_books_for_user(user):
        books.append(export_book(user, edition))
    return books
|
||||
|
||||
|
||||
def export_book(user: User, edition: Edition):
    """Build the export entry for a single edition.

    The entry holds the activity serialisations of the edition, its parent
    work and its authors, plus everything ``user`` has attached to the book:
    shelves, list items, comments, quotations, reviews and readthroughs.
    """
    data = {
        "work": edition.parent_work.to_activity(),
        "edition": edition.to_activity(),
    }

    if edition.cover:
        # point the cover at its location inside the archive
        data["edition"]["cover"]["url"] = archive_file_location(
            edition.cover, directory="images"
        )

    # authors
    data["authors"] = [author.to_activity() for author in edition.authors.all()]

    # Shelves this book is on
    # Every ShelfBook is for this book so we don't bother serializing it
    shelf_books = (
        ShelfBook.objects.select_related("shelf")
        .filter(user=user, book=edition)
        .distinct()
    )
    data["shelves"] = [shelf_book.shelf.to_activity() for shelf_book in shelf_books]

    # Lists and ListItems
    # ListItems carry "notes" and "approved", so they are exported
    # even though the book itself is already known
    exported_lists = []
    for list_item in ListItem.objects.filter(book=edition, user=user).distinct():
        list_info = list_item.book_list.to_activity()
        # privacy isn't part of the serialized list, so add it by hand
        list_info["privacy"] = list_item.book_list.privacy
        list_info["list_item"] = list_item.to_activity()
        exported_lists.append(list_info)
    data["lists"] = exported_lists

    # Statuses
    # select_subclasses can't be used here: the filter needs the "book"
    # value, which is not available on an ordinary Status
    exported_comments = []
    for comment in Comment.objects.filter(user=user, book=edition).all():
        activity = comment.to_activity()
        activity["progress"] = comment.progress
        activity["progress_mode"] = comment.progress_mode
        exported_comments.append(activity)
    data["comments"] = exported_comments

    exported_quotations = []
    for quotation in Quotation.objects.filter(user=user, book=edition).all():
        activity = quotation.to_activity()
        activity["position"] = quotation.position
        activity["endposition"] = quotation.endposition
        activity["position_mode"] = quotation.position_mode
        exported_quotations.append(activity)
    data["quotations"] = exported_quotations

    reviews = Review.objects.filter(user=user, book=edition).all()
    data["reviews"] = [review.to_activity() for review in reviews]

    # readthroughs can't be serialized to activity, so export the raw values
    data["readthroughs"] = list(
        ReadThrough.objects.filter(user=user, book=edition).distinct().values()
    )
    return data
|
||||
|
||||
|
||||
def get_books_for_user(user):
|
||||
|
|
|
@ -42,20 +42,23 @@ def start_import_task(**kwargs):
|
|||
try:
|
||||
archive_file.open("rb")
|
||||
with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar:
|
||||
job.import_data = json.loads(tar.read("archive.json").decode("utf-8"))
|
||||
json_filename = next(
|
||||
filter(lambda n: n.startswith("archive"), tar.getnames())
|
||||
)
|
||||
job.import_data = json.loads(tar.read(json_filename).decode("utf-8"))
|
||||
|
||||
if "include_user_profile" in job.required:
|
||||
update_user_profile(job.user, tar, job.import_data)
|
||||
if "include_user_settings" in job.required:
|
||||
update_user_settings(job.user, job.import_data)
|
||||
if "include_goals" in job.required:
|
||||
update_goals(job.user, job.import_data.get("goals"))
|
||||
update_goals(job.user, job.import_data.get("goals", []))
|
||||
if "include_saved_lists" in job.required:
|
||||
upsert_saved_lists(job.user, job.import_data.get("saved_lists"))
|
||||
upsert_saved_lists(job.user, job.import_data.get("saved_lists", []))
|
||||
if "include_follows" in job.required:
|
||||
upsert_follows(job.user, job.import_data.get("follows"))
|
||||
upsert_follows(job.user, job.import_data.get("follows", []))
|
||||
if "include_blocks" in job.required:
|
||||
upsert_user_blocks(job.user, job.import_data.get("blocks"))
|
||||
upsert_user_blocks(job.user, job.import_data.get("blocks", []))
|
||||
|
||||
process_books(job, tar)
|
||||
|
||||
|
@ -212,7 +215,7 @@ def upsert_statuses(user, cls, data, book_remote_id):
|
|||
instance.save() # save and broadcast
|
||||
|
||||
else:
|
||||
logger.info("User does not have permission to import statuses")
|
||||
logger.warning("User does not have permission to import statuses")
|
||||
|
||||
|
||||
def upsert_lists(user, lists, book_id):
|
||||
|
|
|
@ -135,8 +135,7 @@ class ParentJob(Job):
|
|||
)
|
||||
app.control.revoke(list(tasks))
|
||||
|
||||
for task in self.pending_child_jobs:
|
||||
task.update(status=self.Status.STOPPED)
|
||||
self.pending_child_jobs.update(status=self.Status.STOPPED)
|
||||
|
||||
@property
|
||||
def has_completed(self):
|
||||
|
@ -248,7 +247,7 @@ class SubTask(app.Task):
|
|||
"""
|
||||
|
||||
def before_start(
|
||||
self, task_id, args, kwargs
|
||||
self, task_id, *args, **kwargs
|
||||
): # pylint: disable=no-self-use, unused-argument
|
||||
"""Handler called before the task starts. Override.
|
||||
|
||||
|
@ -272,7 +271,7 @@ class SubTask(app.Task):
|
|||
child_job.set_status(ChildJob.Status.ACTIVE)
|
||||
|
||||
def on_success(
|
||||
self, retval, task_id, args, kwargs
|
||||
self, retval, task_id, *args, **kwargs
|
||||
): # pylint: disable=no-self-use, unused-argument
|
||||
"""Run by the worker if the task executes successfully. Override.
|
||||
|
||||
|
|
|
@ -374,6 +374,7 @@ if USE_HTTPS:
|
|||
|
||||
USE_S3 = env.bool("USE_S3", False)
|
||||
USE_AZURE = env.bool("USE_AZURE", False)
|
||||
S3_SIGNED_URL_EXPIRY = env.int("S3_SIGNED_URL_EXPIRY", 900)
|
||||
|
||||
if USE_S3:
|
||||
# AWS settings
|
||||
|
@ -388,16 +389,20 @@ if USE_S3:
|
|||
# S3 Static settings
|
||||
STATIC_LOCATION = "static"
|
||||
STATIC_URL = f"{PROTOCOL}://{AWS_S3_CUSTOM_DOMAIN}/{STATIC_LOCATION}/"
|
||||
STATIC_FULL_URL = STATIC_URL
|
||||
STATICFILES_STORAGE = "bookwyrm.storage_backends.StaticStorage"
|
||||
# S3 Media settings
|
||||
MEDIA_LOCATION = "images"
|
||||
MEDIA_URL = f"{PROTOCOL}://{AWS_S3_CUSTOM_DOMAIN}/{MEDIA_LOCATION}/"
|
||||
MEDIA_FULL_URL = MEDIA_URL
|
||||
STATIC_FULL_URL = STATIC_URL
|
||||
DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.ImagesStorage"
|
||||
# S3 Exports settings
|
||||
EXPORTS_STORAGE = "bookwyrm.storage_backends.ExportsS3Storage"
|
||||
# Content Security Policy
|
||||
CSP_DEFAULT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
|
||||
CSP_SCRIPT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
|
||||
elif USE_AZURE:
|
||||
# Azure settings
|
||||
AZURE_ACCOUNT_NAME = env("AZURE_ACCOUNT_NAME")
|
||||
AZURE_ACCOUNT_KEY = env("AZURE_ACCOUNT_KEY")
|
||||
AZURE_CONTAINER = env("AZURE_CONTAINER")
|
||||
|
@ -407,6 +412,7 @@ elif USE_AZURE:
|
|||
STATIC_URL = (
|
||||
f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{STATIC_LOCATION}/"
|
||||
)
|
||||
STATIC_FULL_URL = STATIC_URL
|
||||
STATICFILES_STORAGE = "bookwyrm.storage_backends.AzureStaticStorage"
|
||||
# Azure Media settings
|
||||
MEDIA_LOCATION = "images"
|
||||
|
@ -414,15 +420,24 @@ elif USE_AZURE:
|
|||
f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{MEDIA_LOCATION}/"
|
||||
)
|
||||
MEDIA_FULL_URL = MEDIA_URL
|
||||
STATIC_FULL_URL = STATIC_URL
|
||||
DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.AzureImagesStorage"
|
||||
# Azure Exports settings
|
||||
EXPORTS_STORAGE = None # not implemented yet
|
||||
# Content Security Policy
|
||||
CSP_DEFAULT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
|
||||
CSP_SCRIPT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS
|
||||
else:
|
||||
# Static settings
|
||||
STATIC_URL = "/static/"
|
||||
STATIC_FULL_URL = f"{PROTOCOL}://{DOMAIN}{STATIC_URL}"
|
||||
STATICFILES_STORAGE = "django.contrib.staticfiles.storage.StaticFilesStorage"
|
||||
# Media settings
|
||||
MEDIA_URL = "/images/"
|
||||
MEDIA_FULL_URL = f"{PROTOCOL}://{DOMAIN}{MEDIA_URL}"
|
||||
STATIC_FULL_URL = f"{PROTOCOL}://{DOMAIN}{STATIC_URL}"
|
||||
DEFAULT_FILE_STORAGE = "django.core.files.storage.FileSystemStorage"
|
||||
# Exports settings
|
||||
EXPORTS_STORAGE = "bookwyrm.storage_backends.ExportsFileStorage"
|
||||
# Content Security Policy
|
||||
CSP_DEFAULT_SRC = ["'self'"] + CSP_ADDITIONAL_HOSTS
|
||||
CSP_SCRIPT_SRC = ["'self'"] + CSP_ADDITIONAL_HOSTS
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
"""Handles backends for storages"""
|
||||
import os
|
||||
from tempfile import SpooledTemporaryFile
|
||||
from django.core.files.storage import FileSystemStorage
|
||||
from storages.backends.s3boto3 import S3Boto3Storage
|
||||
from storages.backends.azure_storage import AzureStorage
|
||||
|
||||
|
@ -61,3 +62,18 @@ class AzureImagesStorage(AzureStorage): # pylint: disable=abstract-method
|
|||
|
||||
location = "images"
|
||||
overwrite_files = False
|
||||
|
||||
|
||||
class ExportsFileStorage(FileSystemStorage):  # pylint: disable=abstract-method
    """Local file system storage for user export archives"""

    # directory the export files are kept in
    location = "exports"
    # NOTE(review): mirrors the attribute set on AzureImagesStorage; confirm
    # that FileSystemStorage reads an ``overwrite_files`` attribute at all
    overwrite_files = False
|
||||
|
||||
|
||||
class ExportsS3Storage(S3Boto3Storage):  # pylint: disable=abstract-method
    """S3 storage for user export archives"""

    # key prefix the export files are kept under
    location = "exports"
    # no ACL is set by this class; presumably exports are meant to stay
    # private and be served through signed URLs - confirm
    default_acl = None
    # NOTE(review): ``overwrite_files`` is the option name used on the Azure
    # storage class; confirm it has any effect on S3Boto3Storage, whose
    # documented option is ``file_overwrite``
    overwrite_files = False
|
||||
|
|
|
@ -97,25 +97,25 @@
|
|||
</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% for job in jobs %}
|
||||
{% for export in jobs %}
|
||||
<tr>
|
||||
<td>{{ job.updated_date }}</td>
|
||||
<td>{{ export.job.updated_date }}</td>
|
||||
<td>
|
||||
<span
|
||||
{% if job.status == "stopped" or job.status == "failed" %}
|
||||
{% if export.job.status == "stopped" or export.job.status == "failed" %}
|
||||
class="tag is-danger"
|
||||
{% elif job.status == "pending" %}
|
||||
{% elif export.job.status == "pending" %}
|
||||
class="tag is-warning"
|
||||
{% elif job.complete %}
|
||||
{% elif export.job.complete %}
|
||||
class="tag"
|
||||
{% else %}
|
||||
class="tag is-success"
|
||||
{% endif %}
|
||||
>
|
||||
{% if job.status %}
|
||||
{{ job.status }}
|
||||
{{ job.status_display }}
|
||||
{% elif job.complete %}
|
||||
{% if export.job.status %}
|
||||
{{ export.job.status }}
|
||||
{{ export.job.status_display }}
|
||||
{% elif export.job.complete %}
|
||||
{% trans "Complete" %}
|
||||
{% else %}
|
||||
{% trans "Active" %}
|
||||
|
@ -123,18 +123,20 @@
|
|||
</span>
|
||||
</td>
|
||||
<td>
|
||||
<span>{{ job.export_data|get_file_size }}</span>
|
||||
{% if export.size %}
|
||||
<span>{{ export.size|get_file_size }}</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.complete and not job.status == "stopped" and not job.status == "failed" %}
|
||||
<p>
|
||||
<a download="" href="/preferences/user-export/{{ job.task_id }}">
|
||||
<span class="icon icon-download" aria-hidden="true"></span>
|
||||
<span class="is-hidden-mobile">
|
||||
{% trans "Download your export" %}
|
||||
</span>
|
||||
</a>
|
||||
</p>
|
||||
{% if export.url %}
|
||||
<a href="{{ export.url }}">
|
||||
<span class="icon icon-download" aria-hidden="true"></span>
|
||||
<span class="is-hidden-mobile">
|
||||
{% trans "Download your export" %}
|
||||
</span>
|
||||
</a>
|
||||
{% elif export.unavailable %}
|
||||
{% trans "Archive is no longer available" %}
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
|
|
|
@ -157,13 +157,13 @@
|
|||
>
|
||||
<div class="notification is-danger is-light">
|
||||
<p class="my-2">{% trans "Users are currently unable to start new user exports. This is the default setting." %}</p>
|
||||
{% if use_s3 %}
|
||||
<p>{% trans "It is not currently possible to provide user exports when using s3 storage. The BookWyrm development team are working on a fix for this." %}</p>
|
||||
{% if use_azure %}
|
||||
<p>{% trans "It is not currently possible to provide user exports when using Azure storage." %}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% csrf_token %}
|
||||
<div class="control">
|
||||
<button type="submit" class="button is-success" {% if use_s3 %}disabled{% endif %}>
|
||||
<button type="submit" class="button is-success" {% if use_azure %}disabled{% endif %}>
|
||||
{% trans "Enable user exports" %}
|
||||
</button>
|
||||
</div>
|
||||
|
|
|
@ -130,11 +130,14 @@ def id_to_username(user_id):
|
|||
|
||||
|
||||
@register.filter(name="get_file_size")
|
||||
def get_file_size(file):
|
||||
def get_file_size(nbytes):
|
||||
"""display the size of a file in human readable terms"""
|
||||
|
||||
try:
|
||||
raw_size = os.stat(file.path).st_size
|
||||
raw_size = float(nbytes)
|
||||
except (ValueError, TypeError):
|
||||
return repr(nbytes)
|
||||
else:
|
||||
if raw_size < 1024:
|
||||
return f"{raw_size} bytes"
|
||||
if raw_size < 1024**2:
|
||||
|
@ -142,8 +145,6 @@ def get_file_size(file):
|
|||
if raw_size < 1024**3:
|
||||
return f"{raw_size/1024**2:.2f} MB"
|
||||
return f"{raw_size/1024**3:.2f} GB"
|
||||
except Exception: # pylint: disable=broad-except
|
||||
return ""
|
||||
|
||||
|
||||
@register.filter(name="get_user_permission")
|
||||
|
|
|
@ -1,17 +1,18 @@
|
|||
"""test bookwyrm user export functions"""
|
||||
import datetime
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from django.core.serializers.json import DjangoJSONEncoder
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
from django.test import TestCase
|
||||
|
||||
from bookwyrm import models
|
||||
import bookwyrm.models.bookwyrm_export_job as export_job
|
||||
from bookwyrm.utils.tar import BookwyrmTarFile
|
||||
|
||||
|
||||
class BookwyrmExport(TestCase):
|
||||
class BookwyrmExportJob(TestCase):
|
||||
"""testing user export functions"""
|
||||
|
||||
def setUp(self):
|
||||
|
@ -42,6 +43,11 @@ class BookwyrmExport(TestCase):
|
|||
preferred_timezone="America/Los Angeles",
|
||||
default_post_privacy="followers",
|
||||
)
|
||||
avatar_path = pathlib.Path(__file__).parent.joinpath(
|
||||
"../../static/images/default_avi.jpg"
|
||||
)
|
||||
with open(avatar_path, "rb") as avatar_file:
|
||||
self.local_user.avatar.save("mouse-avatar.jpg", avatar_file)
|
||||
|
||||
self.rat_user = models.User.objects.create_user(
|
||||
"rat", "rat@rat.rat", "ratword", local=True, localname="rat"
|
||||
|
@ -87,6 +93,13 @@ class BookwyrmExport(TestCase):
|
|||
title="Example Edition", parent_work=self.work
|
||||
)
|
||||
|
||||
# edition cover
|
||||
cover_path = pathlib.Path(__file__).parent.joinpath(
|
||||
"../../static/images/default_avi.jpg"
|
||||
)
|
||||
with open(cover_path, "rb") as cover_file:
|
||||
self.edition.cover.save("tèst.jpg", cover_file)
|
||||
|
||||
self.edition.authors.add(self.author)
|
||||
|
||||
# readthrough
|
||||
|
@ -139,91 +152,105 @@ class BookwyrmExport(TestCase):
|
|||
book=self.edition,
|
||||
)
|
||||
|
||||
def test_json_export_user_settings(self):
|
||||
"""Test the json export function for basic user info"""
|
||||
data = export_job.json_export(self.local_user)
|
||||
user_data = json.loads(data)
|
||||
self.assertEqual(user_data["preferredUsername"], "mouse")
|
||||
self.assertEqual(user_data["name"], "Mouse")
|
||||
self.assertEqual(user_data["summary"], "<p>I'm a real bookmouse</p>")
|
||||
self.assertEqual(user_data["manuallyApprovesFollowers"], False)
|
||||
self.assertEqual(user_data["hideFollows"], False)
|
||||
self.assertEqual(user_data["discoverable"], True)
|
||||
self.assertEqual(user_data["settings"]["show_goal"], False)
|
||||
self.assertEqual(user_data["settings"]["show_suggested_users"], False)
|
||||
self.job = models.BookwyrmExportJob.objects.create(user=self.local_user)
|
||||
|
||||
# run the first stage of the export
|
||||
with patch("bookwyrm.models.bookwyrm_export_job.create_archive_task.delay"):
|
||||
models.bookwyrm_export_job.create_export_json_task(job_id=self.job.id)
|
||||
self.job.refresh_from_db()
|
||||
|
||||
def test_add_book_to_user_export_job(self):
    """the export JSON holds exactly one book, with one of each related record"""
    books = self.job.export_json["books"]
    self.assertIsNotNone(books)
    self.assertEqual(len(books), 1)
    book = books[0]

    self.assertEqual(book["work"]["id"], self.work.remote_id)
    self.assertEqual(len(book["authors"]), 1)
    self.assertEqual(len(book["shelves"]), 1)
    self.assertEqual(len(book["lists"]), 1)
    self.assertEqual(len(book["comments"]), 1)
    self.assertEqual(len(book["reviews"]), 1)
    self.assertEqual(len(book["quotations"]), 1)
    self.assertEqual(len(book["readthroughs"]), 1)

    self.assertEqual(book["edition"]["id"], self.edition.remote_id)
    # the cover url is compared against the cover's storage name under "images/"
    self.assertEqual(
        book["edition"]["cover"]["url"], f"images/{self.edition.cover.name}"
    )
|
||||
|
||||
self.assertEqual(json_data["books"][0]["shelves"][0]["name"], "Read")
|
||||
def test_start_export_task(self):
    """the job has archive and JSON fields set, and the JSON carries the user's name"""
    job = self.job
    self.assertIsNotNone(job.export_data)
    self.assertIsNotNone(job.export_json)
    self.assertEqual(job.export_json["name"], self.local_user.name)
|
||||
|
||||
self.assertEqual(len(json_data["books"][0]["lists"]), 1)
|
||||
self.assertEqual(json_data["books"][0]["lists"][0]["name"], "My excellent list")
|
||||
def test_export_saved_lists_task(self):
    """the exported saved lists start with the saved list's remote id"""
    self.assertIsNotNone(self.job.export_json["saved_lists"])
    self.assertEqual(
        self.job.export_json["saved_lists"][0], self.saved_list.remote_id
    )
|
||||
|
||||
self.assertEqual(len(json_data["books"][0]["reviews"]), 1)
|
||||
self.assertEqual(len(json_data["books"][0]["comments"]), 1)
|
||||
self.assertEqual(len(json_data["books"][0]["quotations"]), 1)
|
||||
def test_export_follows_task(self):
    """the exported follows start with the rat user's remote id"""
    exported = self.job.export_json
    self.assertIsNotNone(exported["follows"])
    first_follow = exported["follows"][0]
    self.assertEqual(first_follow, self.rat_user.remote_id)
|
||||
|
||||
self.assertEqual(json_data["books"][0]["reviews"][0]["name"], "my review")
|
||||
self.assertEqual(
|
||||
json_data["books"][0]["reviews"][0]["content"], "<p>awesome</p>"
|
||||
)
|
||||
self.assertEqual(json_data["books"][0]["reviews"][0]["rating"], 5.0)
|
||||
def test_export_blocks_task(self):
    """the exported blocks start with the badger user's remote id"""
    exported = self.job.export_json
    self.assertIsNotNone(exported["blocks"])
    first_block = exported["blocks"][0]
    self.assertEqual(first_block, self.badger_user.remote_id)
|
||||
|
||||
self.assertEqual(
|
||||
json_data["books"][0]["comments"][0]["content"], "<p>ok so far</p>"
|
||||
)
|
||||
self.assertEqual(json_data["books"][0]["comments"][0]["progress"], 15)
|
||||
self.assertEqual(json_data["books"][0]["comments"][0]["progress_mode"], "PG")
|
||||
def test_export_reading_goals_task(self):
    """the exported goals start with the expected goal value"""
    exported = self.job.export_json
    self.assertIsNotNone(exported["goals"])
    first_goal = exported["goals"][0]
    self.assertEqual(first_goal["goal"], 128937123)
|
||||
|
||||
def test_json_export(self):
    """the export JSON carries the user's settings"""
    self.assertIsNotNone(self.job.export_json["settings"])
    self.assertFalse(self.job.export_json["settings"]["show_goal"])
    self.assertEqual(
        self.job.export_json["settings"]["preferred_timezone"],
        "America/Los Angeles",
    )
    self.assertEqual(
        self.job.export_json["settings"]["default_post_privacy"], "followers"
    )
    self.assertFalse(self.job.export_json["settings"]["show_suggested_users"])
|
||||
|
||||
def test_get_books_for_user(self):
    """get_books_for_user yields exactly one book, titled "Example Edition\""""
    books = models.bookwyrm_export_job.get_books_for_user(self.local_user)

    self.assertEqual(len(books), 1)
    only_book = books[0]
    self.assertEqual(only_book.title, "Example Edition")
|
||||
|
||||
def test_archive(self):
    """build the TAR archive and check its JSON and image members"""
    models.bookwyrm_export_job.create_archive_task(job_id=self.job.id)
    self.job.refresh_from_db()

    with self.job.export_data.open("rb") as tar_file:
        with BookwyrmTarFile.open(mode="r", fileobj=tar_file) as tar:
            data = json.load(tar.extractfile("archive.json"))

            # JSON from the archive should be what we want it to be
            self.assertEqual(data, self.job.export_json)

            # User avatar should be present in archive
            with self.local_user.avatar.open() as expected_avatar:
                archive_avatar = tar.extractfile(data["icon"]["url"])
                self.assertEqual(expected_avatar.read(), archive_avatar.read())

            # Edition cover should be present in archive
            cover_member = data["books"][0]["edition"]["cover"]["url"]
            with self.edition.cover.open() as expected_cover:
                archive_cover = tar.extractfile(cover_member)
                self.assertEqual(expected_cover.read(), archive_cover.read())
|
||||
|
|
|
@ -42,7 +42,7 @@ class ExportUserViews(TestCase):
|
|||
|
||||
request = self.factory.post("")
|
||||
request.user = self.local_user
|
||||
with patch("bookwyrm.models.bookwyrm_export_job.start_export_task.delay"):
|
||||
with patch("bookwyrm.models.bookwyrm_export_job.BookwyrmExportJob.start_job"):
|
||||
export = views.ExportUser.as_view()(request)
|
||||
self.assertIsInstance(export, HttpResponse)
|
||||
self.assertEqual(export.status_code, 302)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
"""manage tar files for user exports"""
|
||||
import io
|
||||
import os
|
||||
import tarfile
|
||||
from typing import Any, Optional
|
||||
from uuid import uuid4
|
||||
|
@ -17,20 +18,20 @@ class BookwyrmTarFile(tarfile.TarFile):
|
|||
self.addfile(info, fileobj=buffer)
|
||||
|
||||
def add_image(
    self, image: Any, filename: Optional[str] = None, directory: str = ""
) -> None:
    """
    Add an image to the tar archive

    The member is stored under ``directory``. Without ``filename`` the
    image's own name is used; with it, the image's extension is appended.

    :param image: file-like object exposing ``name`` and ``size``
    :param str filename: overrides the file name set by image
    :param str directory: the directory in the archive to put the image
    """
    if filename is None:
        dst_filename = image.name
    else:
        # keep the source extension (including its leading dot, if any)
        dst_filename = filename + os.path.splitext(image.name)[1]
    dst_path = os.path.join(directory, dst_filename)

    info = tarfile.TarInfo(name=dst_path)
    info.size = image.size

    self.addfile(info, fileobj=image)
|
||||
|
@ -43,7 +44,7 @@ class BookwyrmTarFile(tarfile.TarFile):
|
|||
|
||||
def write_image_to_file(self, filename: str, file_field: Any) -> None:
    """
    Save an image stored in the tar to a file field

    :param str filename: name of the archive member to read
    :param file_field: object whose ``save(name, file)`` receives the image
    """
    # splitext keeps the leading dot, so it is not added again below
    extension = os.path.splitext(filename)[1]
    if buf := self.extractfile(filename):
        # saved under a fresh uuid-based name instead of the archive name
        filename = str(uuid4()) + extension
        file_field.save(filename, File(buf))
|
||||
|
|
|
@ -9,7 +9,7 @@ from django.views.decorators.http import require_POST
|
|||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.views.helpers import redirect_to_referer
|
||||
from bookwyrm.settings import PAGE_LENGTH, USE_S3
|
||||
from bookwyrm.settings import PAGE_LENGTH, USE_AZURE
|
||||
|
||||
|
||||
# pylint: disable=no-self-use
|
||||
|
@ -59,7 +59,7 @@ class ImportList(View):
|
|||
"import_size_limit": site_settings.import_size_limit,
|
||||
"import_limit_reset": site_settings.import_limit_reset,
|
||||
"user_import_time_limit": site_settings.user_import_time_limit,
|
||||
"use_s3": USE_S3,
|
||||
"use_azure": USE_AZURE,
|
||||
}
|
||||
return TemplateResponse(request, "settings/imports/imports.html", data)
|
||||
|
||||
|
|
|
@ -6,16 +6,19 @@ import io
|
|||
from django.contrib.auth.decorators import login_required
|
||||
from django.core.paginator import Paginator
|
||||
from django.db.models import Q
|
||||
from django.http import HttpResponse
|
||||
from django.http import HttpResponse, HttpResponseServerError, Http404
|
||||
from django.template.response import TemplateResponse
|
||||
from django.utils import timezone
|
||||
from django.views import View
|
||||
from django.urls import reverse
|
||||
from django.utils.decorators import method_decorator
|
||||
from django.shortcuts import redirect
|
||||
|
||||
from bookwyrm import models
|
||||
from storages.backends.s3boto3 import S3Boto3Storage
|
||||
|
||||
from bookwyrm import models, storage_backends
|
||||
from bookwyrm.models.bookwyrm_export_job import BookwyrmExportJob
|
||||
from bookwyrm.settings import PAGE_LENGTH
|
||||
from bookwyrm import settings
|
||||
|
||||
|
||||
# pylint: disable=no-self-use,too-many-locals
|
||||
|
@ -144,25 +147,53 @@ class Export(View):
|
|||
# pylint: disable=no-self-use
|
||||
@method_decorator(login_required, name="dispatch")
|
||||
class ExportUser(View):
|
||||
"""Let users export user data to import into another Bookwyrm instance"""
|
||||
"""
|
||||
Let users request and download an archive of user data to import into
|
||||
another Bookwyrm instance.
|
||||
"""
|
||||
|
||||
user_jobs = None
|
||||
|
||||
def setup(self, request, *args, **kwargs):
    """Run the base View setup, then cache this user's export jobs, newest first"""
    super().setup(request, *args, **kwargs)

    own_jobs = BookwyrmExportJob.objects.filter(user=request.user)
    self.user_jobs = own_jobs.order_by("-created_date")
|
||||
|
||||
def new_export_blocked_until(self):
    """
    Return the time at which a new export may be requested, or None.

    None is returned when the user has no export job yet, or when the site's
    ``user_import_time_limit`` (in hours) has already passed since the most
    recent job was created.
    """
    most_recent = self.user_jobs.first()
    if most_recent:
        wait_hours = models.SiteSettings.objects.get().user_import_time_limit
        unblock_time = most_recent.created_date + timedelta(hours=wait_hours)
        if unblock_time > timezone.now():
            return unblock_time
    return None
|
||||
|
||||
def get(self, request):
|
||||
"""Request tar file"""
|
||||
|
||||
jobs = BookwyrmExportJob.objects.filter(user=request.user).order_by(
|
||||
"-created_date"
|
||||
)
|
||||
site = models.SiteSettings.objects.get()
|
||||
hours = site.user_import_time_limit
|
||||
allowed = (
|
||||
jobs.first().created_date < timezone.now() - timedelta(hours=hours)
|
||||
if jobs.first()
|
||||
else True
|
||||
)
|
||||
next_available = (
|
||||
jobs.first().created_date + timedelta(hours=hours) if not allowed else False
|
||||
)
|
||||
paginated = Paginator(jobs, PAGE_LENGTH)
|
||||
exports = []
|
||||
for job in self.user_jobs:
|
||||
export = {"job": job}
|
||||
|
||||
if job.export_data:
|
||||
try:
|
||||
export["size"] = job.export_data.size
|
||||
export["url"] = reverse("prefs-export-file", args=[job.task_id])
|
||||
except FileNotFoundError:
|
||||
# file no longer exists locally
|
||||
export["unavailable"] = True
|
||||
except Exception: # pylint: disable=broad-except
|
||||
# file no longer exists on storage backend
|
||||
export["unavailable"] = True
|
||||
|
||||
exports.append(export)
|
||||
|
||||
next_available = self.new_export_blocked_until()
|
||||
paginated = Paginator(exports, settings.PAGE_LENGTH)
|
||||
page = paginated.get_page(request.GET.get("page"))
|
||||
data = {
|
||||
"jobs": page,
|
||||
|
@ -175,7 +206,9 @@ class ExportUser(View):
|
|||
return TemplateResponse(request, "preferences/export-user.html", data)
|
||||
|
||||
def post(self, request):
|
||||
"""Download the json file of a user's data"""
|
||||
"""Trigger processing of a new user export file"""
|
||||
if self.new_export_blocked_until() is not None:
|
||||
return HttpResponse(status=429) # Too Many Requests
|
||||
|
||||
job = BookwyrmExportJob.objects.create(user=request.user)
|
||||
job.start_job()
|
||||
|
@ -187,13 +220,35 @@ class ExportUser(View):
|
|||
class ExportArchive(View):
    """Serve the archive file"""

    def get(self, request, archive_id):
        """
        Download a user export file.

        Redirects to a signed URL when the archive lives on S3 and returns the
        file contents when it lives on local storage.

        :param request: the current request; only the requesting user's own
            jobs are looked up
        :param archive_id: ``task_id`` of the export job
        :raises Http404: when the job does not exist for this user, has no
            archive, or the archive cannot be located
        """
        try:
            export = BookwyrmExportJob.objects.get(
                task_id=archive_id, user=request.user
            )
        except BookwyrmExportJob.DoesNotExist as err:
            # unknown id, or a job that belongs to someone else
            raise Http404() from err

        if not export.export_data:
            # the job exists but no archive has been attached to it
            raise Http404()

        export_storage = export.export_data.storage

        if isinstance(export_storage, storage_backends.ExportsS3Storage):
            # make custom_domain None so we can sign the url
            # see https://github.com/jschneier/django-storages/issues/944
            signing_storage = S3Boto3Storage(querystring_auth=True, custom_domain=None)
            try:
                url = signing_storage.url(
                    f"/exports/{export.task_id}.tar.gz",
                    expire=settings.S3_SIGNED_URL_EXPIRY,
                )
            except Exception as err:  # pylint: disable=broad-except
                raise Http404() from err
            return redirect(url)

        if isinstance(export_storage, storage_backends.ExportsFileStorage):
            try:
                return HttpResponse(
                    export.export_data,
                    content_type="application/gzip",
                    headers={
                        "Content-Disposition": 'attachment; filename="bookwyrm-account-export.tar.gz"'  # pylint: disable=line-too-long
                    },
                )
            except FileNotFoundError as err:
                raise Http404() from err

        # any other storage backend is not supported for downloads
        return HttpResponseServerError()
|
||||
|
|
|
@ -29,6 +29,7 @@ services:
|
|||
- .:/app
|
||||
- static_volume:/app/static
|
||||
- media_volume:/app/images
|
||||
- exports_volume:/app/exports
|
||||
depends_on:
|
||||
- db
|
||||
- celery_worker
|
||||
|
@ -67,6 +68,7 @@ services:
|
|||
- .:/app
|
||||
- static_volume:/app/static
|
||||
- media_volume:/app/images
|
||||
- exports_volume:/app/exports
|
||||
depends_on:
|
||||
- db
|
||||
- redis_broker
|
||||
|
@ -81,6 +83,7 @@ services:
|
|||
- .:/app
|
||||
- static_volume:/app/static
|
||||
- media_volume:/app/images
|
||||
- exports_volume:/app/exports
|
||||
depends_on:
|
||||
- celery_worker
|
||||
restart: on-failure
|
||||
|
@ -109,6 +112,7 @@ volumes:
|
|||
pgdata:
|
||||
static_volume:
|
||||
media_volume:
|
||||
exports_volume:
|
||||
redis_broker_data:
|
||||
redis_activity_data:
|
||||
networks:
|
||||
|
|
0
exports/.gitkeep
Normal file
0
exports/.gitkeep
Normal file
|
@ -38,6 +38,7 @@ qrcode==7.3.1
|
|||
redis==4.5.4
|
||||
requests==2.31.0
|
||||
responses==0.22.0
|
||||
s3-tar==0.1.13
|
||||
setuptools>=65.5.1 # Not a direct dependency, pinned to get a security fix
|
||||
tornado==6.3.3 # Not a direct dependency, pinned to get a security fix
|
||||
|
||||
|
|
Loading…
Reference in a new issue