From 261e794c1cfebd7895d0bbbad463e80192c769e3 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 9 Jun 2024 10:34:22 +1000 Subject: [PATCH 1/2] possible fix for #3372 - user export timeouts This definitely needs to be tested on a large DB but I believe it may fix the timeouts b.s. gets when running user exports. Instead of a gigantic single DB query with heaps of joins, we instead just do a series of simple queries and then use union() to pull them into a de-duped queryset. If I understand the results from explain() correctly, this is a massive reduction in DB work: Unique (cost=195899.15..198201.71 rows=11808 width=19220) vs Unique (cost=150.28..153.44 rows=16 width=19220) --- bookwyrm/models/bookwyrm_export_job.py | 33 ++++++++++++++++---------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index f355c86a4..9cf4aeb61 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -315,19 +315,28 @@ def export_book(user: User, edition: Edition): def get_books_for_user(user): - """Get all the books and editions related to a user""" + """ + Get all the books and editions related to a user. - editions = ( - Edition.objects.select_related("parent_work") - .filter( - Q(shelves__user=user) - | Q(readthrough__user=user) - | Q(review__user=user) - | Q(list__user=user) - | Q(comment__user=user) - | Q(quotation__user=user) - ) - .distinct() + We use union() instead of Q objects because it creates + multiple simple queries instead of a much more complex DB query + that can time out. 
+ + """ + + shelf_eds = Edition.objects.select_related("parent_work").filter(shelves__user=user) + rt_eds = Edition.objects.select_related("parent_work").filter( + readthrough__user=user + ) + review_eds = Edition.objects.select_related("parent_work").filter(review__user=user) + list_eds = Edition.objects.select_related("parent_work").filter(list__user=user) + comment_eds = Edition.objects.select_related("parent_work").filter( + comment__user=user + ) + quote_eds = Edition.objects.select_related("parent_work").filter( + quotation__user=user ) + editions = shelf_eds.union(rt_eds, review_eds, list_eds, comment_eds, quote_eds) + return editions From 1d4119e8534fec9bb8115995e7d64082b75e7cc9 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 9 Jun 2024 10:59:11 +1000 Subject: [PATCH 2/2] LOL remove Q import so pylint doesn't grumble --- bookwyrm/models/bookwyrm_export_job.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index 9cf4aeb61..870910c00 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -7,7 +7,6 @@ from boto3.session import Session as BotoSession from s3_tar import S3Tar from django.db.models import BooleanField, FileField, JSONField -from django.db.models import Q from django.core.serializers.json import DjangoJSONEncoder from django.core.files.base import ContentFile from django.core.files.storage import storages