Merge pull request #2713 from WesleyAC/buffer-csv-export

Change CSV export to buffer instead of streaming
This commit is contained in:
Mouse Reeve 2023-03-12 16:17:53 -07:00 committed by GitHub
commit 84b8a5c433
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 51 additions and 64 deletions

View file

@ -1,7 +1,7 @@
""" test for app action functionality """
from unittest.mock import patch
from django.http import StreamingHttpResponse
from django.http import HttpResponse
from django.test import TestCase
from django.test.client import RequestFactory
@ -57,13 +57,12 @@ class ExportViews(TestCase):
request = self.factory.post("")
request.user = self.local_user
export = views.Export.as_view()(request)
self.assertIsInstance(export, StreamingHttpResponse)
self.assertIsInstance(export, HttpResponse)
self.assertEqual(export.status_code, 200)
result = list(export.streaming_content)
# pylint: disable=line-too-long
self.assertEqual(
result[0],
b"title,author_text,remote_id,openlibrary_key,inventaire_id,librarything_key,goodreads_key,bnf_id,viaf,wikidata,asin,aasin,isfdb,isbn_10,isbn_13,oclc_number,rating,review_name,review_cw,review_content\r\n",
export.content,
b"title,author_text,remote_id,openlibrary_key,inventaire_id,librarything_key,goodreads_key,bnf_id,viaf,wikidata,asin,aasin,isfdb,isbn_10,isbn_13,oclc_number,rating,review_name,review_cw,review_content\r\nTest Book,,"
+ self.book.remote_id.encode("utf-8")
+ b",,,,,beep,,,,,,123456789X,9781234567890,,,,,\r\n",
)
expected = f"Test Book,,{self.book.remote_id},,,,,beep,,,,,,123456789X,9781234567890,,,,,\r\n"
self.assertEqual(result[1].decode("utf-8"), expected)

View file

@ -1,9 +1,10 @@
""" Let users export their book data """
import csv
import io
from django.contrib.auth.decorators import login_required
from django.db.models import Q
from django.http import StreamingHttpResponse
from django.http import HttpResponse
from django.template.response import TemplateResponse
from django.views import View
from django.utils.decorators import method_decorator
@ -20,8 +21,8 @@ class Export(View):
return TemplateResponse(request, "preferences/export.html")
def post(self, request):
"""Streaming the csv file of a user's book data"""
data = (
"""Download the csv file of a user's book data"""
books = (
models.Edition.viewer_aware_objects(request.user)
.filter(
Q(shelves__user=request.user)
@ -33,63 +34,50 @@ class Export(View):
.distinct()
)
generator = csv_row_generator(data, request.user)
csv_string = io.StringIO()
writer = csv.writer(csv_string)
pseudo_buffer = Echo()
writer = csv.writer(pseudo_buffer)
# for testing, if you want to see the results in the browser:
# from django.http import JsonResponse
# return JsonResponse(list(generator), safe=False)
return StreamingHttpResponse(
(writer.writerow(row) for row in generator),
deduplication_fields = [
f.name
for f in models.Edition._meta.get_fields() # pylint: disable=protected-access
if getattr(f, "deduplication_field", False)
]
fields = (
["title", "author_text"]
+ deduplication_fields
+ ["rating", "review_name", "review_cw", "review_content"]
)
writer.writerow(fields)
for book in books:
# I think this is more efficient than doing a subquery in the view? but idk
review_rating = (
models.Review.objects.filter(
user=request.user, book=book, rating__isnull=False
)
.order_by("-published_date")
.first()
)
book.rating = review_rating.rating if review_rating else None
review = (
models.Review.objects.filter(
user=request.user, book=book, content__isnull=False
)
.order_by("-published_date")
.first()
)
if review:
book.review_name = review.name
book.review_cw = review.content_warning
book.review_content = review.raw_content
writer.writerow([getattr(book, field, "") or "" for field in fields])
return HttpResponse(
csv_string.getvalue(),
content_type="text/csv",
headers={
"Content-Disposition": 'attachment; filename="bookwyrm-export.csv"'
},
)
def csv_row_generator(books, user):
"""generate a csv entry for the user's book"""
deduplication_fields = [
f.name
for f in models.Edition._meta.get_fields() # pylint: disable=protected-access
if getattr(f, "deduplication_field", False)
]
fields = (
["title", "author_text"]
+ deduplication_fields
+ ["rating", "review_name", "review_cw", "review_content"]
)
yield fields
for book in books:
# I think this is more efficient than doing a subquery in the view? but idk
review_rating = (
models.Review.objects.filter(user=user, book=book, rating__isnull=False)
.order_by("-published_date")
.first()
)
book.rating = review_rating.rating if review_rating else None
review = (
models.Review.objects.filter(user=user, book=book, content__isnull=False)
.order_by("-published_date")
.first()
)
if review:
book.review_name = review.name
book.review_cw = review.content_warning
book.review_content = review.raw_content
yield [getattr(book, field, "") or "" for field in fields]
class Echo:
"""An object that implements just the write method of the file-like
interface. (https://docs.djangoproject.com/en/3.2/howto/outputting-csv/)
"""
# pylint: disable=no-self-use
def write(self, value):
"""Write the value by returning it, instead of storing in a buffer."""
return value