Adding batch processing to the sort title migration

This commit is contained in:
Zach Flanders 2023-04-27 15:30:52 -05:00
parent a6e5939ad2
commit a1df116c58

View file

@ -7,6 +7,16 @@ from django.db.models import Q
from bookwyrm.settings import LANGUAGE_ARTICLES from bookwyrm.settings import LANGUAGE_ARTICLES
def set_sort_title(edition):
    """Set ``edition.sort_title`` to the lowercased title minus a leading article.

    Candidate articles are looked up in ``LANGUAGE_ARTICLES`` for each entry in
    ``edition.languages``; a language with no entry contributes no articles.

    Args:
        edition: object exposing ``title`` and ``languages`` attributes; its
            ``sort_title`` attribute is overwritten.

    Returns:
        The same ``edition`` object, mutated in place.
    """
    sort_title = str(edition.title).lower()
    # ``languages`` may be unset (None); treat that the same as an empty list.
    languages = edition.languages or ()
    # Escape each article so it is always matched literally inside the regex.
    articles = [
        re.escape(article)
        for article in chain.from_iterable(
            LANGUAGE_ARTICLES.get(language, ()) for language in languages
        )
    ]
    if articles:
        # An article is only removed when it starts the title and is followed
        # by a single space.
        sort_title = re.sub(f"^(?:{'|'.join(articles)}) ", "", sort_title)
    # With no articles the title is only lowercased; building the pattern
    # anyway would degenerate to "^ " and strip a leading space.
    edition.sort_title = sort_title
    return edition
@transaction.atomic @transaction.atomic
def populate_sort_title(apps, schema_editor): def populate_sort_title(apps, schema_editor):
Edition = apps.get_model("bookwyrm", "Edition") Edition = apps.get_model("bookwyrm", "Edition")
@ -14,17 +24,16 @@ def populate_sort_title(apps, schema_editor):
editions_wo_sort_title = Edition.objects.using(db_alias).filter( editions_wo_sort_title = Edition.objects.using(db_alias).filter(
Q(sort_title__isnull=True) | Q(sort_title__exact="") Q(sort_title__isnull=True) | Q(sort_title__exact="")
) )
for edition in editions_wo_sort_title: batch_size = 50000
articles = chain( start = 0
*( end = batch_size
LANGUAGE_ARTICLES.get(language, ()) while editions_wo_sort_title[start:end]:
for language in tuple(edition.languages) Edition.objects.bulk_update(
) (set_sort_title(edition) for edition in editions_wo_sort_title[start:end]),
["sort_title"],
) )
edition.sort_title = re.sub( start = end
f'^{" |^".join(articles)} ', "", str(edition.title).lower() end += batch_size
)
Edition.objects.bulk_update(editions_wo_sort_title, ["sort_title"])
class Migration(migrations.Migration): class Migration(migrations.Migration):