Merge pull request #3134 from dato/trigger_migrations

Support trigger migrations
2024-11-28 12:31:12 +00:00 · 2024-03-20 12:11:34 +01:00 · 2024-03-20 12:11:34 +01:00 · 762786839c
commit 762786839c
parent d08147c6d9 4ca52c0b38
10 changed files with 430 additions and 83 deletions
--- a/bookwyrm/migrations/0190_book_search_updates.py
+++ b/bookwyrm/migrations/0190_book_search_updates.py
@ -0,0 +1,16 @@
 # Generated by Django 3.2.20 on 2023-11-24 17:11
 from django.db import migrations
 class Migration(migrations.Migration):
    dependencies = [
        ("bookwyrm", "0188_theme_loads"),
    ]
    operations = [
        migrations.RemoveIndex(
            model_name="author",
            name="bookwyrm_au_search__b050a8_gin",
        ),
    ]
--- a/bookwyrm/migrations/0191_migrate_search_vec_triggers_to_pgtriggers.py
+++ b/bookwyrm/migrations/0191_migrate_search_vec_triggers_to_pgtriggers.py
@ -0,0 +1,76 @@
 # Generated by Django 3.2.20 on 2023-11-25 00:47
 from importlib import import_module
 import re
 from django.db import migrations
 import pgtrigger.compiler
 import pgtrigger.migrations
 trigger_migration = import_module("bookwyrm.migrations.0077_auto_20210623_2155")
 # it's _very_ convenient for development that this migration be reversible
 search_vector_trigger = trigger_migration.Migration.operations[4]
 author_search_vector_trigger = trigger_migration.Migration.operations[5]
 assert re.search(r"\bCREATE TRIGGER search_vector_trigger\b", search_vector_trigger.sql)
 assert re.search(
    r"\bCREATE TRIGGER author_search_vector_trigger\b",
    author_search_vector_trigger.sql,
 )
 class Migration(migrations.Migration):
    dependencies = [
        ("bookwyrm", "0190_book_search_updates"),
    ]
    operations = [
        pgtrigger.migrations.AddTrigger(
            model_name="book",
            trigger=pgtrigger.compiler.Trigger(
                name="update_search_vector_on_book_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func="new.search_vector := setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D');RETURN NEW;",
                    hash="77d6399497c0a89b0bf09d296e33c396da63705c",
                    operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
                    pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
                    table="bookwyrm_book",
                    when="BEFORE",
                ),
            ),
        ),
        pgtrigger.migrations.AddTrigger(
            model_name="author",
            trigger=pgtrigger.compiler.Trigger(
                name="reset_search_vector_on_author_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func="WITH updated_books AS (SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id ) UPDATE bookwyrm_book SET search_vector = '' FROM updated_books WHERE id = updated_books.book_id;RETURN NEW;",
                    hash="e7bbf08711ff3724c58f4d92fb7a082ffb3d7826",
                    operation='UPDATE OF "name"',
                    pgid="pgtrigger_reset_search_vector_on_author_edit_a447c",
                    table="bookwyrm_author",
                    when="AFTER",
                ),
            ),
        ),
        migrations.RunSQL(
            sql="""DROP TRIGGER IF EXISTS search_vector_trigger ON bookwyrm_book;
                   DROP FUNCTION IF EXISTS book_trigger;
            """,
            reverse_sql=search_vector_trigger.sql,
        ),
        migrations.RunSQL(
            sql="""DROP TRIGGER IF EXISTS author_search_vector_trigger ON bookwyrm_author;
                   DROP FUNCTION IF EXISTS author_trigger;
            """,
            reverse_sql=author_search_vector_trigger.sql,
        ),
        migrations.RunSQL(
            # Recalculate book search vector for any missed author name changes
            # due to bug in JOIN in the old trigger.
            sql="UPDATE bookwyrm_book SET search_vector = NULL;",
            reverse_sql=migrations.RunSQL.noop,
        ),
    ]
--- a/bookwyrm/migrations/0196_merge_pr3134_into_main.py
+++ b/bookwyrm/migrations/0196_merge_pr3134_into_main.py
@ -0,0 +1,13 @@
 # Generated by Django 3.2.23 on 2024-03-18 00:48
 from django.db import migrations
 class Migration(migrations.Migration):
    dependencies = [
        ("bookwyrm", "0191_migrate_search_vec_triggers_to_pgtriggers"),
        ("bookwyrm", "0195_alter_user_preferred_language"),
    ]
    operations = []
--- a/bookwyrm/models/author.py
+++ b/bookwyrm/models/author.py
@ -2,11 +2,12 @@
 import re
 from typing import Tuple, Any
 from django.contrib.postgres.indexes import GinIndex
 from django.db import models
 import pgtrigger
 from bookwyrm import activitypub
 from bookwyrm.settings import DOMAIN
 from bookwyrm.utils.db import format_trigger
 from .book import BookDataModel
 from . import fields
@ -67,9 +68,28 @@ class Author(BookDataModel):
        """editions and works both use "book" instead of model_name"""
        return f"https://{DOMAIN}/author/{self.id}"
    activity_serializer = activitypub.Author
    class Meta:
-        """sets up postgres GIN index field"""
+        """sets up indexes and triggers"""
-        indexes = (GinIndex(fields=["search_vector"]),)
+        triggers = [
            pgtrigger.Trigger(
                name="reset_search_vector_on_author_edit",
                when=pgtrigger.After,
                operation=pgtrigger.UpdateOf("name"),
                func=format_trigger(
                    """WITH updated_books AS (
                         SELECT book_id
                         FROM bookwyrm_book_authors
                         WHERE author_id = new.id
                    )
                    UPDATE bookwyrm_book
                    SET search_vector = ''
                    FROM updated_books
                    WHERE id = updated_books.book_id;
                    RETURN new;
                """
                ),
            )
        ]
    activity_serializer = activitypub.Author
--- a/bookwyrm/models/book.py
+++ b/bookwyrm/models/book.py
@ -13,6 +13,7 @@ from django.utils.translation import gettext_lazy as _
 from model_utils import FieldTracker
 from model_utils.managers import InheritanceManager
 from imagekit.models import ImageSpecField
 import pgtrigger
 from bookwyrm import activitypub
 from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator
@ -24,6 +25,7 @@ from bookwyrm.settings import (
    ENABLE_PREVIEW_IMAGES,
    ENABLE_THUMBNAIL_GENERATION,
 )
 from bookwyrm.utils.db import format_trigger
 from .activitypub_mixin import OrderedCollectionPageMixin, ObjectMixin
 from .base_model import BookWyrmModel
@ -232,9 +234,39 @@ class Book(BookDataModel):
        )
    class Meta:
-        """sets up postgres GIN index field"""
+        """set up indexes and triggers"""
        # pylint: disable=line-too-long
        indexes = (GinIndex(fields=["search_vector"]),)
        triggers = [
            pgtrigger.Trigger(
                name="update_search_vector_on_book_edit",
                when=pgtrigger.Before,
                operation=pgtrigger.Insert
                | pgtrigger.UpdateOf("title", "subtitle", "series", "search_vector"),
                func=format_trigger(
                    """new.search_vector :=
                         -- title, with priority A (parse in English, default to simple if empty)
                         setweight(COALESCE(nullif(
                                       to_tsvector('english', new.title), ''),
                                       to_tsvector('simple', new.title)), 'A') ||
                         -- subtitle, with priority B (always in English?)
                         setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
                         -- list of authors, with priority C (TODO: add aliases?, bookwyrm-social#3063)
                         (SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(bookwyrm_author.name), ' '), '')), 'C')
                           FROM bookwyrm_author
                           LEFT JOIN bookwyrm_book_authors
                               ON bookwyrm_author.id = bookwyrm_book_authors.author_id
                           WHERE bookwyrm_book_authors.book_id = new.id
                         ) ||
                         --- last: series name, with lowest priority
                         setweight(to_tsvector('english', COALESCE(new.series, '')), 'D');
                       RETURN new;
                """
                ),
            )
        ]
 class Work(OrderedCollectionPageMixin, Book):
--- a/bookwyrm/settings.py
+++ b/bookwyrm/settings.py
@ -108,6 +108,7 @@ INSTALLED_APPS = [
    "celery",
    "django_celery_beat",
    "imagekit",
    "pgtrigger",
    "storages",
 ]
--- a/bookwyrm/tests/test_book_search.py
+++ b/bookwyrm/tests/test_book_search.py
@ -1,5 +1,6 @@
 """ test searching for books """
 import datetime
 from django.db import connection
 from django.test import TestCase
 from django.utils import timezone
@ -140,3 +141,244 @@ class BookSearch(TestCase):
        # there's really not much to test here, it's just a dataclass
        self.assertEqual(result.confidence, 1)
        self.assertEqual(result.title, "Title")
 class SearchVectorTest(TestCase):
    """check search_vector is computed correctly"""
    def test_search_vector_simple(self):
        """simplest search vector"""
        book = self._create_book("Book", "Mary")
        self.assertEqual(book.search_vector, "'book':1A 'mary':2C")  # A > C (priority)
    def test_search_vector_all_parts(self):
        """search vector with subtitle and series"""
        # for a book like this we call `to_tsvector("Book Long Mary Bunch")`, hence the
        # indexes in the search vector. (priority "D" is the default, and never shown.)
        book = self._create_book("Book", "Mary", subtitle="Long", series="Bunch")
        self.assertEqual(book.search_vector, "'book':1A 'bunch':4 'long':2B 'mary':3C")
    def test_search_vector_parse_book(self):
        """book parts are parsed in english"""
        # FIXME: at some point this should stop being the default.
        book = self._create_book(
            "Edition", "Editor", series="Castle", subtitle="Writing"
        )
        self.assertEqual(
            book.search_vector, "'castl':4 'edit':1A 'editor':3C 'write':2B"
        )
    def test_search_vector_parse_author(self):
        """author name is not stem'd or affected by stop words"""
        book = self._create_book("Writing", "Writes")
        self.assertEqual(book.search_vector, "'write':1A 'writes':2C")
        book = self._create_book("She Is Writing", "She Writes")
        self.assertEqual(book.search_vector, "'she':4C 'write':3A 'writes':5C")
    def test_search_vector_parse_title_empty(self):
        """empty parse in English retried as simple title"""
        book = self._create_book("Here We", "John")
        self.assertEqual(book.search_vector, "'here':1A 'john':3C 'we':2A")
        book = self._create_book("Hear We Come", "John")
        self.assertEqual(book.search_vector, "'come':3A 'hear':1A 'john':4C")
        book = self._create_book("there there", "the")
        self.assertEqual(book.search_vector, "'the':3C 'there':1A,2A")
    def test_search_vector_no_author(self):
        """book with no authors gets processed normally"""
        book = self._create_book("Book", None, series="Bunch")
        self.assertEqual(book.search_vector, "'book':1A 'bunch':2")
        book = self._create_book("there there", None)
        self.assertEqual(book.search_vector, "'there':1A,2A")
    # n.b.: the following originally from test_posgres.py
    def test_search_vector_on_update(self):
        """make sure that search_vector is being set correctly on edit"""
        book = self._create_book("The Long Goodbye", None)
        self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
        book.title = "The Even Longer Goodbye"
        book.save(broadcast=False)
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A")
    def test_search_vector_on_author_update(self):
        """update search when an author name changes"""
        book = self._create_book("The Long Goodbye", "The Rays")
        self.assertEqual(book.search_vector, "'goodby':3A 'long':2A 'rays':5C 'the':4C")
        author = models.Author.objects.get(name="The Rays")
        author.name = "Jeremy"
        author.save(broadcast=False)
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A")
    def test_search_vector_on_author_delete(self):
        """update search when an author is deleted"""
        book = self._create_book("The Long Goodbye", "The Rays")
        self.assertEqual(book.search_vector, "'goodby':3A 'long':2A 'rays':5C 'the':4C")
        author = models.Author.objects.get(name="The Rays")
        book.authors.remove(author)
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
    def test_search_vector_fields(self):
        """language field irrelevant for search_vector"""
        author = models.Author.objects.create(name="The Rays")
        book = models.Edition.objects.create(
            title="The Long Goodbye",
            subtitle="wow cool",
            series="series name",
            languages=["irrelevant"],
        )
        book.authors.add(author)
        book.refresh_from_db()
        self.assertEqual(
            book.search_vector,
            # pylint: disable-next=line-too-long
            "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B",
        )
    @staticmethod
    def _create_book(
        title, author_name, /, *, subtitle="", series="", author_alias=None
    ):
        """quickly create a book"""
        work = models.Work.objects.create(title="work")
        edition = models.Edition.objects.create(
            title=title,
            series=series or None,
            subtitle=subtitle or None,
            isbn_10="0000000000",
            parent_work=work,
        )
        if author_name is not None:
            author = models.Author.objects.create(
                name=author_name, aliases=author_alias or []
            )
            edition.authors.add(author)
            edition.save(broadcast=False)
        edition.refresh_from_db()
        return edition
 class SearchVectorUpdates(TestCase):
    """look for books as they change"""  # functional tests of the above
    def setUp(self):
        """we need basic test data and mocks"""
        self.work = models.Work.objects.create(title="This Work")
        self.author = models.Author.objects.create(name="Name")
        self.edition = models.Edition.objects.create(
            title="First Edition of Work",
            subtitle="Some Extra Words Are Good",
            series="A Fabulous Sequence of Items",
            parent_work=self.work,
            isbn_10="0000000000",
        )
        self.edition.authors.add(self.author)
        self.edition.save(broadcast=False)
    @classmethod
    def setUpTestData(cls):
        """create conditions that trigger known old bugs"""
        with connection.cursor() as cursor:
            cursor.execute(
                """
                ALTER SEQUENCE bookwyrm_author_id_seq       RESTART WITH 20;
                ALTER SEQUENCE bookwyrm_book_authors_id_seq RESTART WITH 300;
                """
            )
    def test_search_after_changed_metadata(self):
        """book found after updating metadata"""
        self.assertEqual(self.edition, self._search_first("First"))  # title
        self.assertEqual(self.edition, self._search_first("Good"))  # subtitle
        self.assertEqual(self.edition, self._search_first("Sequence"))  # series
        self.edition.title = "Second Title of Work"
        self.edition.subtitle = "Fewer Words Is Better"
        self.edition.series = "A Wondrous Bunch"
        self.edition.save(broadcast=False)
        self.assertEqual(self.edition, self._search_first("Second"))  # title new
        self.assertEqual(self.edition, self._search_first("Fewer"))  # subtitle new
        self.assertEqual(self.edition, self._search_first("Wondrous"))  # series new
        self.assertFalse(self._search_first("First"))  # title old
        self.assertFalse(self._search_first("Good"))  # subtitle old
        self.assertFalse(self._search_first("Sequence"))  # series old
    def test_search_after_author_remove(self):
        """book not found via removed author"""
        self.assertEqual(self.edition, self._search_first("Name"))
        self.edition.authors.set([])
        self.edition.save(broadcast=False)
        self.assertFalse(self._search("Name"))
        self.assertEqual(self.edition, self._search_first("Edition"))
    def test_search_after_author_add(self):
        """book found by newly-added author"""
        new_author = models.Author.objects.create(name="Mozilla")
        self.assertFalse(self._search("Mozilla"))
        self.edition.authors.add(new_author)
        self.edition.save(broadcast=False)
        self.assertEqual(self.edition, self._search_first("Mozilla"))
        self.assertEqual(self.edition, self._search_first("Name"))
    def test_search_after_author_add_remove_sql(self):
        """add/remove author through SQL to ensure execution of book_authors trigger"""
        # Tests calling edition.save(), above, pass even if the trigger in
        # bookwyrm_book_authors is removed (probably because they trigger the one
        # in bookwyrm_book directly). Here we make sure to exercise the former.
        new_author = models.Author.objects.create(name="Mozilla")
        with connection.cursor() as cursor:
            cursor.execute(
                "DELETE FROM bookwyrm_book_authors WHERE book_id = %s",
                [self.edition.id],
            )
        self.assertFalse(self._search("Name"))
        self.assertFalse(self._search("Mozilla"))
        with connection.cursor() as cursor:
            cursor.execute(
                "INSERT INTO bookwyrm_book_authors (book_id,author_id) VALUES (%s,%s)",
                [self.edition.id, new_author.id],
            )
        self.assertFalse(self._search("Name"))
        self.assertEqual(self.edition, self._search_first("Mozilla"))
    def test_search_after_updated_author_name(self):
        """book found under new author name"""
        self.assertEqual(self.edition, self._search_first("Name"))
        self.assertFalse(self._search("Identifier"))
        self.author.name = "Identifier"
        self.author.save(broadcast=False)
        self.assertFalse(self._search("Name"))
        self.assertEqual(self.edition, self._search_first("Identifier"))
        self.assertEqual(self.edition, self._search_first("Work"))
    def _search_first(self, query):
        """wrapper around search_title_author"""
        return self._search(query, return_first=True)
    @staticmethod
    def _search(query, *, return_first=False):
        """wrapper around search_title_author"""
        return book_search.search_title_author(
            query, min_confidence=0, return_first=return_first
        )
--- a/bookwyrm/tests/test_postgres.py
+++ b/bookwyrm/tests/test_postgres.py
@ -1,77 +0,0 @@
 """ django configuration of postgres  """
 from unittest.mock import patch
 from django.test import TestCase
 from bookwyrm import models
@patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async")
 class PostgresTriggers(TestCase):
    """special migrations, fancy stuff ya know"""
    def test_search_vector_on_create(self, _):
        """make sure that search_vector is being set correctly on create"""
        book = models.Edition.objects.create(title="The Long Goodbye")
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
    def test_search_vector_on_update(self, _):
        """make sure that search_vector is being set correctly on edit"""
        book = models.Edition.objects.create(title="The Long Goodbye")
        book.title = "The Even Longer Goodbye"
        book.save(broadcast=False)
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A")
    def test_search_vector_fields(self, _):
        """use multiple fields to create search vector"""
        author = models.Author.objects.create(name="The Rays")
        book = models.Edition.objects.create(
            title="The Long Goodbye",
            subtitle="wow cool",
            series="series name",
            languages=["irrelevant"],
        )
        book.authors.add(author)
        book.refresh_from_db()
        # pylint: disable=line-too-long
        self.assertEqual(
            book.search_vector,
            "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B",
        )
    def test_search_vector_on_author_update(self, _):
        """update search when an author name changes"""
        author = models.Author.objects.create(name="The Rays")
        book = models.Edition.objects.create(
            title="The Long Goodbye",
        )
        book.authors.add(author)
        author.name = "Jeremy"
        author.save(broadcast=False)
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A")
    def test_search_vector_on_author_delete(self, _):
        """update search when an author name changes"""
        author = models.Author.objects.create(name="Jeremy")
        book = models.Edition.objects.create(
            title="The Long Goodbye",
        )
        book.authors.add(author)
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A")
        book.authors.remove(author)
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
    def test_search_vector_stop_word_fallback(self, _):
        """use a fallback when removing stop words leads to an empty vector"""
        book = models.Edition.objects.create(
            title="there there",
        )
        book.refresh_from_db()
        self.assertEqual(book.search_vector, "'there':1A,2A")
--- a/bookwyrm/utils/db.py
+++ b/bookwyrm/utils/db.py
@ -0,0 +1,23 @@
 """ Database utilities """
 from typing import cast
 import sqlparse  # type: ignore
 def format_trigger(sql: str) -> str:
    """format SQL trigger before storing
    we remove whitespace and use consistent casing so as to avoid migrations
    due to formatting changes.
    """
    return cast(
        str,
        sqlparse.format(
            sql,
            strip_comments=True,
            strip_whitespace=True,
            use_space_around_operators=True,
            keyword_case="upper",
            identifier_case="lower",
        ),
    )
--- a/requirements.txt
+++ b/requirements.txt
@ -10,6 +10,7 @@ django-compressor==4.4
 django-csp==3.7
 django-imagekit==4.1.0
 django-model-utils==4.3.1
 django-pgtrigger==4.11.0
 django-redis==5.2.0
 django-sass-processor==1.2.2
 django-storages==1.13.2