Merge pull request #3134 from dato/trigger_migrations

Support trigger migrations
This commit is contained in:
Bart Schuurmans 2024-03-20 12:11:34 +01:00 committed by GitHub
commit 762786839c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 430 additions and 83 deletions

View file

@ -0,0 +1,16 @@
# Generated by Django 3.2.20 on 2023-11-24 17:11
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0188_theme_loads"),
]
operations = [
migrations.RemoveIndex(
model_name="author",
name="bookwyrm_au_search__b050a8_gin",
),
]

View file

@ -0,0 +1,76 @@
# Generated by Django 3.2.20 on 2023-11-25 00:47
from importlib import import_module
import re
from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations
trigger_migration = import_module("bookwyrm.migrations.0077_auto_20210623_2155")
# it's _very_ convenient for development that this migration be reversible
search_vector_trigger = trigger_migration.Migration.operations[4]
author_search_vector_trigger = trigger_migration.Migration.operations[5]
assert re.search(r"\bCREATE TRIGGER search_vector_trigger\b", search_vector_trigger.sql)
assert re.search(
r"\bCREATE TRIGGER author_search_vector_trigger\b",
author_search_vector_trigger.sql,
)
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0190_book_search_updates"),
]
operations = [
pgtrigger.migrations.AddTrigger(
model_name="book",
trigger=pgtrigger.compiler.Trigger(
name="update_search_vector_on_book_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="new.search_vector := setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D');RETURN NEW;",
hash="77d6399497c0a89b0bf09d296e33c396da63705c",
operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
table="bookwyrm_book",
when="BEFORE",
),
),
),
pgtrigger.migrations.AddTrigger(
model_name="author",
trigger=pgtrigger.compiler.Trigger(
name="reset_search_vector_on_author_edit",
sql=pgtrigger.compiler.UpsertTriggerSql(
func="WITH updated_books AS (SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id ) UPDATE bookwyrm_book SET search_vector = '' FROM updated_books WHERE id = updated_books.book_id;RETURN NEW;",
hash="e7bbf08711ff3724c58f4d92fb7a082ffb3d7826",
operation='UPDATE OF "name"',
pgid="pgtrigger_reset_search_vector_on_author_edit_a447c",
table="bookwyrm_author",
when="AFTER",
),
),
),
migrations.RunSQL(
sql="""DROP TRIGGER IF EXISTS search_vector_trigger ON bookwyrm_book;
DROP FUNCTION IF EXISTS book_trigger;
""",
reverse_sql=search_vector_trigger.sql,
),
migrations.RunSQL(
sql="""DROP TRIGGER IF EXISTS author_search_vector_trigger ON bookwyrm_author;
DROP FUNCTION IF EXISTS author_trigger;
""",
reverse_sql=author_search_vector_trigger.sql,
),
migrations.RunSQL(
# Recalculate book search vector for any missed author name changes
# due to bug in JOIN in the old trigger.
sql="UPDATE bookwyrm_book SET search_vector = NULL;",
reverse_sql=migrations.RunSQL.noop,
),
]

View file

@ -0,0 +1,13 @@
# Generated by Django 3.2.23 on 2024-03-18 00:48
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("bookwyrm", "0191_migrate_search_vec_triggers_to_pgtriggers"),
("bookwyrm", "0195_alter_user_preferred_language"),
]
operations = []

View file

@ -2,11 +2,12 @@
import re
from typing import Tuple, Any
from django.contrib.postgres.indexes import GinIndex
from django.db import models
import pgtrigger
from bookwyrm import activitypub
from bookwyrm.settings import DOMAIN
from bookwyrm.utils.db import format_trigger
from .book import BookDataModel
from . import fields
@ -67,9 +68,28 @@ class Author(BookDataModel):
"""editions and works both use "book" instead of model_name"""
return f"https://{DOMAIN}/author/{self.id}"
activity_serializer = activitypub.Author
class Meta:
"""sets up postgres GIN index field"""
"""sets up indexes and triggers"""
indexes = (GinIndex(fields=["search_vector"]),)
triggers = [
pgtrigger.Trigger(
name="reset_search_vector_on_author_edit",
when=pgtrigger.After,
operation=pgtrigger.UpdateOf("name"),
func=format_trigger(
"""WITH updated_books AS (
SELECT book_id
FROM bookwyrm_book_authors
WHERE author_id = new.id
)
UPDATE bookwyrm_book
SET search_vector = ''
FROM updated_books
WHERE id = updated_books.book_id;
RETURN new;
"""
),
)
]
activity_serializer = activitypub.Author

View file

@ -13,6 +13,7 @@ from django.utils.translation import gettext_lazy as _
from model_utils import FieldTracker
from model_utils.managers import InheritanceManager
from imagekit.models import ImageSpecField
import pgtrigger
from bookwyrm import activitypub
from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator
@ -24,6 +25,7 @@ from bookwyrm.settings import (
ENABLE_PREVIEW_IMAGES,
ENABLE_THUMBNAIL_GENERATION,
)
from bookwyrm.utils.db import format_trigger
from .activitypub_mixin import OrderedCollectionPageMixin, ObjectMixin
from .base_model import BookWyrmModel
@ -232,9 +234,39 @@ class Book(BookDataModel):
)
class Meta:
"""sets up postgres GIN index field"""
"""set up indexes and triggers"""
# pylint: disable=line-too-long
indexes = (GinIndex(fields=["search_vector"]),)
triggers = [
pgtrigger.Trigger(
name="update_search_vector_on_book_edit",
when=pgtrigger.Before,
operation=pgtrigger.Insert
| pgtrigger.UpdateOf("title", "subtitle", "series", "search_vector"),
func=format_trigger(
"""new.search_vector :=
-- title, with priority A (parse in English, default to simple if empty)
setweight(COALESCE(nullif(
to_tsvector('english', new.title), ''),
to_tsvector('simple', new.title)), 'A') ||
-- subtitle, with priority B (always in English?)
setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
-- list of authors, with priority C (TODO: add aliases?, bookwyrm-social#3063)
(SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(bookwyrm_author.name), ' '), '')), 'C')
FROM bookwyrm_author
LEFT JOIN bookwyrm_book_authors
ON bookwyrm_author.id = bookwyrm_book_authors.author_id
WHERE bookwyrm_book_authors.book_id = new.id
) ||
--- last: series name, with lowest priority
setweight(to_tsvector('english', COALESCE(new.series, '')), 'D');
RETURN new;
"""
),
)
]
class Work(OrderedCollectionPageMixin, Book):

View file

@ -108,6 +108,7 @@ INSTALLED_APPS = [
"celery",
"django_celery_beat",
"imagekit",
"pgtrigger",
"storages",
]

View file

@ -1,5 +1,6 @@
""" test searching for books """
import datetime
from django.db import connection
from django.test import TestCase
from django.utils import timezone
@ -140,3 +141,244 @@ class BookSearch(TestCase):
# there's really not much to test here, it's just a dataclass
self.assertEqual(result.confidence, 1)
self.assertEqual(result.title, "Title")
class SearchVectorTest(TestCase):
"""check search_vector is computed correctly"""
def test_search_vector_simple(self):
"""simplest search vector"""
book = self._create_book("Book", "Mary")
self.assertEqual(book.search_vector, "'book':1A 'mary':2C") # A > C (priority)
def test_search_vector_all_parts(self):
"""search vector with subtitle and series"""
# for a book like this we call `to_tsvector("Book Long Mary Bunch")`, hence the
# indexes in the search vector. (priority "D" is the default, and never shown.)
book = self._create_book("Book", "Mary", subtitle="Long", series="Bunch")
self.assertEqual(book.search_vector, "'book':1A 'bunch':4 'long':2B 'mary':3C")
def test_search_vector_parse_book(self):
"""book parts are parsed in english"""
# FIXME: at some point this should stop being the default.
book = self._create_book(
"Edition", "Editor", series="Castle", subtitle="Writing"
)
self.assertEqual(
book.search_vector, "'castl':4 'edit':1A 'editor':3C 'write':2B"
)
def test_search_vector_parse_author(self):
"""author name is not stem'd or affected by stop words"""
book = self._create_book("Writing", "Writes")
self.assertEqual(book.search_vector, "'write':1A 'writes':2C")
book = self._create_book("She Is Writing", "She Writes")
self.assertEqual(book.search_vector, "'she':4C 'write':3A 'writes':5C")
def test_search_vector_parse_title_empty(self):
"""empty parse in English retried as simple title"""
book = self._create_book("Here We", "John")
self.assertEqual(book.search_vector, "'here':1A 'john':3C 'we':2A")
book = self._create_book("Hear We Come", "John")
self.assertEqual(book.search_vector, "'come':3A 'hear':1A 'john':4C")
book = self._create_book("there there", "the")
self.assertEqual(book.search_vector, "'the':3C 'there':1A,2A")
def test_search_vector_no_author(self):
"""book with no authors gets processed normally"""
book = self._create_book("Book", None, series="Bunch")
self.assertEqual(book.search_vector, "'book':1A 'bunch':2")
book = self._create_book("there there", None)
self.assertEqual(book.search_vector, "'there':1A,2A")
# n.b.: the following originally from test_posgres.py
def test_search_vector_on_update(self):
"""make sure that search_vector is being set correctly on edit"""
book = self._create_book("The Long Goodbye", None)
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
book.title = "The Even Longer Goodbye"
book.save(broadcast=False)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A")
def test_search_vector_on_author_update(self):
"""update search when an author name changes"""
book = self._create_book("The Long Goodbye", "The Rays")
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A 'rays':5C 'the':4C")
author = models.Author.objects.get(name="The Rays")
author.name = "Jeremy"
author.save(broadcast=False)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A")
def test_search_vector_on_author_delete(self):
"""update search when an author is deleted"""
book = self._create_book("The Long Goodbye", "The Rays")
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A 'rays':5C 'the':4C")
author = models.Author.objects.get(name="The Rays")
book.authors.remove(author)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
def test_search_vector_fields(self):
"""language field irrelevant for search_vector"""
author = models.Author.objects.create(name="The Rays")
book = models.Edition.objects.create(
title="The Long Goodbye",
subtitle="wow cool",
series="series name",
languages=["irrelevant"],
)
book.authors.add(author)
book.refresh_from_db()
self.assertEqual(
book.search_vector,
# pylint: disable-next=line-too-long
"'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B",
)
@staticmethod
def _create_book(
title, author_name, /, *, subtitle="", series="", author_alias=None
):
"""quickly create a book"""
work = models.Work.objects.create(title="work")
edition = models.Edition.objects.create(
title=title,
series=series or None,
subtitle=subtitle or None,
isbn_10="0000000000",
parent_work=work,
)
if author_name is not None:
author = models.Author.objects.create(
name=author_name, aliases=author_alias or []
)
edition.authors.add(author)
edition.save(broadcast=False)
edition.refresh_from_db()
return edition
class SearchVectorUpdates(TestCase):
"""look for books as they change""" # functional tests of the above
def setUp(self):
"""we need basic test data and mocks"""
self.work = models.Work.objects.create(title="This Work")
self.author = models.Author.objects.create(name="Name")
self.edition = models.Edition.objects.create(
title="First Edition of Work",
subtitle="Some Extra Words Are Good",
series="A Fabulous Sequence of Items",
parent_work=self.work,
isbn_10="0000000000",
)
self.edition.authors.add(self.author)
self.edition.save(broadcast=False)
@classmethod
def setUpTestData(cls):
"""create conditions that trigger known old bugs"""
with connection.cursor() as cursor:
cursor.execute(
"""
ALTER SEQUENCE bookwyrm_author_id_seq RESTART WITH 20;
ALTER SEQUENCE bookwyrm_book_authors_id_seq RESTART WITH 300;
"""
)
def test_search_after_changed_metadata(self):
"""book found after updating metadata"""
self.assertEqual(self.edition, self._search_first("First")) # title
self.assertEqual(self.edition, self._search_first("Good")) # subtitle
self.assertEqual(self.edition, self._search_first("Sequence")) # series
self.edition.title = "Second Title of Work"
self.edition.subtitle = "Fewer Words Is Better"
self.edition.series = "A Wondrous Bunch"
self.edition.save(broadcast=False)
self.assertEqual(self.edition, self._search_first("Second")) # title new
self.assertEqual(self.edition, self._search_first("Fewer")) # subtitle new
self.assertEqual(self.edition, self._search_first("Wondrous")) # series new
self.assertFalse(self._search_first("First")) # title old
self.assertFalse(self._search_first("Good")) # subtitle old
self.assertFalse(self._search_first("Sequence")) # series old
def test_search_after_author_remove(self):
"""book not found via removed author"""
self.assertEqual(self.edition, self._search_first("Name"))
self.edition.authors.set([])
self.edition.save(broadcast=False)
self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Edition"))
def test_search_after_author_add(self):
"""book found by newly-added author"""
new_author = models.Author.objects.create(name="Mozilla")
self.assertFalse(self._search("Mozilla"))
self.edition.authors.add(new_author)
self.edition.save(broadcast=False)
self.assertEqual(self.edition, self._search_first("Mozilla"))
self.assertEqual(self.edition, self._search_first("Name"))
def test_search_after_author_add_remove_sql(self):
"""add/remove author through SQL to ensure execution of book_authors trigger"""
# Tests calling edition.save(), above, pass even if the trigger in
# bookwyrm_book_authors is removed (probably because they trigger the one
# in bookwyrm_book directly). Here we make sure to exercise the former.
new_author = models.Author.objects.create(name="Mozilla")
with connection.cursor() as cursor:
cursor.execute(
"DELETE FROM bookwyrm_book_authors WHERE book_id = %s",
[self.edition.id],
)
self.assertFalse(self._search("Name"))
self.assertFalse(self._search("Mozilla"))
with connection.cursor() as cursor:
cursor.execute(
"INSERT INTO bookwyrm_book_authors (book_id,author_id) VALUES (%s,%s)",
[self.edition.id, new_author.id],
)
self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Mozilla"))
def test_search_after_updated_author_name(self):
"""book found under new author name"""
self.assertEqual(self.edition, self._search_first("Name"))
self.assertFalse(self._search("Identifier"))
self.author.name = "Identifier"
self.author.save(broadcast=False)
self.assertFalse(self._search("Name"))
self.assertEqual(self.edition, self._search_first("Identifier"))
self.assertEqual(self.edition, self._search_first("Work"))
def _search_first(self, query):
"""wrapper around search_title_author"""
return self._search(query, return_first=True)
@staticmethod
def _search(query, *, return_first=False):
"""wrapper around search_title_author"""
return book_search.search_title_author(
query, min_confidence=0, return_first=return_first
)

View file

@ -1,77 +0,0 @@
""" django configuration of postgres """
from unittest.mock import patch
from django.test import TestCase
from bookwyrm import models
@patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async")
class PostgresTriggers(TestCase):
"""special migrations, fancy stuff ya know"""
def test_search_vector_on_create(self, _):
"""make sure that search_vector is being set correctly on create"""
book = models.Edition.objects.create(title="The Long Goodbye")
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
def test_search_vector_on_update(self, _):
"""make sure that search_vector is being set correctly on edit"""
book = models.Edition.objects.create(title="The Long Goodbye")
book.title = "The Even Longer Goodbye"
book.save(broadcast=False)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A")
def test_search_vector_fields(self, _):
"""use multiple fields to create search vector"""
author = models.Author.objects.create(name="The Rays")
book = models.Edition.objects.create(
title="The Long Goodbye",
subtitle="wow cool",
series="series name",
languages=["irrelevant"],
)
book.authors.add(author)
book.refresh_from_db()
# pylint: disable=line-too-long
self.assertEqual(
book.search_vector,
"'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B",
)
def test_search_vector_on_author_update(self, _):
"""update search when an author name changes"""
author = models.Author.objects.create(name="The Rays")
book = models.Edition.objects.create(
title="The Long Goodbye",
)
book.authors.add(author)
author.name = "Jeremy"
author.save(broadcast=False)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A")
def test_search_vector_on_author_delete(self, _):
"""update search when an author name changes"""
author = models.Author.objects.create(name="Jeremy")
book = models.Edition.objects.create(
title="The Long Goodbye",
)
book.authors.add(author)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A")
book.authors.remove(author)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'goodby':3A 'long':2A")
def test_search_vector_stop_word_fallback(self, _):
"""use a fallback when removing stop words leads to an empty vector"""
book = models.Edition.objects.create(
title="there there",
)
book.refresh_from_db()
self.assertEqual(book.search_vector, "'there':1A,2A")

23
bookwyrm/utils/db.py Normal file
View file

@ -0,0 +1,23 @@
""" Database utilities """
from typing import cast
import sqlparse # type: ignore
def format_trigger(sql: str) -> str:
"""format SQL trigger before storing
we remove whitespace and use consistent casing so as to avoid migrations
due to formatting changes.
"""
return cast(
str,
sqlparse.format(
sql,
strip_comments=True,
strip_whitespace=True,
use_space_around_operators=True,
keyword_case="upper",
identifier_case="lower",
),
)

View file

@ -10,6 +10,7 @@ django-compressor==4.4
django-csp==3.7
django-imagekit==4.1.0
django-model-utils==4.3.1
django-pgtrigger==4.11.0
django-redis==5.2.0
django-sass-processor==1.2.2
django-storages==1.13.2