From b5805accacb497be62cb4e7bb54a52258a1d12e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adeodato=20Sim=C3=B3?= Date: Sat, 25 Nov 2023 21:47:13 -0300 Subject: [PATCH] Minor improvements to bookwyrm_book trigger code - do not COALESCE columns that cannot be NULL - do not bring bookwyrm_book to author names JOIN - add comments documenting the four steps --- ...grate_search_vec_triggers_to_pgtriggers.py | 4 ++-- bookwyrm/models/book.py | 21 ++++++++++--------- bookwyrm/tests/test_book_search.py | 16 +++++++++----- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/bookwyrm/migrations/0191_migrate_search_vec_triggers_to_pgtriggers.py b/bookwyrm/migrations/0191_migrate_search_vec_triggers_to_pgtriggers.py index ddfe74a8b..5e798b654 100644 --- a/bookwyrm/migrations/0191_migrate_search_vec_triggers_to_pgtriggers.py +++ b/bookwyrm/migrations/0191_migrate_search_vec_triggers_to_pgtriggers.py @@ -32,8 +32,8 @@ class Migration(migrations.Migration): trigger=pgtrigger.compiler.Trigger( name="update_search_vector_on_book_edit", sql=pgtrigger.compiler.UpsertTriggerSql( - func="new.search_vector := coalesce(nullif(setweight(to_tsvector('english', coalesce(new.title, '')), 'A'), ''), setweight(to_tsvector('simple', coalesce(new.title, '')), 'A')) || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_book LEFT OUTER JOIN bookwyrm_book_authors ON bookwyrm_book.id = bookwyrm_book_authors.book_id LEFT OUTER JOIN bookwyrm_author ON bookwyrm_book_authors.author_id = bookwyrm_author.id WHERE bookwyrm_book.id = new.id ) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D');RETURN NEW;", - hash="9c898d46dfb7492ecd18f6c692bbecfa548f0e85", + func="new.search_vector := setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D');RETURN NEW;", + hash="77d6399497c0a89b0bf09d296e33c396da63705c", operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"', pgid="pgtrigger_update_search_vector_on_book_edit_bec58", table="bookwyrm_book", diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index ed26752e3..e167e2138 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -247,19 +247,20 @@ class Book(BookDataModel): | pgtrigger.UpdateOf("title", "subtitle", "series", "search_vector"), func=format_trigger( """new.search_vector := - COALESCE( - NULLIF(setweight(to_tsvector('english', COALESCE(new.title, '')), 'A'), ''), - setweight(to_tsvector('simple', COALESCE(new.title, '')), 'A') - ) || + -- title, with priority A (parse in English, default to simple if empty) + setweight(COALESCE(nullif( + to_tsvector('english', new.title), ''), + to_tsvector('simple', new.title)), 'A') || + -- subtitle, with priority B (always in English?) setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') || + -- list of authors, with priority C (TODO: add aliases?, bookwyrm-social#3063) (SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(bookwyrm_author.name), ' '), '')), 'C') - FROM bookwyrm_book - LEFT OUTER JOIN bookwyrm_book_authors - ON bookwyrm_book.id = bookwyrm_book_authors.book_id - LEFT OUTER JOIN bookwyrm_author - ON bookwyrm_book_authors.author_id = bookwyrm_author.id - WHERE bookwyrm_book.id = new.id + FROM bookwyrm_author + LEFT JOIN bookwyrm_book_authors + ON bookwyrm_author.id = bookwyrm_book_authors.author_id + WHERE bookwyrm_book_authors.book_id = new.id ) || + --- last: series name, with lowest priority setweight(to_tsvector('english', COALESCE(new.series, '')), 'D'); RETURN new; """ diff --git a/bookwyrm/tests/test_book_search.py b/bookwyrm/tests/test_book_search.py index 99b62f0a0..e9f550a88 100644 --- a/bookwyrm/tests/test_book_search.py +++ b/bookwyrm/tests/test_book_search.py @@ -178,15 +178,17 @@ class SearchVectorTest(TestCase): book = self._create_book("Hear We Come", "John") self.assertEqual(book.search_vector, "'come':3A 'hear':1A 'john':4C") + def test_search_vector_no_author(self): + """book with no authors gets processed normally""" + book = self._create_book("Book", None, series="Bunch") + self.assertEqual(book.search_vector, "'book':1A 'bunch':2") + @staticmethod def _create_book( title, author_name, /, *, subtitle="", series="", author_alias=None ): """quickly create a book""" work = models.Work.objects.create(title="work") - author = models.Author.objects.create( - name=author_name, aliases=author_alias or [] - ) edition = models.Edition.objects.create( title=title, series=series or None, @@ -194,8 +196,12 @@ class SearchVectorTest(TestCase): isbn_10="0000000000", parent_work=work, ) - edition.authors.add(author) - edition.save(broadcast=False) + if author_name is not None: + author = models.Author.objects.create( + name=author_name, aliases=author_alias or [] + ) + edition.authors.add(author) + edition.save(broadcast=False) edition.refresh_from_db() return edition