From 7c15fbbb0b4f6af131a2f75902a2a9f205681f08 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 23 Jun 2021 16:14:59 -0700 Subject: [PATCH 01/21] More efficient search index Co-authored-by: asmr-hex <0.0@asmr.software> --- bookwyrm/connectors/self_connector.py | 13 +--- .../migrations/0077_auto_20210623_2155.py | 70 +++++++++++++++++++ bookwyrm/models/author.py | 6 ++ bookwyrm/models/book.py | 8 +++ 4 files changed, 87 insertions(+), 10 deletions(-) create mode 100644 bookwyrm/migrations/0077_auto_20210623_2155.py diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 930b7cb3d..9110b87ba 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -2,7 +2,7 @@ from functools import reduce import operator -from django.contrib.postgres.search import SearchRank, SearchVector +from django.contrib.postgres.search import SearchRank from django.db.models import OuterRef, Subquery, F, Q from bookwyrm import models @@ -141,16 +141,9 @@ def search_identifiers(query, *filters): def search_title_author(query, min_confidence, *filters): """searches for title and author""" - vector = ( - SearchVector("title", weight="A") - + SearchVector("subtitle", weight="B") - + SearchVector("authors__name", weight="C") - + SearchVector("series", weight="D") - ) - results = ( - models.Edition.objects.annotate(rank=SearchRank(vector, query)) - .filter(*filters, rank__gt=min_confidence) + models.Edition.objects.annotate(rank=SearchRank("search_vector", query)) + .filter(*filters, search_vector=query, rank__gt=min_confidence) .order_by("-rank") ) diff --git a/bookwyrm/migrations/0077_auto_20210623_2155.py b/bookwyrm/migrations/0077_auto_20210623_2155.py new file mode 100644 index 000000000..e41960d1e --- /dev/null +++ b/bookwyrm/migrations/0077_auto_20210623_2155.py @@ -0,0 +1,70 @@ +# Generated by Django 3.2.4 on 2021-06-23 21:55 + +import django.contrib.postgres.indexes +import django.contrib.postgres.search 
+from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0076_preview_images"), + ] + + operations = [ + migrations.AddField( + model_name="author", + name="search_vector", + field=django.contrib.postgres.search.SearchVectorField(null=True), + ), + migrations.AddField( + model_name="book", + name="search_vector", + field=django.contrib.postgres.search.SearchVectorField(null=True), + ), + migrations.AddIndex( + model_name="author", + index=django.contrib.postgres.indexes.GinIndex( + fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin" + ), + ), + migrations.AddIndex( + model_name="book", + index=django.contrib.postgres.indexes.GinIndex( + fields=["search_vector"], name="bookwyrm_bo_search__51beb3_gin" + ), + ), + migrations.RunSQL( + sql=""" + CREATE FUNCTION book_trigger() RETURNS trigger AS $$ + begin + new.search_vector := + setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') || + setweight(to_tsvector('pg_catalog.english', coalesce(new.subtitle, '')), 'B') || + setweight(to_tsvector('pg_catalog.english', coalesce(new.series, '')), 'D') || + (SELECT setweight(to_tsvector('pg_catalog.english', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') + FROM bookwyrm_book + LEFT OUTER JOIN bookwyrm_book_authors + ON bookwyrm_book.id = bookwyrm_book_authors.book_id + LEFT OUTER JOIN bookwyrm_author + ON bookwyrm_book_authors.author_id = bookwyrm_author.id + WHERE bookwyrm_book.id = new.id + ); + return new; + end + $$ LANGUAGE plpgsql; + + CREATE TRIGGER search_vector_trigger + BEFORE INSERT OR UPDATE OF title, subtitle, series, search_vector + ON bookwyrm_book + FOR EACH ROW EXECUTE FUNCTION book_trigger(); + + UPDATE bookwyrm_book SET search_vector = NULL; + """, + reverse_sql=""" + DROP TRIGGER IF EXISTS search_vector_trigger + ON bookwyrm_book; + DROP FUNCTION IF EXISTS book_trigger; + """, + ), + ] diff --git a/bookwyrm/models/author.py 
b/bookwyrm/models/author.py index c4e26c5ab..6da80b176 100644 --- a/bookwyrm/models/author.py +++ b/bookwyrm/models/author.py @@ -1,4 +1,5 @@ """ database schema for info about authors """ +from django.contrib.postgres.indexes import GinIndex from django.db import models from bookwyrm import activitypub @@ -37,3 +38,8 @@ class Author(BookDataModel): return "https://%s/author/%s" % (DOMAIN, self.id) activity_serializer = activitypub.Author + + class Meta: + """sets up postgres GIN index field""" + + indexes = (GinIndex(fields=["search_vector"]),) diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index d79ce206d..a6aa5de2d 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -1,6 +1,8 @@ """ database schema for books and shelves """ import re +from django.contrib.postgres.search import SearchVectorField +from django.contrib.postgres.indexes import GinIndex from django.db import models from django.dispatch import receiver from model_utils import FieldTracker @@ -34,6 +36,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel): bnf_id = fields.CharField( # Bibliothèque nationale de France max_length=255, blank=True, null=True, deduplication_field=True ) + search_vector = SearchVectorField(null=True) last_edited_by = fields.ForeignKey( "User", @@ -142,6 +145,11 @@ class Book(BookDataModel): self.title, ) + class Meta: + """sets up postgres GIN index field""" + + indexes = (GinIndex(fields=["search_vector"]),) + class Work(OrderedCollectionPageMixin, Book): """a work (an abstract concept of a book that manifests in an edition)""" From 6a6de44ebeb54cca6492ce9b1756859948d6fdba Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 23 Jun 2021 17:10:35 -0700 Subject: [PATCH 02/21] Attempt at getting tests running by bumping psql version --- .github/workflows/django-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/django-tests.yml b/.github/workflows/django-tests.yml index b5b319f53..9043a89c6 100644 ---
a/.github/workflows/django-tests.yml +++ b/.github/workflows/django-tests.yml @@ -20,7 +20,7 @@ jobs: services: postgres: - image: postgres:10 + image: postgres:13 env: POSTGRES_USER: postgres POSTGRES_PASSWORD: hunter2 From adb0253abfeed700ba208e3f5dca7fe907d33e58 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 23 Jun 2021 17:41:29 -0700 Subject: [PATCH 03/21] Remove default minimum search confidence --- bookwyrm/views/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/views/search.py b/bookwyrm/views/search.py index 274a3bc2e..d15fc6a87 100644 --- a/bookwyrm/views/search.py +++ b/bookwyrm/views/search.py @@ -23,7 +23,7 @@ class Search(View): def get(self, request): """that search bar up top""" query = request.GET.get("q") - min_confidence = request.GET.get("min_confidence", 0.1) + min_confidence = request.GET.get("min_confidence", 0) search_type = request.GET.get("type") search_remote = ( request.GET.get("remote", False) and request.user.is_authenticated From 04c51a65980820dfab41ffd730ccba33963d4d56 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 24 Jun 2021 10:01:07 -0700 Subject: [PATCH 04/21] Uses simple dictionary for names --- bookwyrm/migrations/0077_auto_20210623_2155.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/migrations/0077_auto_20210623_2155.py b/bookwyrm/migrations/0077_auto_20210623_2155.py index e41960d1e..4773e6bd9 100644 --- a/bookwyrm/migrations/0077_auto_20210623_2155.py +++ b/bookwyrm/migrations/0077_auto_20210623_2155.py @@ -42,7 +42,7 @@ class Migration(migrations.Migration): setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') || setweight(to_tsvector('pg_catalog.english', coalesce(new.subtitle, '')), 'B') || setweight(to_tsvector('pg_catalog.english', coalesce(new.series, '')), 'D') || - (SELECT setweight(to_tsvector('pg_catalog.english', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') + (SELECT 
setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_book LEFT OUTER JOIN bookwyrm_book_authors ON bookwyrm_book.id = bookwyrm_book_authors.book_id From deb1b9943a46f709899e9a5e198e2286e4ca8c1b Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 24 Jun 2021 10:56:27 -0700 Subject: [PATCH 05/21] Uses simple dictionary as backup for english dictionary Creates search vectors with stop words if the english version is empty, and uses a SearchQuery OR with simple and english on the view side. --- bookwyrm/connectors/self_connector.py | 3 ++- bookwyrm/migrations/0077_auto_20210623_2155.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 9110b87ba..635ee65cd 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -2,7 +2,7 @@ from functools import reduce import operator -from django.contrib.postgres.search import SearchRank +from django.contrib.postgres.search import SearchRank, SearchQuery from django.db.models import OuterRef, Subquery, F, Q from bookwyrm import models @@ -141,6 +141,7 @@ def search_identifiers(query, *filters): def search_title_author(query, min_confidence, *filters): """searches for title and author""" + query = SearchQuery(query, config="simple") | SearchQuery(query, config="english") results = ( models.Edition.objects.annotate(rank=SearchRank("search_vector", query)) .filter(*filters, search_vector=query, rank__gt=min_confidence) diff --git a/bookwyrm/migrations/0077_auto_20210623_2155.py b/bookwyrm/migrations/0077_auto_20210623_2155.py index 4773e6bd9..d72f83631 100644 --- a/bookwyrm/migrations/0077_auto_20210623_2155.py +++ b/bookwyrm/migrations/0077_auto_20210623_2155.py @@ -39,17 +39,20 @@ class Migration(migrations.Migration): CREATE FUNCTION book_trigger() RETURNS trigger AS $$ begin new.search_vector := - 
setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') || - setweight(to_tsvector('pg_catalog.english', coalesce(new.subtitle, '')), 'B') || - setweight(to_tsvector('pg_catalog.english', coalesce(new.series, '')), 'D') || - (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') + coalesce( + NULLIF(setweight(to_tsvector('english', coalesce(new.title, '')), 'A'), ''), + setweight(to_tsvector('simple', coalesce(new.title, '')), 'A') + ) || + setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || + (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'B') FROM bookwyrm_book LEFT OUTER JOIN bookwyrm_book_authors ON bookwyrm_book.id = bookwyrm_book_authors.book_id LEFT OUTER JOIN bookwyrm_author ON bookwyrm_book_authors.author_id = bookwyrm_author.id WHERE bookwyrm_book.id = new.id - ); + ) || + setweight(to_tsvector('english', coalesce(new.series, '')), 'D'); return new; end $$ LANGUAGE plpgsql; From b4b2105527090cb1fd2d783d48ee266828af1c33 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 08:54:52 -0700 Subject: [PATCH 06/21] Fixes edit book form --- bookwyrm/forms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bookwyrm/forms.py b/bookwyrm/forms.py index cb55d229e..f18ee54a6 100644 --- a/bookwyrm/forms.py +++ b/bookwyrm/forms.py @@ -183,6 +183,7 @@ class EditionForm(CustomForm): "parent_work", "shelves", "connector", + "search_vector", ] From 3998aa5f2a3b24b22054a687801aa07e7dedafdc Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 09:05:00 -0700 Subject: [PATCH 07/21] Fixes edit author form --- bookwyrm/forms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bookwyrm/forms.py b/bookwyrm/forms.py index f18ee54a6..57a94e3cd 100644 --- a/bookwyrm/forms.py +++ b/bookwyrm/forms.py @@ -195,6 +195,7 @@ class AuthorForm(CustomForm): "origin_id", "created_date", "updated_date", + 
"search_vector", ] From 61785cee66448112d4917a2904e4d3481d9391d0 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 09:12:23 -0700 Subject: [PATCH 08/21] Set defualt min confidence to 0 --- bookwyrm/connectors/self_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 635ee65cd..36409bc5b 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -13,7 +13,7 @@ class Connector(AbstractConnector): """instantiate a connector""" # pylint: disable=arguments-differ - def search(self, query, min_confidence=0.1, raw=False, filters=None): + def search(self, query, min_confidence=0, raw=False, filters=None): """search your local database""" filters = filters or [] if not query: From 2fa5fabb7bc169ff167a39de83c7b5401e73a8fb Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 09:57:56 -0700 Subject: [PATCH 09/21] Fixes default edition ranking test --- .../tests/connectors/test_self_connector.py | 104 +++++++++--------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/bookwyrm/tests/connectors/test_self_connector.py b/bookwyrm/tests/connectors/test_self_connector.py index 4f2173085..11c15826a 100644 --- a/bookwyrm/tests/connectors/test_self_connector.py +++ b/bookwyrm/tests/connectors/test_self_connector.py @@ -43,68 +43,68 @@ class SelfConnector(TestCase): self.assertEqual(result.year, 1980) self.assertEqual(result.connector, self.connector) - def test_search_rank(self): + @patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay") + def test_search_rank(self, _): """prioritize certain results""" author = models.Author.objects.create(name="Anonymous") - with patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay"): - edition = models.Edition.objects.create( - title="Edition of Example Work", - published_date=datetime.datetime(1980, 5, 10, 
tzinfo=timezone.utc), - parent_work=models.Work.objects.create(title=""), - ) - # author text is rank C - edition.authors.add(author) + edition = models.Edition.objects.create( + title="Edition of Example Work", + published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc), + parent_work=models.Work.objects.create(title=""), + ) + # author text is rank B + edition.authors.add(author) - # series is rank D - models.Edition.objects.create( - title="Another Edition", - series="Anonymous", - parent_work=models.Work.objects.create(title=""), - ) - # subtitle is rank B - models.Edition.objects.create( - title="More Editions", - subtitle="The Anonymous Edition", - parent_work=models.Work.objects.create(title=""), - ) - # title is rank A - models.Edition.objects.create(title="Anonymous") - # doesn't rank in this search - edition = models.Edition.objects.create( - title="An Edition", parent_work=models.Work.objects.create(title="") - ) + # series is rank D + models.Edition.objects.create( + title="Another Edition", + series="Anonymous", + parent_work=models.Work.objects.create(title=""), + ) + # subtitle is rank B + models.Edition.objects.create( + title="More Editions", + subtitle="The Anonymous Edition", + parent_work=models.Work.objects.create(title=""), + ) + # title is rank A + models.Edition.objects.create(title="Anonymous") + # doesn't rank in this search + edition = models.Edition.objects.create( + title="An Edition", parent_work=models.Work.objects.create(title="") + ) - results = self.connector.search("Anonymous") + results = self.connector.search("Anonymous") self.assertEqual(len(results), 3) self.assertEqual(results[0].title, "Anonymous") self.assertEqual(results[1].title, "More Editions") self.assertEqual(results[2].title, "Edition of Example Work") - def test_search_multiple_editions(self): + @patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay") + def test_search_multiple_editions(self, _): """it should get rid of duplicate editions 
for the same work""" - with patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay"): - work = models.Work.objects.create(title="Work Title") - edition_1 = models.Edition.objects.create( - title="Edition 1 Title", parent_work=work - ) - edition_2 = models.Edition.objects.create( - title="Edition 2 Title", - parent_work=work, - edition_rank=20, # that's default babey - ) - edition_3 = models.Edition.objects.create(title="Fish", parent_work=work) + work = models.Work.objects.create(title="Work Title") + edition_1 = models.Edition.objects.create( + title="Edition 1 Title", parent_work=work + ) + edition_2 = models.Edition.objects.create( + title="Edition 2 Title", + parent_work=work, + isbn_13="123456789", # this is now the defualt edition + ) + edition_3 = models.Edition.objects.create(title="Fish", parent_work=work) - # pick the best edition - results = self.connector.search("Edition 1 Title") - self.assertEqual(len(results), 1) - self.assertEqual(results[0].key, edition_1.remote_id) + # pick the best edition + results = self.connector.search("Edition 1 Title") + self.assertEqual(len(results), 1) + self.assertEqual(results[0].key, edition_1.remote_id) - # pick the default edition when no match is best - results = self.connector.search("Edition Title") - self.assertEqual(len(results), 1) - self.assertEqual(results[0].key, edition_2.remote_id) + # pick the default edition when no match is best + results = self.connector.search("Edition Title") + self.assertEqual(len(results), 1) + self.assertEqual(results[0].key, edition_2.remote_id) - # only matches one edition, so no deduplication takes place - results = self.connector.search("Fish") - self.assertEqual(len(results), 1) - self.assertEqual(results[0].key, edition_3.remote_id) + # only matches one edition, so no deduplication takes place + results = self.connector.search("Fish") + self.assertEqual(len(results), 1) + self.assertEqual(results[0].key, edition_3.remote_id) From 
68fc5f2b5a79320c523ccb00ef6ae488cee4e808 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 12:33:15 -0700 Subject: [PATCH 10/21] Trigger search vector update when author is edited --- .../migrations/0077_auto_20210623_2155.py | 53 +++++++++++++++ bookwyrm/tests/models/test_book_model.py | 3 +- bookwyrm/tests/test_postgres.py | 66 +++++++++++++++++++ 3 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 bookwyrm/tests/test_postgres.py diff --git a/bookwyrm/migrations/0077_auto_20210623_2155.py b/bookwyrm/migrations/0077_auto_20210623_2155.py index d72f83631..c915badd0 100644 --- a/bookwyrm/migrations/0077_auto_20210623_2155.py +++ b/bookwyrm/migrations/0077_auto_20210623_2155.py @@ -70,4 +70,57 @@ class Migration(migrations.Migration): DROP FUNCTION IF EXISTS book_trigger; """, ), + # when an author is edited + migrations.RunSQL( + sql=""" + CREATE FUNCTION author_trigger() RETURNS trigger AS $$ + begin + WITH book AS ( + SELECT bookwyrm_book.id as row_id + FROM bookwyrm_author + LEFT OUTER JOIN bookwyrm_book_authors + ON bookwyrm_book_authors.author_id = new.id + LEFT OUTER JOIN bookwyrm_book + ON bookwyrm_book.id = bookwyrm_book_authors.book_id + ) + UPDATE bookwyrm_book SET search_vector = '' + FROM book + WHERE id = book.row_id; + return new; + end + $$ LANGUAGE plpgsql; + + CREATE TRIGGER author_search_vector_trigger + AFTER UPDATE OF name + ON bookwyrm_author + FOR EACH ROW EXECUTE FUNCTION author_trigger(); + """, + reverse_sql=""" + DROP TRIGGER IF EXISTS author_search_vector_trigger + ON bookwyrm_author; + DROP FUNCTION IF EXISTS author_trigger; + """, + ), + # when an author is added to or removed from a book + migrations.RunSQL( + sql=""" + CREATE FUNCTION book_authors_trigger() RETURNS trigger AS $$ + begin + UPDATE bookwyrm_book SET search_vector = '' + WHERE id = coalesce(new.book_id, old.book_id); + return new; + end + $$ LANGUAGE plpgsql; + + CREATE TRIGGER book_authors_search_vector_trigger + AFTER INSERT OR DELETE + ON
bookwyrm_book_authors + FOR EACH ROW EXECUTE FUNCTION book_authors_trigger(); + """, + reverse_sql=""" + DROP TRIGGER IF EXISTS book_authors_search_vector_trigger + ON bookwyrm_book_authors; + DROP FUNCTION IF EXISTS book_authors_trigger; + """, + ), ] diff --git a/bookwyrm/tests/models/test_book_model.py b/bookwyrm/tests/models/test_book_model.py index df61514c1..9408220e0 100644 --- a/bookwyrm/tests/models/test_book_model.py +++ b/bookwyrm/tests/models/test_book_model.py @@ -1,8 +1,9 @@ """ testing models """ +from unittest.mock import patch + from dateutil.parser import parse from django.test import TestCase from django.utils import timezone -from unittest.mock import patch from bookwyrm import models, settings from bookwyrm.models.book import isbn_10_to_13, isbn_13_to_10 diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py new file mode 100644 index 000000000..b027ce021 --- /dev/null +++ b/bookwyrm/tests/test_postgres.py @@ -0,0 +1,66 @@ +""" django configuration of postgres """ +from django.test import TestCase + +from bookwyrm import models + + +class PostgresTriggers(TestCase): + """special migrations, fancy stuff ya know""" + + def test_search_vector_on_create(self): + """make sure that search_vector is being set correctly on create""" + book = models.Edition.objects.create(title="The Long Goodbye") + book.refresh_from_db() + self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") + + def test_search_vector_on_update(self): + """make sure that search_vector is being set correctly on edit""" + book = models.Edition.objects.create(title="The Long Goodbye") + book.title = "The Even Longer Goodbye" + book.save(broadcast=False) + book.refresh_from_db() + self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A") + + def test_search_vector_fields(self): + """use multiple fields to create search vector""" + author = models.Author.objects.create(name="The Rays") + book = models.Edition.objects.create( + title="The Long 
Goodbye", + subtitle="wow cool", + series="series name", + languages=["irrelevent"], + ) + book.authors.add(author) + book.refresh_from_db() + self.assertEqual( + book.search_vector, + "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7B 'seri':8 'the':6B 'wow':4B", + ) + + def test_seach_vector_on_author_update(self): + """update search when an author name changes""" + author = models.Author.objects.create(name="The Rays") + book = models.Edition.objects.create( + title="The Long Goodbye", + ) + book.authors.add(author) + author.name = "Jeremy" + author.save(broadcast=False) + book.refresh_from_db() + + self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4B 'long':2A") + + def test_seach_vector_on_author_delete(self): + """update search when an author name changes""" + author = models.Author.objects.create(name="Jeremy") + book = models.Edition.objects.create( + title="The Long Goodbye", + ) + + book.authors.add(author) + book.refresh_from_db() + self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4B 'long':2A") + + book.authors.remove(author) + book.refresh_from_db() + self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") From e1687204205da60e19e99851f55705121c400ff5 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 12:42:36 -0700 Subject: [PATCH 11/21] Adds test for stop word null state --- bookwyrm/tests/test_postgres.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py index b027ce021..423eb1131 100644 --- a/bookwyrm/tests/test_postgres.py +++ b/bookwyrm/tests/test_postgres.py @@ -64,3 +64,11 @@ class PostgresTriggers(TestCase): book.authors.remove(author) book.refresh_from_db() self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") + + def test_search_vector_stop_word_fallback(self): + """use a fallback when removing stop words leads to an empty vector""" + book = models.Edition.objects.create( + title="there there", + ) + 
book.refresh_from_db() + self.assertEqual(book.search_vector, "'there':1A,2A") From 62b5a00102935b9dd642954ddc0ea3109c641105 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 13:13:12 -0700 Subject: [PATCH 12/21] Adds test mocks from psql tests --- bookwyrm/tests/test_postgres.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py index 423eb1131..a597ce604 100644 --- a/bookwyrm/tests/test_postgres.py +++ b/bookwyrm/tests/test_postgres.py @@ -1,19 +1,20 @@ """ django configuration of postgres """ +from unittest import patch from django.test import TestCase from bookwyrm import models - +@patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay") class PostgresTriggers(TestCase): """special migrations, fancy stuff ya know""" - def test_search_vector_on_create(self): + def test_search_vector_on_create(self, _): """make sure that search_vector is being set correctly on create""" book = models.Edition.objects.create(title="The Long Goodbye") book.refresh_from_db() self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") - def test_search_vector_on_update(self): + def test_search_vector_on_update(self, _): """make sure that search_vector is being set correctly on edit""" book = models.Edition.objects.create(title="The Long Goodbye") book.title = "The Even Longer Goodbye" @@ -21,7 +22,7 @@ class PostgresTriggers(TestCase): book.refresh_from_db() self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A") - def test_search_vector_fields(self): + def test_search_vector_fields(self, _): """use multiple fields to create search vector""" author = models.Author.objects.create(name="The Rays") book = models.Edition.objects.create( @@ -37,7 +38,7 @@ class PostgresTriggers(TestCase): "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7B 'seri':8 'the':6B 'wow':4B", ) - def test_seach_vector_on_author_update(self): + def 
test_seach_vector_on_author_update(self, _): """update search when an author name changes""" author = models.Author.objects.create(name="The Rays") book = models.Edition.objects.create( @@ -50,7 +51,7 @@ class PostgresTriggers(TestCase): self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4B 'long':2A") - def test_seach_vector_on_author_delete(self): + def test_seach_vector_on_author_delete(self, _): """update search when an author name changes""" author = models.Author.objects.create(name="Jeremy") book = models.Edition.objects.create( @@ -65,7 +66,7 @@ class PostgresTriggers(TestCase): book.refresh_from_db() self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") - def test_search_vector_stop_word_fallback(self): + def test_search_vector_stop_word_fallback(self, _): """use a fallback when removing stop words leads to an empty vector""" book = models.Edition.objects.create( title="there there", From 789f823d5d561fa4cf906d830322076a4aaac212 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 13:20:23 -0700 Subject: [PATCH 13/21] Fixes python formatting --- bookwyrm/tests/test_postgres.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py index a597ce604..cb9e7e86d 100644 --- a/bookwyrm/tests/test_postgres.py +++ b/bookwyrm/tests/test_postgres.py @@ -4,6 +4,7 @@ from django.test import TestCase from bookwyrm import models + @patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay") class PostgresTriggers(TestCase): """special migrations, fancy stuff ya know""" From 6470033ac9f2f8f663b2fd8a6022cc9680708e97 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 16:20:18 -0700 Subject: [PATCH 14/21] Fixes search query syntax mad about the lack of django docs on this, jeez --- bookwyrm/connectors/self_connector.py | 2 +- bookwyrm/migrations/0077_auto_20210623_2155.py | 2 +- bookwyrm/tests/connectors/test_self_connector.py | 5 +++-- 3 files changed, 5 
insertions(+), 4 deletions(-) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 36409bc5b..b5492b737 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -143,7 +143,7 @@ def search_title_author(query, min_confidence, *filters): """searches for title and author""" query = SearchQuery(query, config="simple") | SearchQuery(query, config="english") results = ( - models.Edition.objects.annotate(rank=SearchRank("search_vector", query)) + models.Edition.objects.annotate(rank=SearchRank(F("search_vector"), query)) .filter(*filters, search_vector=query, rank__gt=min_confidence) .order_by("-rank") ) diff --git a/bookwyrm/migrations/0077_auto_20210623_2155.py b/bookwyrm/migrations/0077_auto_20210623_2155.py index c915badd0..a73c43825 100644 --- a/bookwyrm/migrations/0077_auto_20210623_2155.py +++ b/bookwyrm/migrations/0077_auto_20210623_2155.py @@ -44,7 +44,7 @@ class Migration(migrations.Migration): setweight(to_tsvector('simple', coalesce(new.title, '')), 'A') ) || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || - (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'B') + (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C') FROM bookwyrm_book LEFT OUTER JOIN bookwyrm_book_authors ON bookwyrm_book.id = bookwyrm_book_authors.book_id diff --git a/bookwyrm/tests/connectors/test_self_connector.py b/bookwyrm/tests/connectors/test_self_connector.py index 11c15826a..02c0c9a4f 100644 --- a/bookwyrm/tests/connectors/test_self_connector.py +++ b/bookwyrm/tests/connectors/test_self_connector.py @@ -70,15 +70,16 @@ class SelfConnector(TestCase): # title is rank A models.Edition.objects.create(title="Anonymous") # doesn't rank in this search - edition = models.Edition.objects.create( + models.Edition.objects.create( title="An Edition", 
parent_work=models.Work.objects.create(title="") ) results = self.connector.search("Anonymous") - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 4) self.assertEqual(results[0].title, "Anonymous") self.assertEqual(results[1].title, "More Editions") self.assertEqual(results[2].title, "Edition of Example Work") + self.assertEqual(results[3].title, "Another Edition") @patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay") def test_search_multiple_editions(self, _): From 36fe64c3ae565eeb9642b9f401844810243669f9 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 16:48:57 -0700 Subject: [PATCH 15/21] Fixes patch import in test --- bookwyrm/tests/test_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py index cb9e7e86d..7f5cfa699 100644 --- a/bookwyrm/tests/test_postgres.py +++ b/bookwyrm/tests/test_postgres.py @@ -1,5 +1,5 @@ """ django configuration of postgres """ -from unittest import patch +from unittest.mock import patch from django.test import TestCase from bookwyrm import models From a3badc5700ddf15bdd24288242fb4f0516c925cf Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 18:36:17 -0700 Subject: [PATCH 16/21] Adds preview mock to postgres tests --- bookwyrm/tests/test_postgres.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py index 7f5cfa699..1cdc4e833 100644 --- a/bookwyrm/tests/test_postgres.py +++ b/bookwyrm/tests/test_postgres.py @@ -6,16 +6,18 @@ from bookwyrm import models @patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay") +@patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay") class PostgresTriggers(TestCase): """special migrations, fancy stuff ya know""" - def test_search_vector_on_create(self, _): + def test_search_vector_on_create(self, *_): """make sure that 
search_vector is being set correctly on create""" + print('hello?') book = models.Edition.objects.create(title="The Long Goodbye") book.refresh_from_db() self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") - def test_search_vector_on_update(self, _): + def test_search_vector_on_update(self, *_): """make sure that search_vector is being set correctly on edit""" book = models.Edition.objects.create(title="The Long Goodbye") book.title = "The Even Longer Goodbye" @@ -23,7 +25,7 @@ class PostgresTriggers(TestCase): book.refresh_from_db() self.assertEqual(book.search_vector, "'even':2A 'goodby':4A 'longer':3A") - def test_search_vector_fields(self, _): + def test_search_vector_fields(self, *_): """use multiple fields to create search vector""" author = models.Author.objects.create(name="The Rays") book = models.Edition.objects.create( @@ -39,7 +41,7 @@ class PostgresTriggers(TestCase): "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7B 'seri':8 'the':6B 'wow':4B", ) - def test_seach_vector_on_author_update(self, _): + def test_seach_vector_on_author_update(self, *_): """update search when an author name changes""" author = models.Author.objects.create(name="The Rays") book = models.Edition.objects.create( @@ -52,7 +54,7 @@ class PostgresTriggers(TestCase): self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4B 'long':2A") - def test_seach_vector_on_author_delete(self, _): + def test_seach_vector_on_author_delete(self, *_): """update search when an author name changes""" author = models.Author.objects.create(name="Jeremy") book = models.Edition.objects.create( @@ -67,7 +69,7 @@ class PostgresTriggers(TestCase): book.refresh_from_db() self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") - def test_search_vector_stop_word_fallback(self, _): + def test_search_vector_stop_word_fallback(self, *_): """use a fallback when removing stop words leads to an empty vector""" book = models.Edition.objects.create( title="there there", From 
8844e462591160f728f7ce941581ac49a39cf0d4 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 18:40:53 -0700 Subject: [PATCH 17/21] Removes stray print statement --- bookwyrm/tests/test_postgres.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py index 1cdc4e833..93ff264fb 100644 --- a/bookwyrm/tests/test_postgres.py +++ b/bookwyrm/tests/test_postgres.py @@ -12,7 +12,6 @@ class PostgresTriggers(TestCase): def test_search_vector_on_create(self, *_): """make sure that search_vector is being set correctly on create""" - print('hello?') book = models.Edition.objects.create(title="The Long Goodbye") book.refresh_from_db() self.assertEqual(book.search_vector, "'goodby':3A 'long':2A") From 62b6bfe1ee2e3dec27664e42ba2045278dcb0847 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 18:55:09 -0700 Subject: [PATCH 18/21] Annotation and filtering order --- bookwyrm/connectors/self_connector.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index b5492b737..8d5a7614e 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -143,8 +143,9 @@ def search_title_author(query, min_confidence, *filters): """searches for title and author""" query = SearchQuery(query, config="simple") | SearchQuery(query, config="english") results = ( - models.Edition.objects.annotate(rank=SearchRank(F("search_vector"), query)) - .filter(*filters, search_vector=query, rank__gt=min_confidence) + models.Edition.objects.filter(*filters, search_vector=query) + .annotate(rank=SearchRank(F("search_vector"), query)) + .filter(rank__gt=min_confidence) .order_by("-rank") ) From 3238be474885a30541e50ed3fe1035e53e0586c0 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 26 Jun 2021 19:07:09 -0700 Subject: [PATCH 19/21] Restores weights to original calculation --- 
bookwyrm/tests/test_postgres.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bookwyrm/tests/test_postgres.py b/bookwyrm/tests/test_postgres.py index 93ff264fb..98385d34b 100644 --- a/bookwyrm/tests/test_postgres.py +++ b/bookwyrm/tests/test_postgres.py @@ -37,7 +37,7 @@ class PostgresTriggers(TestCase): book.refresh_from_db() self.assertEqual( book.search_vector, - "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7B 'seri':8 'the':6B 'wow':4B", + "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B", ) def test_seach_vector_on_author_update(self, *_): @@ -51,7 +51,7 @@ class PostgresTriggers(TestCase): author.save(broadcast=False) book.refresh_from_db() - self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4B 'long':2A") + self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A") def test_seach_vector_on_author_delete(self, *_): """update search when an author name changes""" @@ -62,7 +62,7 @@ class PostgresTriggers(TestCase): book.authors.add(author) book.refresh_from_db() - self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4B 'long':2A") + self.assertEqual(book.search_vector, "'goodby':3A 'jeremy':4C 'long':2A") book.authors.remove(author) book.refresh_from_db() From fe5f8b3f0e2c05126e067aeb130be164e2598df9 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 27 Jun 2021 07:04:37 -0700 Subject: [PATCH 20/21] Test is passing locally and failing in CI Trying out different yaml params --- .github/workflows/django-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/django-tests.yml b/.github/workflows/django-tests.yml index 9043a89c6..80bfa5f72 100644 --- a/.github/workflows/django-tests.yml +++ b/.github/workflows/django-tests.yml @@ -20,7 +20,7 @@ jobs: services: postgres: - image: postgres:13 + image: postgres:12 env: POSTGRES_USER: postgres POSTGRES_PASSWORD: hunter2 @@ -66,4 +66,4 @@ jobs: EMAIL_USE_TLS: true ENABLE_PREVIEW_IMAGES: 
true run: | - python manage.py test + python manage.py pytest From 55aa67cc00c3fd85da2c502704c479f5350c05e0 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 27 Jun 2021 07:31:01 -0700 Subject: [PATCH 21/21] Fixes pytest syntax --- .github/workflows/django-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/django-tests.yml b/.github/workflows/django-tests.yml index 80bfa5f72..c11b7c408 100644 --- a/.github/workflows/django-tests.yml +++ b/.github/workflows/django-tests.yml @@ -66,4 +66,4 @@ jobs: EMAIL_USE_TLS: true ENABLE_PREVIEW_IMAGES: true run: | - python manage.py pytest + pytest