More efficient search index

Co-authored-by: asmr-hex <0.0@asmr.software>
This commit is contained in:
Mouse Reeve 2021-06-23 16:14:59 -07:00
parent ae1d0343ba
commit 7c15fbbb0b
4 changed files with 87 additions and 10 deletions

View file

@ -2,7 +2,7 @@
from functools import reduce
import operator
from django.contrib.postgres.search import SearchRank, SearchVector
from django.contrib.postgres.search import SearchRank
from django.db.models import OuterRef, Subquery, F, Q
from bookwyrm import models
@ -141,16 +141,9 @@ def search_identifiers(query, *filters):
def search_title_author(query, min_confidence, *filters):
"""searches for title and author"""
vector = (
SearchVector("title", weight="A")
+ SearchVector("subtitle", weight="B")
+ SearchVector("authors__name", weight="C")
+ SearchVector("series", weight="D")
)
results = (
models.Edition.objects.annotate(rank=SearchRank(vector, query))
.filter(*filters, rank__gt=min_confidence)
models.Edition.objects.annotate(rank=SearchRank("search_vector", query))
.filter(*filters, search_vector=query, rank__gt=min_confidence)
.order_by("-rank")
)

View file

@ -0,0 +1,70 @@
# Generated by Django 3.2.4 on 2021-06-23 21:55
import django.contrib.postgres.indexes
import django.contrib.postgres.search
from django.db import migrations
class Migration(migrations.Migration):
    """Add a ``search_vector`` column and GIN index to authors and books.

    Also installs a PostgreSQL trigger on ``bookwyrm_book`` that rebuilds
    the book's ``search_vector`` from its title, subtitle, series and
    author names, and backfills the column for rows that already exist.
    """

    # Must be applied after the preview-images migration.
    dependencies = [
        ("bookwyrm", "0076_preview_images"),
    ]

    operations = [
        # Nullable tsvector column on authors. NOTE: nothing in this
        # migration populates the author column; only books get a trigger.
        migrations.AddField(
            model_name="author",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        # Nullable tsvector column on books, filled by the trigger below.
        migrations.AddField(
            model_name="book",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        # GIN indexes over the new columns.
        migrations.AddIndex(
            model_name="author",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin"
            ),
        ),
        migrations.AddIndex(
            model_name="book",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="bookwyrm_bo_search__51beb3_gin"
            ),
        ),
        # Raw SQL, in three statements:
        #   1. book_trigger(): sets new.search_vector to the concatenation
        #      of weighted tsvectors -- title (A), subtitle (B), series (D)
        #      and the space-joined author names (C) -- all parsed with the
        #      'pg_catalog.english' configuration. coalesce() turns NULL
        #      inputs into empty strings so one missing field does not null
        #      out the whole vector.
        #   2. search_vector_trigger: runs book_trigger() per row, BEFORE
        #      INSERT, or BEFORE UPDATE when title, subtitle, series or
        #      search_vector is among the updated columns.
        #   3. The final UPDATE assigns search_vector on every row, which
        #      fires the trigger and so backfills existing books.
        # NOTE: the trigger is attached to bookwyrm_book only; nothing here
        # refreshes a book's vector when rows in bookwyrm_book_authors or
        # bookwyrm_author change.
        # NOTE(review): `EXECUTE FUNCTION` is PostgreSQL 11+ syntax --
        # confirm the minimum supported server version.
        migrations.RunSQL(
            sql="""
CREATE FUNCTION book_trigger() RETURNS trigger AS $$
begin
new.search_vector :=
setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') ||
setweight(to_tsvector('pg_catalog.english', coalesce(new.subtitle, '')), 'B') ||
setweight(to_tsvector('pg_catalog.english', coalesce(new.series, '')), 'D') ||
(SELECT setweight(to_tsvector('pg_catalog.english', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C')
FROM bookwyrm_book
LEFT OUTER JOIN bookwyrm_book_authors
ON bookwyrm_book.id = bookwyrm_book_authors.book_id
LEFT OUTER JOIN bookwyrm_author
ON bookwyrm_book_authors.author_id = bookwyrm_author.id
WHERE bookwyrm_book.id = new.id
);
return new;
end
$$ LANGUAGE plpgsql;
CREATE TRIGGER search_vector_trigger
BEFORE INSERT OR UPDATE OF title, subtitle, series, search_vector
ON bookwyrm_book
FOR EACH ROW EXECUTE FUNCTION book_trigger();
UPDATE bookwyrm_book SET search_vector = NULL;
""",
            # Reversal drops the trigger first, then the function it calls.
            reverse_sql="""
DROP TRIGGER IF EXISTS search_vector_trigger
ON bookwyrm_book;
DROP FUNCTION IF EXISTS book_trigger;
""",
        ),
    ]

View file

@ -1,4 +1,5 @@
""" database schema for info about authors """
from django.contrib.postgres.indexes import GinIndex
from django.db import models
from bookwyrm import activitypub
@ -37,3 +38,8 @@ class Author(BookDataModel):
return "https://%s/author/%s" % (DOMAIN, self.id)
activity_serializer = activitypub.Author
class Meta:
    """Declare a PostgreSQL GIN index over the ``search_vector`` field."""

    indexes = (GinIndex(fields=["search_vector"]),)

View file

@ -1,6 +1,8 @@
""" database schema for books and shelves """
import re
from django.contrib.postgres.search import SearchVectorField
from django.contrib.postgres.indexes import GinIndex
from django.db import models
from django.dispatch import receiver
from model_utils import FieldTracker
@ -34,6 +36,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
bnf_id = fields.CharField( # Bibliothèque nationale de France
max_length=255, blank=True, null=True, deduplication_field=True
)
search_vector = SearchVectorField(null=True)
last_edited_by = fields.ForeignKey(
"User",
@ -142,6 +145,11 @@ class Book(BookDataModel):
self.title,
)
class Meta:
    """Declare a PostgreSQL GIN index over the ``search_vector`` field."""

    indexes = (GinIndex(fields=["search_vector"]),)
class Work(OrderedCollectionPageMixin, Book):
"""a work (an abstract concept of a book that manifests in an edition)"""