mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-23 10:01:04 +00:00
More efficient search index
Co-authored-by: asmr-hex <0.0@asmr.software>
This commit is contained in:
parent
ae1d0343ba
commit
7c15fbbb0b
4 changed files with 87 additions and 10 deletions
|
@ -2,7 +2,7 @@
|
|||
from functools import reduce
|
||||
import operator
|
||||
|
||||
from django.contrib.postgres.search import SearchRank, SearchVector
|
||||
from django.contrib.postgres.search import SearchRank
|
||||
from django.db.models import OuterRef, Subquery, F, Q
|
||||
|
||||
from bookwyrm import models
|
||||
|
@ -141,16 +141,9 @@ def search_identifiers(query, *filters):
|
|||
|
||||
def search_title_author(query, min_confidence, *filters):
|
||||
"""searches for title and author"""
|
||||
vector = (
|
||||
SearchVector("title", weight="A")
|
||||
+ SearchVector("subtitle", weight="B")
|
||||
+ SearchVector("authors__name", weight="C")
|
||||
+ SearchVector("series", weight="D")
|
||||
)
|
||||
|
||||
results = (
|
||||
models.Edition.objects.annotate(rank=SearchRank(vector, query))
|
||||
.filter(*filters, rank__gt=min_confidence)
|
||||
models.Edition.objects.annotate(rank=SearchRank("search_vector", query))
|
||||
.filter(*filters, search_vector=query, rank__gt=min_confidence)
|
||||
.order_by("-rank")
|
||||
)
|
||||
|
||||
|
|
70
bookwyrm/migrations/0077_auto_20210623_2155.py
Normal file
70
bookwyrm/migrations/0077_auto_20210623_2155.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
# Generated by Django 3.2.4 on 2021-06-23 21:55
|
||||
|
||||
import django.contrib.postgres.indexes
|
||||
import django.contrib.postgres.search
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
("bookwyrm", "0076_preview_images"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="author",
|
||||
name="search_vector",
|
||||
field=django.contrib.postgres.search.SearchVectorField(null=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name="book",
|
||||
name="search_vector",
|
||||
field=django.contrib.postgres.search.SearchVectorField(null=True),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="author",
|
||||
index=django.contrib.postgres.indexes.GinIndex(
|
||||
fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin"
|
||||
),
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="book",
|
||||
index=django.contrib.postgres.indexes.GinIndex(
|
||||
fields=["search_vector"], name="bookwyrm_bo_search__51beb3_gin"
|
||||
),
|
||||
),
|
||||
migrations.RunSQL(
|
||||
sql="""
|
||||
CREATE FUNCTION book_trigger() RETURNS trigger AS $$
|
||||
begin
|
||||
new.search_vector :=
|
||||
setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') ||
|
||||
setweight(to_tsvector('pg_catalog.english', coalesce(new.subtitle, '')), 'B') ||
|
||||
setweight(to_tsvector('pg_catalog.english', coalesce(new.series, '')), 'D') ||
|
||||
(SELECT setweight(to_tsvector('pg_catalog.english', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C')
|
||||
FROM bookwyrm_book
|
||||
LEFT OUTER JOIN bookwyrm_book_authors
|
||||
ON bookwyrm_book.id = bookwyrm_book_authors.book_id
|
||||
LEFT OUTER JOIN bookwyrm_author
|
||||
ON bookwyrm_book_authors.author_id = bookwyrm_author.id
|
||||
WHERE bookwyrm_book.id = new.id
|
||||
);
|
||||
return new;
|
||||
end
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE TRIGGER search_vector_trigger
|
||||
BEFORE INSERT OR UPDATE OF title, subtitle, series, search_vector
|
||||
ON bookwyrm_book
|
||||
FOR EACH ROW EXECUTE FUNCTION book_trigger();
|
||||
|
||||
UPDATE bookwyrm_book SET search_vector = NULL;
|
||||
""",
|
||||
reverse_sql="""
|
||||
DROP TRIGGER IF EXISTS search_vector_trigger
|
||||
ON bookwyrm_book;
|
||||
DROP FUNCTION IF EXISTS book_trigger;
|
||||
""",
|
||||
),
|
||||
]
|
|
@ -1,4 +1,5 @@
|
|||
""" database schema for info about authors """
|
||||
from django.contrib.postgres.indexes import GinIndex
|
||||
from django.db import models
|
||||
|
||||
from bookwyrm import activitypub
|
||||
|
@ -37,3 +38,8 @@ class Author(BookDataModel):
|
|||
return "https://%s/author/%s" % (DOMAIN, self.id)
|
||||
|
||||
activity_serializer = activitypub.Author
|
||||
|
||||
class Meta:
|
||||
"""sets up postgres GIN index field"""
|
||||
|
||||
indexes = (GinIndex(fields=["search_vector"]),)
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
""" database schema for books and shelves """
|
||||
import re
|
||||
|
||||
from django.contrib.postgres.search import SearchVectorField
|
||||
from django.contrib.postgres.indexes import GinIndex
|
||||
from django.db import models
|
||||
from django.dispatch import receiver
|
||||
from model_utils import FieldTracker
|
||||
|
@ -34,6 +36,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
|
|||
bnf_id = fields.CharField( # Bibliothèque nationale de France
|
||||
max_length=255, blank=True, null=True, deduplication_field=True
|
||||
)
|
||||
search_vector = SearchVectorField(null=True)
|
||||
|
||||
last_edited_by = fields.ForeignKey(
|
||||
"User",
|
||||
|
@ -142,6 +145,11 @@ class Book(BookDataModel):
|
|||
self.title,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
"""sets up postgres GIN index field"""
|
||||
|
||||
indexes = (GinIndex(fields=["search_vector"]),)
|
||||
|
||||
|
||||
class Work(OrderedCollectionPageMixin, Book):
|
||||
"""a work (an abstract concept of a book that manifests in an edition)"""
|
||||
|
|
Loading…
Reference in a new issue