forked from mirrors/bookwyrm
More efficient search index
Co-authored-by: asmr-hex <0.0@asmr.software>
This commit is contained in:
parent
ae1d0343ba
commit
7c15fbbb0b
4 changed files with 87 additions and 10 deletions
|
@ -2,7 +2,7 @@
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
import operator
|
import operator
|
||||||
|
|
||||||
from django.contrib.postgres.search import SearchRank, SearchVector
|
from django.contrib.postgres.search import SearchRank
|
||||||
from django.db.models import OuterRef, Subquery, F, Q
|
from django.db.models import OuterRef, Subquery, F, Q
|
||||||
|
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
|
@ -141,16 +141,9 @@ def search_identifiers(query, *filters):
|
||||||
|
|
||||||
def search_title_author(query, min_confidence, *filters):
|
def search_title_author(query, min_confidence, *filters):
|
||||||
"""searches for title and author"""
|
"""searches for title and author"""
|
||||||
vector = (
|
|
||||||
SearchVector("title", weight="A")
|
|
||||||
+ SearchVector("subtitle", weight="B")
|
|
||||||
+ SearchVector("authors__name", weight="C")
|
|
||||||
+ SearchVector("series", weight="D")
|
|
||||||
)
|
|
||||||
|
|
||||||
results = (
|
results = (
|
||||||
models.Edition.objects.annotate(rank=SearchRank(vector, query))
|
models.Edition.objects.annotate(rank=SearchRank("search_vector", query))
|
||||||
.filter(*filters, rank__gt=min_confidence)
|
.filter(*filters, search_vector=query, rank__gt=min_confidence)
|
||||||
.order_by("-rank")
|
.order_by("-rank")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
70
bookwyrm/migrations/0077_auto_20210623_2155.py
Normal file
70
bookwyrm/migrations/0077_auto_20210623_2155.py
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
# Generated by Django 3.2.4 on 2021-06-23 21:55
|
||||||
|
|
||||||
|
import django.contrib.postgres.indexes
|
||||||
|
import django.contrib.postgres.search
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add an indexed full-text ``search_vector`` column to authors and books.

    Operations, in order:

    * add a nullable ``SearchVectorField`` named ``search_vector`` to the
      ``author`` and ``book`` models;
    * add a GIN index over that column on both models;
    * create the ``book_trigger()`` plpgsql function and the
      ``search_vector_trigger`` trigger on ``bookwyrm_book`` that rebuilds
      ``search_vector`` from the title (weight A), subtitle (weight B),
      the names of the book's authors (weight C) and series (weight D);
    * run ``UPDATE bookwyrm_book SET search_vector = NULL`` so the trigger
      fires for every existing row and fills the column in.

    NOTE: nothing in this migration populates ``search_vector`` for authors;
    only the column and its index are created.

    NOTE: the trigger is attached to ``bookwyrm_book`` only, so adding or
    removing rows in ``bookwyrm_book_authors`` or renaming an author does not
    by itself refresh a book's vector.
    """

    dependencies = [
        ("bookwyrm", "0076_preview_images"),
    ]

    operations = [
        migrations.AddField(
            model_name="author",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddField(
            model_name="book",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddIndex(
            model_name="author",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin"
            ),
        ),
        migrations.AddIndex(
            model_name="book",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="bookwyrm_bo_search__51beb3_gin"
            ),
        ),
        # The trigger is declared with EXECUTE PROCEDURE rather than
        # EXECUTE FUNCTION: the FUNCTION spelling is only accepted by
        # PostgreSQL 11 and newer, while PROCEDURE is accepted by every
        # version and creates the same trigger.
        #
        # `search_vector` is part of the trigger's column list on purpose:
        # the final UPDATE assigns that column, which makes the trigger run
        # for each existing row and overwrite the NULL with a computed vector.
        migrations.RunSQL(
            sql="""
            CREATE FUNCTION book_trigger() RETURNS trigger AS $$
                begin
                    new.search_vector :=
                        setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') ||
                        setweight(to_tsvector('pg_catalog.english', coalesce(new.subtitle, '')), 'B') ||
                        setweight(to_tsvector('pg_catalog.english', coalesce(new.series, '')), 'D') ||
                        (SELECT setweight(to_tsvector('pg_catalog.english', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C')
                            FROM bookwyrm_book
                            LEFT OUTER JOIN bookwyrm_book_authors
                            ON bookwyrm_book.id = bookwyrm_book_authors.book_id
                            LEFT OUTER JOIN bookwyrm_author
                            ON bookwyrm_book_authors.author_id = bookwyrm_author.id
                            WHERE bookwyrm_book.id = new.id
                        );
                    return new;
                end
            $$ LANGUAGE plpgsql;

            CREATE TRIGGER search_vector_trigger
                BEFORE INSERT OR UPDATE OF title, subtitle, series, search_vector
                ON bookwyrm_book
                FOR EACH ROW EXECUTE PROCEDURE book_trigger();

            UPDATE bookwyrm_book SET search_vector = NULL;
            """,
            reverse_sql="""
            DROP TRIGGER IF EXISTS search_vector_trigger
                ON bookwyrm_book;
            DROP FUNCTION IF EXISTS book_trigger;
            """,
        ),
    ]
|
|
@ -1,4 +1,5 @@
|
||||||
""" database schema for info about authors """
|
""" database schema for info about authors """
|
||||||
|
from django.contrib.postgres.indexes import GinIndex
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
|
||||||
from bookwyrm import activitypub
|
from bookwyrm import activitypub
|
||||||
|
@ -37,3 +38,8 @@ class Author(BookDataModel):
|
||||||
return "https://%s/author/%s" % (DOMAIN, self.id)
|
return "https://%s/author/%s" % (DOMAIN, self.id)
|
||||||
|
|
||||||
activity_serializer = activitypub.Author
|
activity_serializer = activitypub.Author
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
"""sets up postgres GIN index field"""
|
||||||
|
|
||||||
|
indexes = (GinIndex(fields=["search_vector"]),)
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
""" database schema for books and shelves """
|
""" database schema for books and shelves """
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from django.contrib.postgres.search import SearchVectorField
|
||||||
|
from django.contrib.postgres.indexes import GinIndex
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
from model_utils import FieldTracker
|
from model_utils import FieldTracker
|
||||||
|
@ -34,6 +36,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
|
||||||
bnf_id = fields.CharField( # Bibliothèque nationale de France
|
bnf_id = fields.CharField( # Bibliothèque nationale de France
|
||||||
max_length=255, blank=True, null=True, deduplication_field=True
|
max_length=255, blank=True, null=True, deduplication_field=True
|
||||||
)
|
)
|
||||||
|
search_vector = SearchVectorField(null=True)
|
||||||
|
|
||||||
last_edited_by = fields.ForeignKey(
|
last_edited_by = fields.ForeignKey(
|
||||||
"User",
|
"User",
|
||||||
|
@ -142,6 +145,11 @@ class Book(BookDataModel):
|
||||||
self.title,
|
self.title,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
"""sets up postgres GIN index field"""
|
||||||
|
|
||||||
|
indexes = (GinIndex(fields=["search_vector"]),)
|
||||||
|
|
||||||
|
|
||||||
class Work(OrderedCollectionPageMixin, Book):
|
class Work(OrderedCollectionPageMixin, Book):
|
||||||
"""a work (an abstract concept of a book that manifests in an edition)"""
|
"""a work (an abstract concept of a book that manifests in an edition)"""
|
||||||
|
|
Loading…
Reference in a new issue