Merge branch 'main' into storage-s3

This commit is contained in:
Joachim 2021-07-05 13:06:19 +02:00 committed by GitHub
commit 3ea1bcb256
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 3303 additions and 694 deletions

View file

@ -2,7 +2,7 @@
name: Bug report name: Bug report
about: Create a report to help us improve about: Create a report to help us improve
title: '' title: ''
labels: '' labels: 'bug'
assignees: '' assignees: ''
--- ---
@ -23,6 +23,14 @@ A clear and concise description of what you expected to happen.
**Screenshots** **Screenshots**
If applicable, add screenshots to help explain your problem. If applicable, add screenshots to help explain your problem.
**Instance**
On which BookWyrm instance did you encounter this problem?
**Additional context**
Add any other context about the problem here.
---
**Desktop (please complete the following information):** **Desktop (please complete the following information):**
- OS: [e.g. iOS] - OS: [e.g. iOS]
- Browser [e.g. chrome, safari] - Browser [e.g. chrome, safari]
@ -33,6 +41,3 @@ If applicable, add screenshots to help explain your problem.
- OS: [e.g. iOS8.1] - OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari] - Browser [e.g. stock browser, safari]
- Version [e.g. 22] - Version [e.g. 22]
**Additional context**
Add any other context about the problem here.

View file

@ -20,7 +20,7 @@ jobs:
services: services:
postgres: postgres:
image: postgres:10 image: postgres:12
env: env:
POSTGRES_USER: postgres POSTGRES_USER: postgres
POSTGRES_PASSWORD: hunter2 POSTGRES_PASSWORD: hunter2
@ -66,4 +66,4 @@ jobs:
EMAIL_USE_TLS: true EMAIL_USE_TLS: true
ENABLE_PREVIEW_IMAGES: true ENABLE_PREVIEW_IMAGES: true
run: | run: |
python manage.py test pytest

View file

@ -2,11 +2,10 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
import logging import logging
from urllib3.exceptions import RequestError
from django.db import transaction from django.db import transaction
import requests import requests
from requests.exceptions import SSLError from requests.exceptions import RequestException
from bookwyrm import activitypub, models, settings from bookwyrm import activitypub, models, settings
from .connector_manager import load_more_data, ConnectorException from .connector_manager import load_more_data, ConnectorException
@ -237,7 +236,7 @@ def get_data(url, params=None, timeout=10):
}, },
timeout=timeout, timeout=timeout,
) )
except (RequestError, SSLError, ConnectionError) as err: except RequestException as err:
logger.exception(err) logger.exception(err)
raise ConnectorException() raise ConnectorException()
@ -262,7 +261,7 @@ def get_image(url, timeout=10):
}, },
timeout=timeout, timeout=timeout,
) )
except (RequestError, SSLError) as err: except RequestException as err:
logger.exception(err) logger.exception(err)
return None return None
if not resp.ok: if not resp.ok:

View file

@ -2,7 +2,7 @@
from functools import reduce from functools import reduce
import operator import operator
from django.contrib.postgres.search import SearchRank, SearchVector from django.contrib.postgres.search import SearchRank, SearchQuery
from django.db.models import OuterRef, Subquery, F, Q from django.db.models import OuterRef, Subquery, F, Q
from bookwyrm import models from bookwyrm import models
@ -13,7 +13,7 @@ class Connector(AbstractConnector):
"""instantiate a connector""" """instantiate a connector"""
# pylint: disable=arguments-differ # pylint: disable=arguments-differ
def search(self, query, min_confidence=0.1, raw=False, filters=None): def search(self, query, min_confidence=0, raw=False, filters=None):
"""search your local database""" """search your local database"""
filters = filters or [] filters = filters or []
if not query: if not query:
@ -141,16 +141,11 @@ def search_identifiers(query, *filters):
def search_title_author(query, min_confidence, *filters): def search_title_author(query, min_confidence, *filters):
"""searches for title and author""" """searches for title and author"""
vector = ( query = SearchQuery(query, config="simple") | SearchQuery(query, config="english")
SearchVector("title", weight="A")
+ SearchVector("subtitle", weight="B")
+ SearchVector("authors__name", weight="C")
+ SearchVector("series", weight="D")
)
results = ( results = (
models.Edition.objects.annotate(rank=SearchRank(vector, query)) models.Edition.objects.filter(*filters, search_vector=query)
.filter(*filters, rank__gt=min_confidence) .annotate(rank=SearchRank(F("search_vector"), query))
.filter(rank__gt=min_confidence)
.order_by("-rank") .order_by("-rank")
) )

View file

@ -183,6 +183,7 @@ class EditionForm(CustomForm):
"parent_work", "parent_work",
"shelves", "shelves",
"connector", "connector",
"search_vector",
] ]
@ -194,6 +195,7 @@ class AuthorForm(CustomForm):
"origin_id", "origin_id",
"created_date", "created_date",
"updated_date", "updated_date",
"search_vector",
] ]

View file

@ -0,0 +1,126 @@
# Generated by Django 3.2.4 on 2021-06-23 21:55
import django.contrib.postgres.indexes
import django.contrib.postgres.search
from django.db import migrations
class Migration(migrations.Migration):
    """Add a trigger-maintained full-text ``search_vector`` to books and authors.

    Schema changes:
      * a nullable ``SearchVectorField`` named ``search_vector`` on ``author``
        and ``book``, each with a GIN index.

    Raw SQL (PostgreSQL / plpgsql):
      * ``book_trigger`` rebuilds ``bookwyrm_book.search_vector`` from the
        title (weight A), subtitle (B), the names of the linked authors (C)
        and the series (D).
      * ``author_trigger`` re-indexes the books of an author whose name was
        updated.
      * ``book_authors_trigger`` re-indexes a book when a row is inserted into
        or deleted from ``bookwyrm_book_authors``.

    Every ``RunSQL`` operation carries ``reverse_sql`` that drops the trigger
    and function it created.
    """

    dependencies = [
        ("bookwyrm", "0076_preview_images"),
    ]

    operations = [
        migrations.AddField(
            model_name="author",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddField(
            model_name="book",
            name="search_vector",
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        ),
        migrations.AddIndex(
            model_name="author",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin"
            ),
        ),
        migrations.AddIndex(
            model_name="book",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="bookwyrm_bo_search__51beb3_gin"
            ),
        ),
        # when a book is created or its indexed columns change
        #
        # The title is parsed with the 'english' configuration; if that yields
        # an empty vector (NULLIF(..., '') turns it into NULL) the 'simple'
        # configuration is used instead so the title is still searchable.
        # Author names always use 'simple'.
        #
        # The trailing UPDATE writes search_vector on every existing row;
        # search_vector is one of the trigger's "UPDATE OF" columns, so this
        # runs book_trigger for each row and back-fills the vectors.
        migrations.RunSQL(
            sql="""
            CREATE FUNCTION book_trigger() RETURNS trigger AS $$
                begin
                    new.search_vector :=
                        coalesce(
                            NULLIF(setweight(to_tsvector('english', coalesce(new.title, '')), 'A'), ''),
                            setweight(to_tsvector('simple', coalesce(new.title, '')), 'A')
                        ) ||
                        setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') ||
                        (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(bookwyrm_author.name), ' '), '')), 'C')
                            FROM bookwyrm_book
                            LEFT OUTER JOIN bookwyrm_book_authors
                                ON bookwyrm_book.id = bookwyrm_book_authors.book_id
                            LEFT OUTER JOIN bookwyrm_author
                                ON bookwyrm_book_authors.author_id = bookwyrm_author.id
                            WHERE bookwyrm_book.id = new.id
                        ) ||
                        setweight(to_tsvector('english', coalesce(new.series, '')), 'D');
                    return new;
                end
            $$ LANGUAGE plpgsql;

            CREATE TRIGGER search_vector_trigger
                BEFORE INSERT OR UPDATE OF title, subtitle, series, search_vector
                ON bookwyrm_book
                FOR EACH ROW EXECUTE FUNCTION book_trigger();

            UPDATE bookwyrm_book SET search_vector = NULL;
            """,
            reverse_sql="""
            DROP TRIGGER IF EXISTS search_vector_trigger
                ON bookwyrm_book;
            DROP FUNCTION IF EXISTS book_trigger;
            """,
        ),
        # when an author is edited
        #
        # The affected books are looked up through the book/author join table
        # by author_id. Comparing the join table's own primary key ("id") to
        # the author's id would select unrelated rows, so a renamed author's
        # books would not be refreshed reliably.
        #
        # Writing to search_vector re-fires book_trigger, which recomputes the
        # vector with the new author name.
        migrations.RunSQL(
            sql="""
            CREATE FUNCTION author_trigger() RETURNS trigger AS $$
                begin
                    WITH book AS (
                        SELECT bookwyrm_book_authors.book_id AS row_id
                        FROM bookwyrm_book_authors
                        WHERE bookwyrm_book_authors.author_id = new.id
                    )
                    UPDATE bookwyrm_book SET search_vector = ''
                        FROM book
                        WHERE id = book.row_id;
                    return new;
                end
            $$ LANGUAGE plpgsql;

            CREATE TRIGGER author_search_vector_trigger
                AFTER UPDATE OF name
                ON bookwyrm_author
                FOR EACH ROW EXECUTE FUNCTION author_trigger();
            """,
            reverse_sql="""
            DROP TRIGGER IF EXISTS author_search_vector_trigger
                ON bookwyrm_author;
            DROP FUNCTION IF EXISTS author_trigger;
            """,
        ),
        # when an author is added to or removed from a book
        #
        # coalesce(new.book_id, old.book_id) takes the book id from whichever
        # row image carries one (NEW for an insert, OLD for a delete).
        # NOTE(review): this relies on the non-applicable record reading as
        # null rather than raising - confirm against the minimum supported
        # PostgreSQL version.
        migrations.RunSQL(
            sql="""
            CREATE FUNCTION book_authors_trigger() RETURNS trigger AS $$
                begin
                    UPDATE bookwyrm_book SET search_vector = ''
                        WHERE id = coalesce(new.book_id, old.book_id);
                    return new;
                end
            $$ LANGUAGE plpgsql;

            CREATE TRIGGER book_authors_search_vector_trigger
                AFTER INSERT OR DELETE
                ON bookwyrm_book_authors
                FOR EACH ROW EXECUTE FUNCTION book_authors_trigger();
            """,
            reverse_sql="""
            DROP TRIGGER IF EXISTS book_authors_search_vector_trigger
                ON bookwyrm_book_authors;
            DROP FUNCTION IF EXISTS book_authors_trigger;
            """,
        ),
    ]

View file

@ -1,4 +1,5 @@
""" database schema for info about authors """ """ database schema for info about authors """
from django.contrib.postgres.indexes import GinIndex
from django.db import models from django.db import models
from bookwyrm import activitypub from bookwyrm import activitypub
@ -37,3 +38,8 @@ class Author(BookDataModel):
return "https://%s/author/%s" % (DOMAIN, self.id) return "https://%s/author/%s" % (DOMAIN, self.id)
activity_serializer = activitypub.Author activity_serializer = activitypub.Author
class Meta:
"""sets up postgres GIN index field"""
indexes = (GinIndex(fields=["search_vector"]),)

View file

@ -1,6 +1,8 @@
""" database schema for books and shelves """ """ database schema for books and shelves """
import re import re
from django.contrib.postgres.search import SearchVectorField
from django.contrib.postgres.indexes import GinIndex
from django.db import models from django.db import models
from django.dispatch import receiver from django.dispatch import receiver
from model_utils import FieldTracker from model_utils import FieldTracker
@ -34,6 +36,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
bnf_id = fields.CharField( # Bibliothèque nationale de France bnf_id = fields.CharField( # Bibliothèque nationale de France
max_length=255, blank=True, null=True, deduplication_field=True max_length=255, blank=True, null=True, deduplication_field=True
) )
search_vector = SearchVectorField(null=True)
last_edited_by = fields.ForeignKey( last_edited_by = fields.ForeignKey(
"User", "User",
@ -142,6 +145,11 @@ class Book(BookDataModel):
self.title, self.title,
) )
class Meta:
"""sets up postgres GIN index field"""
indexes = (GinIndex(fields=["search_vector"]),)
class Work(OrderedCollectionPageMixin, Book): class Work(OrderedCollectionPageMixin, Book):
"""a work (an abstract concept of a book that manifests in an edition)""" """a work (an abstract concept of a book that manifests in an edition)"""

View file

@ -408,7 +408,8 @@ class ImageField(ActivitypubFieldMixin, models.ImageField):
return None return None
image_content = ContentFile(response.content) image_content = ContentFile(response.content)
image_name = str(uuid4()) + "." + imghdr.what(None, image_content.read()) extension = imghdr.what(None, image_content.read()) or ""
image_name = "{:s}.{:s}".format(str(uuid4()), extension)
return [image_name, image_content] return [image_name, image_content]
def formfield(self, **kwargs): def formfield(self, **kwargs):

View file

@ -168,6 +168,7 @@ LANGUAGES = [
("es", _("Spanish")), ("es", _("Spanish")),
("fr-fr", _("French")), ("fr-fr", _("French")),
("zh-hans", _("Simplified Chinese")), ("zh-hans", _("Simplified Chinese")),
("zh-hant", _("Traditional Chinese")),
] ]

View file

@ -7,6 +7,5 @@ from bookwyrm import settings
# set the default Django settings module for the 'celery' program. # set the default Django settings module for the 'celery' program.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "celerywyrm.settings") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "celerywyrm.settings")
app = Celery( app = Celery(
"tasks", "tasks", broker=settings.CELERY_BROKER, backend=settings.CELERY_RESULT_BACKEND
broker=settings.CELERY_BROKER,
) )

View file

@ -2,7 +2,7 @@
<div class="columns"> <div class="columns">
<div class="column is-narrow is-hidden-mobile"> <div class="column is-narrow is-hidden-mobile">
<figure class="block"> <figure class="block is-w-xl">
<img src="{% if site.logo %}/images/{{ site.logo }}{% else %}{% static "images/logo.png" %}{% endif %}" alt="BookWyrm logo"> <img src="{% if site.logo %}/images/{{ site.logo }}{% else %}{% static "images/logo.png" %}{% endif %}" alt="BookWyrm logo">
</figure> </figure>
</div> </div>

View file

@ -43,68 +43,69 @@ class SelfConnector(TestCase):
self.assertEqual(result.year, 1980) self.assertEqual(result.year, 1980)
self.assertEqual(result.connector, self.connector) self.assertEqual(result.connector, self.connector)
def test_search_rank(self): @patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay")
def test_search_rank(self, _):
"""prioritize certain results""" """prioritize certain results"""
author = models.Author.objects.create(name="Anonymous") author = models.Author.objects.create(name="Anonymous")
with patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay"): edition = models.Edition.objects.create(
edition = models.Edition.objects.create( title="Edition of Example Work",
title="Edition of Example Work", published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc),
published_date=datetime.datetime(1980, 5, 10, tzinfo=timezone.utc), parent_work=models.Work.objects.create(title=""),
parent_work=models.Work.objects.create(title=""), )
) # author text is rank C
# author text is rank C edition.authors.add(author)
edition.authors.add(author)
# series is rank D # series is rank D
models.Edition.objects.create( models.Edition.objects.create(
title="Another Edition", title="Another Edition",
series="Anonymous", series="Anonymous",
parent_work=models.Work.objects.create(title=""), parent_work=models.Work.objects.create(title=""),
) )
# subtitle is rank B # subtitle is rank B
models.Edition.objects.create( models.Edition.objects.create(
title="More Editions", title="More Editions",
subtitle="The Anonymous Edition", subtitle="The Anonymous Edition",
parent_work=models.Work.objects.create(title=""), parent_work=models.Work.objects.create(title=""),
) )
# title is rank A # title is rank A
models.Edition.objects.create(title="Anonymous") models.Edition.objects.create(title="Anonymous")
# doesn't rank in this search # doesn't rank in this search
edition = models.Edition.objects.create( models.Edition.objects.create(
title="An Edition", parent_work=models.Work.objects.create(title="") title="An Edition", parent_work=models.Work.objects.create(title="")
) )
results = self.connector.search("Anonymous") results = self.connector.search("Anonymous")
self.assertEqual(len(results), 3) self.assertEqual(len(results), 4)
self.assertEqual(results[0].title, "Anonymous") self.assertEqual(results[0].title, "Anonymous")
self.assertEqual(results[1].title, "More Editions") self.assertEqual(results[1].title, "More Editions")
self.assertEqual(results[2].title, "Edition of Example Work") self.assertEqual(results[2].title, "Edition of Example Work")
self.assertEqual(results[3].title, "Another Edition")
def test_search_multiple_editions(self): @patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay")
def test_search_multiple_editions(self, _):
"""it should get rid of duplicate editions for the same work""" """it should get rid of duplicate editions for the same work"""
with patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay"): work = models.Work.objects.create(title="Work Title")
work = models.Work.objects.create(title="Work Title") edition_1 = models.Edition.objects.create(
edition_1 = models.Edition.objects.create( title="Edition 1 Title", parent_work=work
title="Edition 1 Title", parent_work=work )
) edition_2 = models.Edition.objects.create(
edition_2 = models.Edition.objects.create( title="Edition 2 Title",
title="Edition 2 Title", parent_work=work,
parent_work=work, isbn_13="123456789", # this is now the default edition
edition_rank=20, # that's default babey )
) edition_3 = models.Edition.objects.create(title="Fish", parent_work=work)
edition_3 = models.Edition.objects.create(title="Fish", parent_work=work)
# pick the best edition # pick the best edition
results = self.connector.search("Edition 1 Title") results = self.connector.search("Edition 1 Title")
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertEqual(results[0].key, edition_1.remote_id) self.assertEqual(results[0].key, edition_1.remote_id)
# pick the default edition when no match is best # pick the default edition when no match is best
results = self.connector.search("Edition Title") results = self.connector.search("Edition Title")
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertEqual(results[0].key, edition_2.remote_id) self.assertEqual(results[0].key, edition_2.remote_id)
# only matches one edition, so no deduplication takes place # only matches one edition, so no deduplication takes place
results = self.connector.search("Fish") results = self.connector.search("Fish")
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertEqual(results[0].key, edition_3.remote_id) self.assertEqual(results[0].key, edition_3.remote_id)

View file

@ -1,8 +1,9 @@
""" testing models """ """ testing models """
from unittest.mock import patch
from dateutil.parser import parse from dateutil.parser import parse
from django.test import TestCase from django.test import TestCase
from django.utils import timezone from django.utils import timezone
from unittest.mock import patch
from bookwyrm import models, settings from bookwyrm import models, settings
from bookwyrm.models.book import isbn_10_to_13, isbn_13_to_10 from bookwyrm.models.book import isbn_10_to_13, isbn_13_to_10

View file

@ -0,0 +1,77 @@
""" django configuration of postgres """
from unittest.mock import patch
from django.test import TestCase
from bookwyrm import models
@patch("bookwyrm.models.activitypub_mixin.broadcast_task.delay")
@patch("bookwyrm.preview_images.generate_edition_preview_image_task.delay")
class PostgresTriggers(TestCase):
    """check the search_vector values stored for editions by the database"""

    def test_search_vector_on_create(self, *_):
        """a new edition gets a search vector derived from its title"""
        expected = "'goodby':3A 'long':2A"

        edition = models.Edition.objects.create(title="The Long Goodbye")
        edition.refresh_from_db()

        self.assertEqual(edition.search_vector, expected)

    def test_search_vector_on_update(self, *_):
        """changing the title and saving refreshes the search vector"""
        expected = "'even':2A 'goodby':4A 'longer':3A"

        edition = models.Edition.objects.create(title="The Long Goodbye")
        edition.title = "The Even Longer Goodbye"
        edition.save(broadcast=False)
        edition.refresh_from_db()

        self.assertEqual(edition.search_vector, expected)

    def test_search_vector_fields(self, *_):
        """title, subtitle, author name and series all appear in the vector"""
        expected = "'cool':5B 'goodby':3A 'long':2A 'name':9 'rays':7C 'seri':8 'the':6C 'wow':4B"

        writer = models.Author.objects.create(name="The Rays")
        edition_fields = {
            "title": "The Long Goodbye",
            "subtitle": "wow cool",
            "series": "series name",
            "languages": ["irrelevent"],
        }
        edition = models.Edition.objects.create(**edition_fields)
        edition.authors.add(writer)
        edition.refresh_from_db()

        self.assertEqual(edition.search_vector, expected)

    def test_seach_vector_on_author_update(self, *_):
        """renaming an author refreshes the vector of the linked edition"""
        expected = "'goodby':3A 'jeremy':4C 'long':2A"

        writer = models.Author.objects.create(name="The Rays")
        edition = models.Edition.objects.create(title="The Long Goodbye")
        edition.authors.add(writer)

        writer.name = "Jeremy"
        writer.save(broadcast=False)
        edition.refresh_from_db()

        self.assertEqual(edition.search_vector, expected)

    def test_seach_vector_on_author_delete(self, *_):
        """removing an author from an edition drops the name from its vector"""
        with_author = "'goodby':3A 'jeremy':4C 'long':2A"
        without_author = "'goodby':3A 'long':2A"

        writer = models.Author.objects.create(name="Jeremy")
        edition = models.Edition.objects.create(title="The Long Goodbye")

        # linking the author should add the name at weight C
        edition.authors.add(writer)
        edition.refresh_from_db()
        self.assertEqual(edition.search_vector, with_author)

        # unlinking the author should leave only the title terms
        edition.authors.remove(writer)
        edition.refresh_from_db()
        self.assertEqual(edition.search_vector, without_author)

    def test_search_vector_stop_word_fallback(self, *_):
        """a title made only of stop words is still indexed"""
        expected = "'there':1A,2A"

        edition = models.Edition.objects.create(title="there there")
        edition.refresh_from_db()

        self.assertEqual(edition.search_vector, expected)

View file

@ -192,8 +192,8 @@ urlpatterns = [
re_path(r"^import/?$", views.Import.as_view(), name="import"), re_path(r"^import/?$", views.Import.as_view(), name="import"),
re_path(r"^import/(\d+)/?$", views.ImportStatus.as_view(), name="import-status"), re_path(r"^import/(\d+)/?$", views.ImportStatus.as_view(), name="import-status"),
# users # users
re_path(r"%s/?$" % USER_PATH, views.User.as_view(), name="user-feed"),
re_path(r"%s\.json$" % USER_PATH, views.User.as_view()), re_path(r"%s\.json$" % USER_PATH, views.User.as_view()),
re_path(r"%s/?$" % USER_PATH, views.User.as_view(), name="user-feed"),
re_path(r"%s/rss" % USER_PATH, views.rss_feed.RssFeed(), name="user-rss"), re_path(r"%s/rss" % USER_PATH, views.rss_feed.RssFeed(), name="user-rss"),
re_path( re_path(
r"%s/followers(.json)?/?$" % USER_PATH, r"%s/followers(.json)?/?$" % USER_PATH,

View file

@ -84,7 +84,9 @@ class ImportStatus(View):
try: try:
task = app.AsyncResult(job.task_id) task = app.AsyncResult(job.task_id)
except ValueError: # triggers attribute error if the task won't load
task.status # pylint: disable=pointless-statement
except (ValueError, AttributeError):
task = None task = None
items = job.items.order_by("index").all() items = job.items.order_by("index").all()

View file

@ -23,7 +23,7 @@ class Search(View):
def get(self, request): def get(self, request):
"""that search bar up top""" """that search bar up top"""
query = request.GET.get("q") query = request.GET.get("q")
min_confidence = request.GET.get("min_confidence", 0.1) min_confidence = request.GET.get("min_confidence", 0)
search_type = request.GET.get("type") search_type = request.GET.get("type")
search_remote = ( search_remote = (
request.GET.get("remote", False) and request.user.is_authenticated request.GET.get("remote", False) and request.user.is_authenticated

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff