Merge pull request #461 from mouse-reeve/fixes-expand-data

Makes expanding book data fully part of the connector
This commit is contained in:
Mouse Reeve 2021-01-02 09:47:10 -08:00 committed by GitHub
commit dcc833c0c7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 127 additions and 101 deletions

View file

@ -2,3 +2,5 @@
from .settings import CONNECTORS from .settings import CONNECTORS
from .abstract_connector import ConnectorException from .abstract_connector import ConnectorException
from .abstract_connector import get_data, get_image from .abstract_connector import get_data, get_image
from .connector_manager import search, local_search, first_search_result

View file

@ -6,17 +6,13 @@ from urllib3.exceptions import RequestError
from django.db import transaction from django.db import transaction
import requests import requests
from requests import HTTPError
from requests.exceptions import SSLError from requests.exceptions import SSLError
from bookwyrm import activitypub, models, settings from bookwyrm import activitypub, models, settings
from .connector_manager import load_more_data, ConnectorException
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ConnectorException(HTTPError):
''' when the connector can't do what was asked '''
class AbstractMinimalConnector(ABC): class AbstractMinimalConnector(ABC):
''' just the bare bones, for other bookwyrm instances ''' ''' just the bare bones, for other bookwyrm instances '''
def __init__(self, identifier): def __init__(self, identifier):
@ -90,7 +86,6 @@ class AbstractConnector(AbstractMinimalConnector):
return True return True
@transaction.atomic
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id):
''' translate arbitrary json into an Activitypub dataclass ''' ''' translate arbitrary json into an Activitypub dataclass '''
# first, check if we have the origin_id saved # first, check if we have the origin_id saved
@ -123,13 +118,17 @@ class AbstractConnector(AbstractMinimalConnector):
if not work_data or not edition_data: if not work_data or not edition_data:
raise ConnectorException('Unable to load book data: %s' % remote_id) raise ConnectorException('Unable to load book data: %s' % remote_id)
# create activitypub object with transaction.atomic():
work_activity = activitypub.Work(**work_data) # create activitypub object
# this will dedupe automatically work_activity = activitypub.Work(**work_data)
work = work_activity.to_model(models.Work) # this will dedupe automatically
for author in self.get_authors_from_data(data): work = work_activity.to_model(models.Work)
work.authors.add(author) for author in self.get_authors_from_data(data):
return self.create_edition_from_data(work, edition_data) work.authors.add(author)
edition = self.create_edition_from_data(work, edition_data)
load_more_data.delay(self.connector.id, work.id)
return edition
def create_edition_from_data(self, work, edition_data): def create_edition_from_data(self, work, edition_data):

View file

@ -1,51 +1,15 @@
''' select and call a connector for whatever book task needs doing ''' ''' interface with whatever connectors the app has '''
import importlib import importlib
from urllib.parse import urlparse from urllib.parse import urlparse
from requests import HTTPError from requests import HTTPError
from bookwyrm import models from bookwyrm import models
from bookwyrm.connectors import ConnectorException
from bookwyrm.tasks import app from bookwyrm.tasks import app
def get_edition(book_id): class ConnectorException(HTTPError):
''' look up a book in the db and return an edition ''' ''' when the connector can't do what was asked '''
book = models.Book.objects.select_subclasses().get(id=book_id)
if isinstance(book, models.Work):
book = book.default_edition
return book
def get_or_create_connector(remote_id):
''' get the connector related to the author's server '''
url = urlparse(remote_id)
identifier = url.netloc
if not identifier:
raise ValueError('Invalid remote id')
try:
connector_info = models.Connector.objects.get(identifier=identifier)
except models.Connector.DoesNotExist:
connector_info = models.Connector.objects.create(
identifier=identifier,
connector_file='bookwyrm_connector',
base_url='https://%s' % identifier,
books_url='https://%s/book' % identifier,
covers_url='https://%s/images/covers' % identifier,
search_url='https://%s/search?q=' % identifier,
priority=2
)
return load_connector(connector_info)
@app.task
def load_more_data(book_id):
''' background the work of getting all 10,000 editions of LoTR '''
book = models.Book.objects.select_subclasses().get(id=book_id)
connector = load_connector(book.connector)
connector.expand_book_data(book)
def search(query, min_confidence=0.1): def search(query, min_confidence=0.1):
@ -92,6 +56,38 @@ def get_connectors():
yield load_connector(info) yield load_connector(info)
def get_or_create_connector(remote_id):
''' get the connector related to the author's server '''
url = urlparse(remote_id)
identifier = url.netloc
if not identifier:
raise ValueError('Invalid remote id')
try:
connector_info = models.Connector.objects.get(identifier=identifier)
except models.Connector.DoesNotExist:
connector_info = models.Connector.objects.create(
identifier=identifier,
connector_file='bookwyrm_connector',
base_url='https://%s' % identifier,
books_url='https://%s/book' % identifier,
covers_url='https://%s/images/covers' % identifier,
search_url='https://%s/search?q=' % identifier,
priority=2
)
return load_connector(connector_info)
@app.task
def load_more_data(connector_id, book_id):
''' background the work of getting all 10,000 editions of LoTR '''
connector_info = models.Connector.objects.get(id=connector_id)
connector = load_connector(connector_info)
book = models.Book.objects.select_subclasses().get(id=book_id)
connector.expand_book_data(book)
def load_connector(connector_info): def load_connector(connector_info):
''' instantiate the connector class ''' ''' instantiate the connector class '''
connector = importlib.import_module( connector = importlib.import_module(

View file

@ -3,7 +3,8 @@ import re
from bookwyrm import models from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import ConnectorException, get_data from .abstract_connector import get_data
from .connector_manager import ConnectorException
from .openlibrary_languages import languages from .openlibrary_languages import languages

View file

@ -6,7 +6,7 @@ from django.contrib.postgres.fields import JSONField
from django.db import models from django.db import models
from django.utils import timezone from django.utils import timezone
from bookwyrm import books_manager from bookwyrm.connectors import connector_manager
from bookwyrm.models import ReadThrough, User, Book from bookwyrm.models import ReadThrough, User, Book
from .fields import PrivacyLevels from .fields import PrivacyLevels
@ -71,7 +71,7 @@ class ImportItem(models.Model):
def get_book_from_isbn(self): def get_book_from_isbn(self):
''' search by isbn ''' ''' search by isbn '''
search_result = books_manager.first_search_result( search_result = connector_manager.first_search_result(
self.isbn, min_confidence=0.999 self.isbn, min_confidence=0.999
) )
if search_result: if search_result:
@ -86,7 +86,7 @@ class ImportItem(models.Model):
self.data['Title'], self.data['Title'],
self.data['Author'] self.data['Author']
) )
search_result = books_manager.first_search_result( search_result = connector_manager.first_search_result(
search_term, min_confidence=0.999 search_term, min_confidence=0.999
) )
if search_result: if search_result:

View file

@ -1,4 +1,5 @@
''' testing book data connectors ''' ''' testing book data connectors '''
from unittest.mock import patch
from django.test import TestCase from django.test import TestCase
import responses import responses
@ -104,8 +105,10 @@ class AbstractConnector(TestCase):
'https://example.com/book/abcd', 'https://example.com/book/abcd',
json=self.edition_data json=self.edition_data
) )
result = self.connector.get_or_create_book( with patch(
'https://example.com/book/abcd') 'bookwyrm.connectors.abstract_connector.load_more_data.delay'):
result = self.connector.get_or_create_book(
'https://example.com/book/abcd')
self.assertEqual(result, self.book) self.assertEqual(result, self.book)
self.assertEqual(models.Edition.objects.count(), 1) self.assertEqual(models.Edition.objects.count(), 1)
self.assertEqual(models.Edition.objects.count(), 1) self.assertEqual(models.Edition.objects.count(), 1)

View file

@ -1,12 +1,18 @@
''' interface between the app and various connectors '''
from django.test import TestCase from django.test import TestCase
from bookwyrm import books_manager, models from bookwyrm import models
from bookwyrm.connectors.bookwyrm_connector import Connector as BookWyrmConnector from bookwyrm.connectors import connector_manager
from bookwyrm.connectors.self_connector import Connector as SelfConnector from bookwyrm.connectors.bookwyrm_connector \
import Connector as BookWyrmConnector
from bookwyrm.connectors.self_connector \
import Connector as SelfConnector
class Book(TestCase): class ConnectorManager(TestCase):
''' interface between the app and various connectors '''
def setUp(self): def setUp(self):
''' we'll need some books and a connector info entry '''
self.work = models.Work.objects.create( self.work = models.Work.objects.create(
title='Example Work' title='Example Work'
) )
@ -28,53 +34,50 @@ class Book(TestCase):
covers_url='http://test.com/', covers_url='http://test.com/',
) )
def test_get_edition(self):
edition = books_manager.get_edition(self.edition.id)
self.assertEqual(edition, self.edition)
def test_get_edition_work(self):
edition = books_manager.get_edition(self.work.id)
self.assertEqual(edition, self.edition)
def test_get_or_create_connector(self): def test_get_or_create_connector(self):
''' loads a connector if the data source is known or creates one '''
remote_id = 'https://example.com/object/1' remote_id = 'https://example.com/object/1'
connector = books_manager.get_or_create_connector(remote_id) connector = connector_manager.get_or_create_connector(remote_id)
self.assertIsInstance(connector, BookWyrmConnector) self.assertIsInstance(connector, BookWyrmConnector)
self.assertEqual(connector.identifier, 'example.com') self.assertEqual(connector.identifier, 'example.com')
self.assertEqual(connector.base_url, 'https://example.com') self.assertEqual(connector.base_url, 'https://example.com')
same_connector = books_manager.get_or_create_connector(remote_id) same_connector = connector_manager.get_or_create_connector(remote_id)
self.assertEqual(connector.identifier, same_connector.identifier) self.assertEqual(connector.identifier, same_connector.identifier)
def test_get_connectors(self): def test_get_connectors(self):
''' load all connectors '''
remote_id = 'https://example.com/object/1' remote_id = 'https://example.com/object/1'
books_manager.get_or_create_connector(remote_id) connector_manager.get_or_create_connector(remote_id)
connectors = list(books_manager.get_connectors()) connectors = list(connector_manager.get_connectors())
self.assertEqual(len(connectors), 2) self.assertEqual(len(connectors), 2)
self.assertIsInstance(connectors[0], SelfConnector) self.assertIsInstance(connectors[0], SelfConnector)
self.assertIsInstance(connectors[1], BookWyrmConnector) self.assertIsInstance(connectors[1], BookWyrmConnector)
def test_search(self): def test_search(self):
results = books_manager.search('Example') ''' search all connectors '''
results = connector_manager.search('Example')
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertIsInstance(results[0]['connector'], SelfConnector) self.assertIsInstance(results[0]['connector'], SelfConnector)
self.assertEqual(len(results[0]['results']), 1) self.assertEqual(len(results[0]['results']), 1)
self.assertEqual(results[0]['results'][0].title, 'Example Edition') self.assertEqual(results[0]['results'][0].title, 'Example Edition')
def test_local_search(self): def test_local_search(self):
results = books_manager.local_search('Example') ''' search only the local database '''
results = connector_manager.local_search('Example')
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertEqual(results[0].title, 'Example Edition') self.assertEqual(results[0].title, 'Example Edition')
def test_first_search_result(self): def test_first_search_result(self):
result = books_manager.first_search_result('Example') ''' only get one search result '''
result = connector_manager.first_search_result('Example')
self.assertEqual(result.title, 'Example Edition') self.assertEqual(result.title, 'Example Edition')
no_result = books_manager.first_search_result('dkjfhg') no_result = connector_manager.first_search_result('dkjfhg')
self.assertIsNone(no_result) self.assertIsNone(no_result)
def test_load_connector(self): def test_load_connector(self):
connector = books_manager.load_connector(self.connector) ''' load a connector object from the database entry '''
connector = connector_manager.load_connector(self.connector)
self.assertIsInstance(connector, SelfConnector) self.assertIsInstance(connector, SelfConnector)
self.assertEqual(connector.identifier, 'test_connector') self.assertEqual(connector.identifier, 'test_connector')

View file

@ -12,7 +12,7 @@ from bookwyrm.connectors.openlibrary import get_languages, get_description
from bookwyrm.connectors.openlibrary import pick_default_edition, \ from bookwyrm.connectors.openlibrary import pick_default_edition, \
get_openlibrary_key get_openlibrary_key
from bookwyrm.connectors.abstract_connector import SearchResult from bookwyrm.connectors.abstract_connector import SearchResult
from bookwyrm.connectors.abstract_connector import ConnectorException from bookwyrm.connectors.connector_manager import ConnectorException
class Openlibrary(TestCase): class Openlibrary(TestCase):

View file

@ -8,7 +8,8 @@ from django.utils import timezone
from django.test import TestCase from django.test import TestCase
import responses import responses
from bookwyrm import books_manager, models from bookwyrm import models
from bookwyrm.connectors import connector_manager
from bookwyrm.connectors.abstract_connector import SearchResult from bookwyrm.connectors.abstract_connector import SearchResult
@ -134,7 +135,7 @@ class ImportJob(TestCase):
search_url='https://openlibrary.org/search?q=', search_url='https://openlibrary.org/search?q=',
priority=3, priority=3,
) )
connector = books_manager.load_connector(connector_info) connector = connector_manager.load_connector(connector_info)
result = SearchResult( result = SearchResult(
title='Test Result', title='Test Result',
key='https://openlibrary.org/works/OL1234W', key='https://openlibrary.org/works/OL1234W',
@ -163,8 +164,12 @@ class ImportJob(TestCase):
json={'name': 'test author'}, json={'name': 'test author'},
status=200) status=200)
with patch('bookwyrm.books_manager.first_search_result') as search: with patch(
search.return_value = result 'bookwyrm.connectors.abstract_connector.load_more_data.delay'):
book = self.item_1.get_book_from_isbn() with patch(
'bookwyrm.connectors.connector_manager.first_search_result'
) as search:
search.return_value = result
book = self.item_1.get_book_from_isbn()
self.assertEqual(book.title, 'Sabriel') self.assertEqual(book.title, 'Sabriel')

View file

@ -39,6 +39,14 @@ class Views(TestCase):
) )
def test_get_edition(self):
''' given an edition or a work, returns an edition '''
self.assertEqual(
views.get_edition(self.book.id), self.book)
self.assertEqual(
views.get_edition(self.work.id), self.book)
def test_get_user_from_username(self): def test_get_user_from_username(self):
''' works for either localname or username ''' ''' works for either localname or username '''
self.assertEqual( self.assertEqual(
@ -193,7 +201,8 @@ class Views(TestCase):
request = self.factory.get('', {'q': 'Test Book'}) request = self.factory.get('', {'q': 'Test Book'})
with patch('bookwyrm.views.is_api_request') as is_api: with patch('bookwyrm.views.is_api_request') as is_api:
is_api.return_value = False is_api.return_value = False
with patch('bookwyrm.books_manager.search') as manager: with patch(
'bookwyrm.connectors.connector_manager.search') as manager:
manager.return_value = [search_result] manager.return_value = [search_result]
response = views.search(request) response = views.search(request)
self.assertIsInstance(response, TemplateResponse) self.assertIsInstance(response, TemplateResponse)

View file

@ -17,11 +17,12 @@ from django.template.response import TemplateResponse
from django.utils import timezone from django.utils import timezone
from django.views.decorators.http import require_GET, require_POST from django.views.decorators.http import require_GET, require_POST
from bookwyrm import books_manager, forms, models, outgoing, goodreads_import from bookwyrm import forms, models, outgoing, goodreads_import
from bookwyrm.connectors import connector_manager
from bookwyrm.broadcast import broadcast from bookwyrm.broadcast import broadcast
from bookwyrm.emailing import password_reset_email from bookwyrm.emailing import password_reset_email
from bookwyrm.settings import DOMAIN from bookwyrm.settings import DOMAIN
from bookwyrm.views import get_user_from_username from bookwyrm.views import get_user_from_username, get_edition
@require_POST @require_POST
@ -210,10 +211,8 @@ def edit_profile(request):
def resolve_book(request): def resolve_book(request):
''' figure out the local path to a book from a remote_id ''' ''' figure out the local path to a book from a remote_id '''
remote_id = request.POST.get('remote_id') remote_id = request.POST.get('remote_id')
connector = books_manager.get_or_create_connector(remote_id) connector = connector_manager.get_or_create_connector(remote_id)
book = connector.get_or_create_book(remote_id) book = connector.get_or_create_book(remote_id)
if book.connector:
books_manager.load_more_data.delay(book.id)
return redirect('/book/%d' % book.id) return redirect('/book/%d' % book.id)
@ -371,7 +370,7 @@ def delete_shelf(request, shelf_id):
@require_POST @require_POST
def shelve(request): def shelve(request):
''' put a on a user's shelf ''' ''' put a on a user's shelf '''
book = books_manager.get_edition(request.POST['book']) book = get_edition(request.POST['book'])
desired_shelf = models.Shelf.objects.filter( desired_shelf = models.Shelf.objects.filter(
identifier=request.POST['shelf'], identifier=request.POST['shelf'],
@ -417,7 +416,7 @@ def unshelve(request):
@require_POST @require_POST
def start_reading(request, book_id): def start_reading(request, book_id):
''' begin reading a book ''' ''' begin reading a book '''
book = books_manager.get_edition(book_id) book = get_edition(book_id)
shelf = models.Shelf.objects.filter( shelf = models.Shelf.objects.filter(
identifier='reading', identifier='reading',
user=request.user user=request.user
@ -453,7 +452,7 @@ def start_reading(request, book_id):
@require_POST @require_POST
def finish_reading(request, book_id): def finish_reading(request, book_id):
''' a user completed a book, yay ''' ''' a user completed a book, yay '''
book = books_manager.get_edition(book_id) book = get_edition(book_id)
shelf = models.Shelf.objects.filter( shelf = models.Shelf.objects.filter(
identifier='read', identifier='read',
user=request.user user=request.user

View file

@ -13,14 +13,22 @@ from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_GET from django.views.decorators.http import require_GET
from bookwyrm import outgoing from bookwyrm import outgoing
from bookwyrm.activitypub import ActivitypubResponse from bookwyrm import forms, models
from bookwyrm import forms, models, books_manager
from bookwyrm import goodreads_import from bookwyrm import goodreads_import
from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.connectors import connector_manager
from bookwyrm.settings import PAGE_LENGTH from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.tasks import app from bookwyrm.tasks import app
from bookwyrm.utils import regex from bookwyrm.utils import regex
def get_edition(book_id):
''' look up a book in the db and return an edition '''
book = models.Book.objects.select_subclasses().get(id=book_id)
if isinstance(book, models.Work):
book = book.get_default_edition()
return book
def get_user_from_username(username): def get_user_from_username(username):
''' helper function to resolve a localname or a username to a user ''' ''' helper function to resolve a localname or a username to a user '''
# raises DoesNotExist if user is now found # raises DoesNotExist if user is now found
@ -211,7 +219,7 @@ def search(request):
if is_api_request(request): if is_api_request(request):
# only return local book results via json so we don't cause a cascade # only return local book results via json so we don't cause a cascade
book_results = books_manager.local_search(query) book_results = connector_manager.local_search(query)
return JsonResponse([r.json() for r in book_results], safe=False) return JsonResponse([r.json() for r in book_results], safe=False)
# use webfinger for mastodon style account@domain.com username # use webfinger for mastodon style account@domain.com username
@ -225,7 +233,7 @@ def search(request):
similarity__gt=0.5, similarity__gt=0.5,
).order_by('-similarity')[:10] ).order_by('-similarity')[:10]
book_results = books_manager.search(query) book_results = connector_manager.search(query)
data = { data = {
'title': 'Search Results', 'title': 'Search Results',
'book_results': book_results, 'book_results': book_results,
@ -645,7 +653,7 @@ def book_page(request, book_id):
@require_GET @require_GET
def edit_book_page(request, book_id): def edit_book_page(request, book_id):
''' info about a book ''' ''' info about a book '''
book = books_manager.get_edition(book_id) book = get_edition(book_id)
if not book.description: if not book.description:
book.description = book.parent_work.description book.description = book.parent_work.description
data = { data = {

View file

@ -20,8 +20,9 @@ app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs. # Load task modules from all registered Django app configs.
app.autodiscover_tasks() app.autodiscover_tasks()
app.autodiscover_tasks(['bookwyrm'], related_name='activitypub.base_activity') app.autodiscover_tasks(['bookwyrm'], related_name='activitypub.base_activity')
app.autodiscover_tasks(['bookwyrm'], related_name='books_manager')
app.autodiscover_tasks(['bookwyrm'], related_name='broadcast') app.autodiscover_tasks(['bookwyrm'], related_name='broadcast')
app.autodiscover_tasks(
['bookwyrm'], related_name='connectors.abstract_connector')
app.autodiscover_tasks(['bookwyrm'], related_name='emailing') app.autodiscover_tasks(['bookwyrm'], related_name='emailing')
app.autodiscover_tasks(['bookwyrm'], related_name='goodreads_import') app.autodiscover_tasks(['bookwyrm'], related_name='goodreads_import')
app.autodiscover_tasks(['bookwyrm'], related_name='incoming') app.autodiscover_tasks(['bookwyrm'], related_name='incoming')