Merge pull request #461 from mouse-reeve/fixes-expand-data

Makes expanding book data fully part of the connector
This commit is contained in:
Mouse Reeve 2021-01-02 09:47:10 -08:00 committed by GitHub
commit dcc833c0c7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 127 additions and 101 deletions

View file

@ -2,3 +2,5 @@
from .settings import CONNECTORS
from .abstract_connector import ConnectorException
from .abstract_connector import get_data, get_image
from .connector_manager import search, local_search, first_search_result

View file

@ -6,17 +6,13 @@ from urllib3.exceptions import RequestError
from django.db import transaction
import requests
from requests import HTTPError
from requests.exceptions import SSLError
from bookwyrm import activitypub, models, settings
from .connector_manager import load_more_data, ConnectorException
logger = logging.getLogger(__name__)
class ConnectorException(HTTPError):
''' when the connector can't do what was asked '''
class AbstractMinimalConnector(ABC):
''' just the bare bones, for other bookwyrm instances '''
def __init__(self, identifier):
@ -90,7 +86,6 @@ class AbstractConnector(AbstractMinimalConnector):
return True
@transaction.atomic
def get_or_create_book(self, remote_id):
''' translate arbitrary json into an Activitypub dataclass '''
# first, check if we have the origin_id saved
@ -123,13 +118,17 @@ class AbstractConnector(AbstractMinimalConnector):
if not work_data or not edition_data:
raise ConnectorException('Unable to load book data: %s' % remote_id)
# create activitypub object
work_activity = activitypub.Work(**work_data)
# this will dedupe automatically
work = work_activity.to_model(models.Work)
for author in self.get_authors_from_data(data):
work.authors.add(author)
return self.create_edition_from_data(work, edition_data)
with transaction.atomic():
# create activitypub object
work_activity = activitypub.Work(**work_data)
# this will dedupe automatically
work = work_activity.to_model(models.Work)
for author in self.get_authors_from_data(data):
work.authors.add(author)
edition = self.create_edition_from_data(work, edition_data)
load_more_data.delay(self.connector.id, work.id)
return edition
def create_edition_from_data(self, work, edition_data):

View file

@ -1,51 +1,15 @@
''' select and call a connector for whatever book task needs doing '''
''' interface with whatever connectors the app has '''
import importlib
from urllib.parse import urlparse
from requests import HTTPError
from bookwyrm import models
from bookwyrm.connectors import ConnectorException
from bookwyrm.tasks import app
def get_edition(book_id):
''' look up a book in the db and return an edition '''
book = models.Book.objects.select_subclasses().get(id=book_id)
if isinstance(book, models.Work):
book = book.default_edition
return book
def get_or_create_connector(remote_id):
''' get the connector related to the author's server '''
url = urlparse(remote_id)
identifier = url.netloc
if not identifier:
raise ValueError('Invalid remote id')
try:
connector_info = models.Connector.objects.get(identifier=identifier)
except models.Connector.DoesNotExist:
connector_info = models.Connector.objects.create(
identifier=identifier,
connector_file='bookwyrm_connector',
base_url='https://%s' % identifier,
books_url='https://%s/book' % identifier,
covers_url='https://%s/images/covers' % identifier,
search_url='https://%s/search?q=' % identifier,
priority=2
)
return load_connector(connector_info)
@app.task
def load_more_data(book_id):
''' background the work of getting all 10,000 editions of LoTR '''
book = models.Book.objects.select_subclasses().get(id=book_id)
connector = load_connector(book.connector)
connector.expand_book_data(book)
class ConnectorException(HTTPError):
''' when the connector can't do what was asked '''
def search(query, min_confidence=0.1):
@ -92,6 +56,38 @@ def get_connectors():
yield load_connector(info)
def get_or_create_connector(remote_id):
''' get the connector related to the author's server '''
url = urlparse(remote_id)
identifier = url.netloc
if not identifier:
raise ValueError('Invalid remote id')
try:
connector_info = models.Connector.objects.get(identifier=identifier)
except models.Connector.DoesNotExist:
connector_info = models.Connector.objects.create(
identifier=identifier,
connector_file='bookwyrm_connector',
base_url='https://%s' % identifier,
books_url='https://%s/book' % identifier,
covers_url='https://%s/images/covers' % identifier,
search_url='https://%s/search?q=' % identifier,
priority=2
)
return load_connector(connector_info)
@app.task
def load_more_data(connector_id, book_id):
''' background the work of getting all 10,000 editions of LoTR '''
connector_info = models.Connector.objects.get(id=connector_id)
connector = load_connector(connector_info)
book = models.Book.objects.select_subclasses().get(id=book_id)
connector.expand_book_data(book)
def load_connector(connector_info):
''' instantiate the connector class '''
connector = importlib.import_module(

View file

@ -3,7 +3,8 @@ import re
from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import ConnectorException, get_data
from .abstract_connector import get_data
from .connector_manager import ConnectorException
from .openlibrary_languages import languages

View file

@ -6,7 +6,7 @@ from django.contrib.postgres.fields import JSONField
from django.db import models
from django.utils import timezone
from bookwyrm import books_manager
from bookwyrm.connectors import connector_manager
from bookwyrm.models import ReadThrough, User, Book
from .fields import PrivacyLevels
@ -71,7 +71,7 @@ class ImportItem(models.Model):
def get_book_from_isbn(self):
''' search by isbn '''
search_result = books_manager.first_search_result(
search_result = connector_manager.first_search_result(
self.isbn, min_confidence=0.999
)
if search_result:
@ -86,7 +86,7 @@ class ImportItem(models.Model):
self.data['Title'],
self.data['Author']
)
search_result = books_manager.first_search_result(
search_result = connector_manager.first_search_result(
search_term, min_confidence=0.999
)
if search_result:

View file

@ -1,4 +1,5 @@
''' testing book data connectors '''
from unittest.mock import patch
from django.test import TestCase
import responses
@ -104,8 +105,10 @@ class AbstractConnector(TestCase):
'https://example.com/book/abcd',
json=self.edition_data
)
result = self.connector.get_or_create_book(
'https://example.com/book/abcd')
with patch(
'bookwyrm.connectors.abstract_connector.load_more_data.delay'):
result = self.connector.get_or_create_book(
'https://example.com/book/abcd')
self.assertEqual(result, self.book)
self.assertEqual(models.Edition.objects.count(), 1)
self.assertEqual(models.Edition.objects.count(), 1)

View file

@ -1,12 +1,18 @@
''' interface between the app and various connectors '''
from django.test import TestCase
from bookwyrm import books_manager, models
from bookwyrm.connectors.bookwyrm_connector import Connector as BookWyrmConnector
from bookwyrm.connectors.self_connector import Connector as SelfConnector
from bookwyrm import models
from bookwyrm.connectors import connector_manager
from bookwyrm.connectors.bookwyrm_connector \
import Connector as BookWyrmConnector
from bookwyrm.connectors.self_connector \
import Connector as SelfConnector
class Book(TestCase):
class ConnectorManager(TestCase):
''' interface between the app and various connectors '''
def setUp(self):
''' we'll need some books and a connector info entry '''
self.work = models.Work.objects.create(
title='Example Work'
)
@ -28,53 +34,50 @@ class Book(TestCase):
covers_url='http://test.com/',
)
def test_get_edition(self):
edition = books_manager.get_edition(self.edition.id)
self.assertEqual(edition, self.edition)
def test_get_edition_work(self):
edition = books_manager.get_edition(self.work.id)
self.assertEqual(edition, self.edition)
def test_get_or_create_connector(self):
''' loads a connector if the data source is known or creates one '''
remote_id = 'https://example.com/object/1'
connector = books_manager.get_or_create_connector(remote_id)
connector = connector_manager.get_or_create_connector(remote_id)
self.assertIsInstance(connector, BookWyrmConnector)
self.assertEqual(connector.identifier, 'example.com')
self.assertEqual(connector.base_url, 'https://example.com')
same_connector = books_manager.get_or_create_connector(remote_id)
same_connector = connector_manager.get_or_create_connector(remote_id)
self.assertEqual(connector.identifier, same_connector.identifier)
def test_get_connectors(self):
''' load all connectors '''
remote_id = 'https://example.com/object/1'
books_manager.get_or_create_connector(remote_id)
connectors = list(books_manager.get_connectors())
connector_manager.get_or_create_connector(remote_id)
connectors = list(connector_manager.get_connectors())
self.assertEqual(len(connectors), 2)
self.assertIsInstance(connectors[0], SelfConnector)
self.assertIsInstance(connectors[1], BookWyrmConnector)
def test_search(self):
results = books_manager.search('Example')
''' search all connectors '''
results = connector_manager.search('Example')
self.assertEqual(len(results), 1)
self.assertIsInstance(results[0]['connector'], SelfConnector)
self.assertEqual(len(results[0]['results']), 1)
self.assertEqual(results[0]['results'][0].title, 'Example Edition')
def test_local_search(self):
results = books_manager.local_search('Example')
''' search only the local database '''
results = connector_manager.local_search('Example')
self.assertEqual(len(results), 1)
self.assertEqual(results[0].title, 'Example Edition')
def test_first_search_result(self):
result = books_manager.first_search_result('Example')
''' only get one search result '''
result = connector_manager.first_search_result('Example')
self.assertEqual(result.title, 'Example Edition')
no_result = books_manager.first_search_result('dkjfhg')
no_result = connector_manager.first_search_result('dkjfhg')
self.assertIsNone(no_result)
def test_load_connector(self):
connector = books_manager.load_connector(self.connector)
''' load a connector object from the database entry '''
connector = connector_manager.load_connector(self.connector)
self.assertIsInstance(connector, SelfConnector)
self.assertEqual(connector.identifier, 'test_connector')

View file

@ -12,7 +12,7 @@ from bookwyrm.connectors.openlibrary import get_languages, get_description
from bookwyrm.connectors.openlibrary import pick_default_edition, \
get_openlibrary_key
from bookwyrm.connectors.abstract_connector import SearchResult
from bookwyrm.connectors.abstract_connector import ConnectorException
from bookwyrm.connectors.connector_manager import ConnectorException
class Openlibrary(TestCase):

View file

@ -8,7 +8,8 @@ from django.utils import timezone
from django.test import TestCase
import responses
from bookwyrm import books_manager, models
from bookwyrm import models
from bookwyrm.connectors import connector_manager
from bookwyrm.connectors.abstract_connector import SearchResult
@ -134,7 +135,7 @@ class ImportJob(TestCase):
search_url='https://openlibrary.org/search?q=',
priority=3,
)
connector = books_manager.load_connector(connector_info)
connector = connector_manager.load_connector(connector_info)
result = SearchResult(
title='Test Result',
key='https://openlibrary.org/works/OL1234W',
@ -163,8 +164,12 @@ class ImportJob(TestCase):
json={'name': 'test author'},
status=200)
with patch('bookwyrm.books_manager.first_search_result') as search:
search.return_value = result
book = self.item_1.get_book_from_isbn()
with patch(
'bookwyrm.connectors.abstract_connector.load_more_data.delay'):
with patch(
'bookwyrm.connectors.connector_manager.first_search_result'
) as search:
search.return_value = result
book = self.item_1.get_book_from_isbn()
self.assertEqual(book.title, 'Sabriel')

View file

@ -39,6 +39,14 @@ class Views(TestCase):
)
def test_get_edition(self):
''' given an edition or a work, returns an edition '''
self.assertEqual(
views.get_edition(self.book.id), self.book)
self.assertEqual(
views.get_edition(self.work.id), self.book)
def test_get_user_from_username(self):
''' works for either localname or username '''
self.assertEqual(
@ -193,7 +201,8 @@ class Views(TestCase):
request = self.factory.get('', {'q': 'Test Book'})
with patch('bookwyrm.views.is_api_request') as is_api:
is_api.return_value = False
with patch('bookwyrm.books_manager.search') as manager:
with patch(
'bookwyrm.connectors.connector_manager.search') as manager:
manager.return_value = [search_result]
response = views.search(request)
self.assertIsInstance(response, TemplateResponse)

View file

@ -17,11 +17,12 @@ from django.template.response import TemplateResponse
from django.utils import timezone
from django.views.decorators.http import require_GET, require_POST
from bookwyrm import books_manager, forms, models, outgoing, goodreads_import
from bookwyrm import forms, models, outgoing, goodreads_import
from bookwyrm.connectors import connector_manager
from bookwyrm.broadcast import broadcast
from bookwyrm.emailing import password_reset_email
from bookwyrm.settings import DOMAIN
from bookwyrm.views import get_user_from_username
from bookwyrm.views import get_user_from_username, get_edition
@require_POST
@ -210,10 +211,8 @@ def edit_profile(request):
def resolve_book(request):
''' figure out the local path to a book from a remote_id '''
remote_id = request.POST.get('remote_id')
connector = books_manager.get_or_create_connector(remote_id)
connector = connector_manager.get_or_create_connector(remote_id)
book = connector.get_or_create_book(remote_id)
if book.connector:
books_manager.load_more_data.delay(book.id)
return redirect('/book/%d' % book.id)
@ -371,7 +370,7 @@ def delete_shelf(request, shelf_id):
@require_POST
def shelve(request):
''' put a on a user's shelf '''
book = books_manager.get_edition(request.POST['book'])
book = get_edition(request.POST['book'])
desired_shelf = models.Shelf.objects.filter(
identifier=request.POST['shelf'],
@ -417,7 +416,7 @@ def unshelve(request):
@require_POST
def start_reading(request, book_id):
''' begin reading a book '''
book = books_manager.get_edition(book_id)
book = get_edition(book_id)
shelf = models.Shelf.objects.filter(
identifier='reading',
user=request.user
@ -453,7 +452,7 @@ def start_reading(request, book_id):
@require_POST
def finish_reading(request, book_id):
''' a user completed a book, yay '''
book = books_manager.get_edition(book_id)
book = get_edition(book_id)
shelf = models.Shelf.objects.filter(
identifier='read',
user=request.user

View file

@ -13,14 +13,22 @@ from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_GET
from bookwyrm import outgoing
from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm import forms, models, books_manager
from bookwyrm import forms, models
from bookwyrm import goodreads_import
from bookwyrm.activitypub import ActivitypubResponse
from bookwyrm.connectors import connector_manager
from bookwyrm.settings import PAGE_LENGTH
from bookwyrm.tasks import app
from bookwyrm.utils import regex
def get_edition(book_id):
''' look up a book in the db and return an edition '''
book = models.Book.objects.select_subclasses().get(id=book_id)
if isinstance(book, models.Work):
book = book.get_default_edition()
return book
def get_user_from_username(username):
''' helper function to resolve a localname or a username to a user '''
# raises DoesNotExist if user is now found
@ -211,7 +219,7 @@ def search(request):
if is_api_request(request):
# only return local book results via json so we don't cause a cascade
book_results = books_manager.local_search(query)
book_results = connector_manager.local_search(query)
return JsonResponse([r.json() for r in book_results], safe=False)
# use webfinger for mastodon style account@domain.com username
@ -225,7 +233,7 @@ def search(request):
similarity__gt=0.5,
).order_by('-similarity')[:10]
book_results = books_manager.search(query)
book_results = connector_manager.search(query)
data = {
'title': 'Search Results',
'book_results': book_results,
@ -645,7 +653,7 @@ def book_page(request, book_id):
@require_GET
def edit_book_page(request, book_id):
''' info about a book '''
book = books_manager.get_edition(book_id)
book = get_edition(book_id)
if not book.description:
book.description = book.parent_work.description
data = {

View file

@ -20,8 +20,9 @@ app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
app.autodiscover_tasks(['bookwyrm'], related_name='activitypub.base_activity')
app.autodiscover_tasks(['bookwyrm'], related_name='books_manager')
app.autodiscover_tasks(['bookwyrm'], related_name='broadcast')
app.autodiscover_tasks(
['bookwyrm'], related_name='connectors.abstract_connector')
app.autodiscover_tasks(['bookwyrm'], related_name='emailing')
app.autodiscover_tasks(['bookwyrm'], related_name='goodreads_import')
app.autodiscover_tasks(['bookwyrm'], related_name='incoming')