mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-28 19:10:35 +00:00
Fixes model deduplication from data
This commit is contained in:
parent
141e7b90e8
commit
a444c5f6fc
4 changed files with 189 additions and 70 deletions
|
@ -109,7 +109,7 @@ class ActivitypubMixin:
|
|||
not field.deduplication_field:
|
||||
continue
|
||||
|
||||
value = data.get(field.activitypub_field)
|
||||
value = data.get(field.get_activitypub_field())
|
||||
if not value:
|
||||
continue
|
||||
filters.append({field.name: value})
|
||||
|
|
|
@ -4,7 +4,8 @@ import responses
|
|||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.connectors import abstract_connector
|
||||
from bookwyrm.connectors.abstract_connector import Mapping, SearchResult
|
||||
from bookwyrm.connectors.abstract_connector import Mapping
|
||||
from bookwyrm.settings import DOMAIN
|
||||
|
||||
|
||||
class AbstractConnector(TestCase):
|
||||
|
@ -19,86 +20,92 @@ class AbstractConnector(TestCase):
|
|||
covers_url='https://example.com/covers',
|
||||
search_url='https://example.com/search?q=',
|
||||
)
|
||||
work_data = {
|
||||
'id': 'abc1',
|
||||
'title': 'Test work',
|
||||
'type': 'work',
|
||||
'openlibraryKey': 'OL1234W',
|
||||
}
|
||||
self.work_data = work_data
|
||||
edition_data = {
|
||||
'id': 'abc2',
|
||||
'title': 'Test edition',
|
||||
'type': 'edition',
|
||||
'openlibraryKey': 'OL1234M',
|
||||
}
|
||||
self.edition_data = edition_data
|
||||
|
||||
class TestConnector(abstract_connector.AbstractMinimalConnector):
|
||||
class TestConnector(abstract_connector.AbstractConnector):
|
||||
''' nothing added here '''
|
||||
def format_search_result(self, search_result):
|
||||
return search_result
|
||||
def get_or_create_book(self, remote_id):
|
||||
pass
|
||||
def parse_search_data(self, data):
|
||||
return data
|
||||
self.test_connector = TestConnector('example.com')
|
||||
def is_work_data(self, data):
|
||||
return data['type'] == 'work'
|
||||
def get_edition_from_work_data(self, data):
|
||||
return edition_data
|
||||
def get_work_from_edition_data(self, data):
|
||||
return work_data
|
||||
def get_authors_from_data(self, data):
|
||||
return []
|
||||
def expand_book_data(self, book):
|
||||
pass
|
||||
self.connector = TestConnector('example.com')
|
||||
self.connector.book_mappings = [
|
||||
Mapping('id'),
|
||||
Mapping('title'),
|
||||
Mapping('openlibraryKey'),
|
||||
]
|
||||
|
||||
self.book = models.Edition.objects.create(
|
||||
title='Test Book', remote_id='https://example.com/book/1234',
|
||||
openlibrary_key='OL1234M')
|
||||
|
||||
|
||||
def test_abstract_minimal_connector_init(self):
|
||||
def test_abstract_connector_init(self):
|
||||
''' barebones connector for search with defaults '''
|
||||
connector = self.test_connector
|
||||
self.assertEqual(connector.connector, self.connector_info)
|
||||
self.assertEqual(connector.base_url, 'https://example.com')
|
||||
self.assertEqual(connector.books_url, 'https://example.com/books')
|
||||
self.assertEqual(connector.covers_url, 'https://example.com/covers')
|
||||
self.assertEqual(connector.search_url, 'https://example.com/search?q=')
|
||||
self.assertIsNone(connector.name)
|
||||
self.assertEqual(connector.identifier, 'example.com')
|
||||
self.assertIsNone(connector.max_query_count)
|
||||
self.assertFalse(connector.local)
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_search(self):
|
||||
''' makes an http request to the outside service '''
|
||||
responses.add(
|
||||
responses.GET,
|
||||
'https://example.com/search?q=a%20book%20title',
|
||||
json=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'],
|
||||
status=200)
|
||||
results = self.test_connector.search('a book title')
|
||||
self.assertEqual(len(results), 10)
|
||||
self.assertEqual(results[0], 'a')
|
||||
self.assertEqual(results[1], 'b')
|
||||
self.assertEqual(results[2], 'c')
|
||||
self.assertIsInstance(self.connector.book_mappings, list)
|
||||
|
||||
|
||||
def test_is_available(self):
|
||||
''' is the connector usable '''
|
||||
''' this isn't used.... '''
|
||||
self.assertTrue(self.connector.is_available())
|
||||
self.connector.max_query_count = 1
|
||||
self.connector.connector.query_count = 2
|
||||
self.assertFalse(self.connector.is_available())
|
||||
|
||||
|
||||
def test_search_result(self):
|
||||
''' a class that stores info about a search result '''
|
||||
result = SearchResult(
|
||||
title='Title',
|
||||
key='https://example.com/book/1',
|
||||
author='Author Name',
|
||||
year='1850',
|
||||
connector=self.test_connector,
|
||||
def test_get_or_create_book_existing(self):
|
||||
''' find an existing book by remote/origin id '''
|
||||
self.assertEqual(models.Book.objects.count(), 1)
|
||||
self.assertEqual(
|
||||
self.book.remote_id, 'https://%s/book/%d' % (DOMAIN, self.book.id))
|
||||
self.assertEqual(
|
||||
self.book.origin_id, 'https://example.com/book/1234')
|
||||
|
||||
# dedupe by origin id
|
||||
result = self.connector.get_or_create_book(
|
||||
'https://example.com/book/1234')
|
||||
self.assertEqual(models.Book.objects.count(), 1)
|
||||
self.assertEqual(result, self.book)
|
||||
|
||||
# dedupe by remote id
|
||||
result = self.connector.get_or_create_book(
|
||||
'https://%s/book/%d' % (DOMAIN, self.book.id))
|
||||
self.assertEqual(models.Book.objects.count(), 1)
|
||||
self.assertEqual(result, self.book)
|
||||
|
||||
@responses.activate
|
||||
def test_get_or_create_book_deduped(self):
|
||||
''' load remote data and deduplicate '''
|
||||
responses.add(
|
||||
responses.GET,
|
||||
'https://example.com/book/abcd',
|
||||
json=self.edition_data
|
||||
)
|
||||
# there's really not much to test here, it's just a dataclass
|
||||
self.assertEqual(result.confidence, 1)
|
||||
self.assertEqual(result.title, 'Title')
|
||||
|
||||
|
||||
def test_create_mapping(self):
|
||||
''' maps remote fields for book data to bookwyrm activitypub fields '''
|
||||
mapping = Mapping('isbn')
|
||||
self.assertEqual(mapping.local_field, 'isbn')
|
||||
self.assertEqual(mapping.remote_field, 'isbn')
|
||||
self.assertEqual(mapping.formatter('bb'), 'bb')
|
||||
|
||||
|
||||
def test_create_mapping_with_remote(self):
|
||||
''' the remote field is different than the local field '''
|
||||
mapping = Mapping('isbn', remote_field='isbn13')
|
||||
self.assertEqual(mapping.local_field, 'isbn')
|
||||
self.assertEqual(mapping.remote_field, 'isbn13')
|
||||
self.assertEqual(mapping.formatter('bb'), 'bb')
|
||||
|
||||
|
||||
def test_create_mapping_with_formatter(self):
|
||||
''' a function is provided to modify the data '''
|
||||
formatter = lambda x: 'aa' + x
|
||||
mapping = Mapping('isbn', formatter=formatter)
|
||||
self.assertEqual(mapping.local_field, 'isbn')
|
||||
self.assertEqual(mapping.remote_field, 'isbn')
|
||||
self.assertEqual(mapping.formatter, formatter)
|
||||
self.assertEqual(mapping.formatter('bb'), 'aabb')
|
||||
result = self.connector.get_or_create_book(
|
||||
'https://example.com/book/abcd')
|
||||
self.assertEqual(result, self.book)
|
||||
self.assertEqual(models.Edition.objects.count(), 1)
|
||||
self.assertEqual(models.Edition.objects.count(), 1)
|
||||
|
|
100
bookwyrm/tests/connectors/test_abstract_minimal_connector.py
Normal file
100
bookwyrm/tests/connectors/test_abstract_minimal_connector.py
Normal file
|
@ -0,0 +1,100 @@
|
|||
''' testing book data connectors '''
|
||||
from django.test import TestCase
|
||||
import responses
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.connectors import abstract_connector
|
||||
from bookwyrm.connectors.abstract_connector import Mapping, SearchResult
|
||||
|
||||
|
||||
class AbstractConnector(TestCase):
|
||||
''' generic code for connecting to outside data sources '''
|
||||
def setUp(self):
|
||||
''' we need an example connector '''
|
||||
self.connector_info = models.Connector.objects.create(
|
||||
identifier='example.com',
|
||||
connector_file='openlibrary',
|
||||
base_url='https://example.com',
|
||||
books_url='https://example.com/books',
|
||||
covers_url='https://example.com/covers',
|
||||
search_url='https://example.com/search?q=',
|
||||
)
|
||||
|
||||
class TestConnector(abstract_connector.AbstractMinimalConnector):
|
||||
''' nothing added here '''
|
||||
def format_search_result(self, search_result):
|
||||
return search_result
|
||||
def get_or_create_book(self, remote_id):
|
||||
pass
|
||||
def parse_search_data(self, data):
|
||||
return data
|
||||
self.test_connector = TestConnector('example.com')
|
||||
|
||||
|
||||
def test_abstract_minimal_connector_init(self):
|
||||
''' barebones connector for search with defaults '''
|
||||
connector = self.test_connector
|
||||
self.assertEqual(connector.connector, self.connector_info)
|
||||
self.assertEqual(connector.base_url, 'https://example.com')
|
||||
self.assertEqual(connector.books_url, 'https://example.com/books')
|
||||
self.assertEqual(connector.covers_url, 'https://example.com/covers')
|
||||
self.assertEqual(connector.search_url, 'https://example.com/search?q=')
|
||||
self.assertIsNone(connector.name)
|
||||
self.assertEqual(connector.identifier, 'example.com')
|
||||
self.assertIsNone(connector.max_query_count)
|
||||
self.assertFalse(connector.local)
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_search(self):
|
||||
''' makes an http request to the outside service '''
|
||||
responses.add(
|
||||
responses.GET,
|
||||
'https://example.com/search?q=a%20book%20title',
|
||||
json=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'],
|
||||
status=200)
|
||||
results = self.test_connector.search('a book title')
|
||||
self.assertEqual(len(results), 10)
|
||||
self.assertEqual(results[0], 'a')
|
||||
self.assertEqual(results[1], 'b')
|
||||
self.assertEqual(results[2], 'c')
|
||||
|
||||
|
||||
def test_search_result(self):
|
||||
''' a class that stores info about a search result '''
|
||||
result = SearchResult(
|
||||
title='Title',
|
||||
key='https://example.com/book/1',
|
||||
author='Author Name',
|
||||
year='1850',
|
||||
connector=self.test_connector,
|
||||
)
|
||||
# there's really not much to test here, it's just a dataclass
|
||||
self.assertEqual(result.confidence, 1)
|
||||
self.assertEqual(result.title, 'Title')
|
||||
|
||||
|
||||
def test_create_mapping(self):
|
||||
''' maps remote fields for book data to bookwyrm activitypub fields '''
|
||||
mapping = Mapping('isbn')
|
||||
self.assertEqual(mapping.local_field, 'isbn')
|
||||
self.assertEqual(mapping.remote_field, 'isbn')
|
||||
self.assertEqual(mapping.formatter('bb'), 'bb')
|
||||
|
||||
|
||||
def test_create_mapping_with_remote(self):
|
||||
''' the remote field is different than the local field '''
|
||||
mapping = Mapping('isbn', remote_field='isbn13')
|
||||
self.assertEqual(mapping.local_field, 'isbn')
|
||||
self.assertEqual(mapping.remote_field, 'isbn13')
|
||||
self.assertEqual(mapping.formatter('bb'), 'bb')
|
||||
|
||||
|
||||
def test_create_mapping_with_formatter(self):
|
||||
''' a function is provided to modify the data '''
|
||||
formatter = lambda x: 'aa' + x
|
||||
mapping = Mapping('isbn', formatter=formatter)
|
||||
self.assertEqual(mapping.local_field, 'isbn')
|
||||
self.assertEqual(mapping.remote_field, 'isbn')
|
||||
self.assertEqual(mapping.formatter, formatter)
|
||||
self.assertEqual(mapping.formatter('bb'), 'aabb')
|
|
@ -200,3 +200,15 @@ class BaseModel(TestCase):
|
|||
# test subclass match
|
||||
result = models.Status.find_existing_by_remote_id(
|
||||
'https://comment.net')
|
||||
|
||||
|
||||
def test_find_existing(self):
|
||||
''' match a blob of data to a model '''
|
||||
book = models.Edition.objects.create(
|
||||
title='Test edition',
|
||||
openlibrary_key='OL1234',
|
||||
)
|
||||
|
||||
result = models.Edition.find_existing(
|
||||
{'openlibraryKey': 'OL1234'})
|
||||
self.assertEqual(result, book)
|
||||
|
|
Loading…
Reference in a new issue