mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-27 03:51:08 +00:00
Merge pull request #249 from mouse-reeve/import
Change how the Goodreads import writes reviews
This commit is contained in:
commit
f35ed0e555
10 changed files with 117 additions and 86 deletions
|
@ -64,14 +64,14 @@ def load_more_data(book_id):
|
||||||
connector.expand_book_data(book)
|
connector.expand_book_data(book)
|
||||||
|
|
||||||
|
|
||||||
def search(query):
|
def search(query, min_confidence=0.1):
|
||||||
''' find books based on arbitrary keywords '''
|
''' find books based on arbitrary keywords '''
|
||||||
results = []
|
results = []
|
||||||
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
|
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
|
||||||
result_index = set()
|
result_index = set()
|
||||||
for connector in get_connectors():
|
for connector in get_connectors():
|
||||||
try:
|
try:
|
||||||
result_set = connector.search(query)
|
result_set = connector.search(query, min_confidence=min_confidence)
|
||||||
except HTTPError:
|
except HTTPError:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -87,16 +87,16 @@ def search(query):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def local_search(query):
|
def local_search(query, min_confidence=0.1):
|
||||||
''' only look at local search results '''
|
''' only look at local search results '''
|
||||||
connector = load_connector(models.Connector.objects.get(local=True))
|
connector = load_connector(models.Connector.objects.get(local=True))
|
||||||
return connector.search(query)
|
return connector.search(query, min_confidence=min_confidence)
|
||||||
|
|
||||||
|
|
||||||
def first_search_result(query):
|
def first_search_result(query, min_confidence=0.1):
|
||||||
''' search until you find a result that fits '''
|
''' search until you find a result that fits '''
|
||||||
for connector in get_connectors():
|
for connector in get_connectors():
|
||||||
result = connector.search(query)
|
result = connector.search(query, min_confidence=min_confidence)
|
||||||
if result:
|
if result:
|
||||||
return result[0]
|
return result[0]
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
''' functionality outline for a book data connector '''
|
''' functionality outline for a book data connector '''
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
import pytz
|
import pytz
|
||||||
import requests
|
import requests
|
||||||
|
from requests import HTTPError
|
||||||
|
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
|
|
||||||
|
|
||||||
class ConnectorException(Exception):
|
class ConnectorException(HTTPError):
|
||||||
''' when the connector can't do what was asked '''
|
''' when the connector can't do what was asked '''
|
||||||
|
|
||||||
|
|
||||||
|
@ -50,7 +52,7 @@ class AbstractConnector(ABC):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def search(self, query):
|
def search(self, query, min_confidence=None):
|
||||||
''' free text search '''
|
''' free text search '''
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
'%s%s' % (self.search_url, query),
|
'%s%s' % (self.search_url, query),
|
||||||
|
@ -155,9 +157,11 @@ class AbstractConnector(ABC):
|
||||||
''' for creating a new book or syncing with data '''
|
''' for creating a new book or syncing with data '''
|
||||||
book = update_from_mappings(book, data, self.book_mappings)
|
book = update_from_mappings(book, data, self.book_mappings)
|
||||||
|
|
||||||
|
author_text = []
|
||||||
for author in self.get_authors_from_data(data):
|
for author in self.get_authors_from_data(data):
|
||||||
book.authors.add(author)
|
book.authors.add(author)
|
||||||
book.author_text = ', '.join(a.display_name for a in book.authors.all())
|
author_text.append(author.display_name)
|
||||||
|
book.author_text = ', '.join(author_text)
|
||||||
book.save()
|
book.save()
|
||||||
|
|
||||||
if not update_cover:
|
if not update_cover:
|
||||||
|
@ -287,25 +291,29 @@ def get_date(date_string):
|
||||||
|
|
||||||
def get_data(url):
|
def get_data(url):
|
||||||
''' wrapper for requests.get '''
|
''' wrapper for requests.get '''
|
||||||
|
try:
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
url,
|
url,
|
||||||
headers={
|
headers={
|
||||||
'Accept': 'application/json; charset=utf-8',
|
'Accept': 'application/json; charset=utf-8',
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
except ConnectionError:
|
||||||
|
raise ConnectorException()
|
||||||
if not resp.ok:
|
if not resp.ok:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
class SearchResult:
|
class SearchResult:
|
||||||
''' standardized search result object '''
|
''' standardized search result object '''
|
||||||
def __init__(self, title, key, author, year):
|
title: str
|
||||||
self.title = title
|
key: str
|
||||||
self.key = key
|
author: str
|
||||||
self.author = author
|
year: str
|
||||||
self.year = year
|
confidence: int = 1
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
||||||
|
|
|
@ -129,10 +129,10 @@ class Connector(AbstractConnector):
|
||||||
key = self.books_url + search_result['key']
|
key = self.books_url + search_result['key']
|
||||||
author = search_result.get('author_name') or ['Unknown']
|
author = search_result.get('author_name') or ['Unknown']
|
||||||
return SearchResult(
|
return SearchResult(
|
||||||
search_result.get('title'),
|
title=search_result.get('title'),
|
||||||
key,
|
key=key,
|
||||||
', '.join(author),
|
author=', '.join(author),
|
||||||
search_result.get('first_publish_year'),
|
year=search_result.get('first_publish_year'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult
|
||||||
|
|
||||||
class Connector(AbstractConnector):
|
class Connector(AbstractConnector):
|
||||||
''' instantiate a connector '''
|
''' instantiate a connector '''
|
||||||
def search(self, query):
|
def search(self, query, min_confidence=0.1):
|
||||||
''' right now you can't search bookwyrm sorry, but when
|
''' right now you can't search bookwyrm sorry, but when
|
||||||
that gets implemented it will totally rule '''
|
that gets implemented it will totally rule '''
|
||||||
vector = SearchVector('title', weight='A') +\
|
vector = SearchVector('title', weight='A') +\
|
||||||
|
@ -28,7 +28,7 @@ class Connector(AbstractConnector):
|
||||||
).annotate(
|
).annotate(
|
||||||
rank=SearchRank(vector, query)
|
rank=SearchRank(vector, query)
|
||||||
).filter(
|
).filter(
|
||||||
rank__gt=0
|
rank__gt=min_confidence
|
||||||
).order_by('-rank')
|
).order_by('-rank')
|
||||||
results = results.filter(default=True) or results
|
results = results.filter(default=True) or results
|
||||||
|
|
||||||
|
@ -42,11 +42,12 @@ class Connector(AbstractConnector):
|
||||||
|
|
||||||
def format_search_result(self, search_result):
|
def format_search_result(self, search_result):
|
||||||
return SearchResult(
|
return SearchResult(
|
||||||
search_result.title,
|
title=search_result.title,
|
||||||
search_result.local_id,
|
key=search_result.local_id,
|
||||||
search_result.author_text,
|
author=search_result.author_text,
|
||||||
search_result.published_date.year if \
|
year=search_result.published_date.year if \
|
||||||
search_result.published_date else None,
|
search_result.published_date else None,
|
||||||
|
confidence=search_result.rank,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -42,13 +42,10 @@ def import_data(job_id):
|
||||||
if item.book:
|
if item.book:
|
||||||
item.save()
|
item.save()
|
||||||
results.append(item)
|
results.append(item)
|
||||||
|
# shelves book and handles reviews
|
||||||
|
outgoing.handle_imported_book(job.user, item)
|
||||||
else:
|
else:
|
||||||
item.fail_reason = "Could not match book on OpenLibrary"
|
item.fail_reason = "Could not find a match for book"
|
||||||
item.save()
|
item.save()
|
||||||
|
|
||||||
status = outgoing.handle_import_books(job.user, results)
|
|
||||||
if status:
|
|
||||||
job.import_status = status
|
|
||||||
job.save()
|
|
||||||
finally:
|
finally:
|
||||||
create_notification(job.user, 'IMPORT', related_import=job)
|
create_notification(job.user, 'IMPORT', related_import=job)
|
||||||
|
|
17
bookwyrm/migrations/0058_remove_importjob_import_status.py
Normal file
17
bookwyrm/migrations/0058_remove_importjob_import_status.py
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# Generated by Django 3.0.7 on 2020-10-29 23:48
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('bookwyrm', '0057_auto_20201026_2131'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='importjob',
|
||||||
|
name='import_status',
|
||||||
|
),
|
||||||
|
]
|
|
@ -40,8 +40,7 @@ class ImportJob(models.Model):
|
||||||
user = models.ForeignKey(User, on_delete=models.CASCADE)
|
user = models.ForeignKey(User, on_delete=models.CASCADE)
|
||||||
created_date = models.DateTimeField(default=timezone.now)
|
created_date = models.DateTimeField(default=timezone.now)
|
||||||
task_id = models.CharField(max_length=100, null=True)
|
task_id = models.CharField(max_length=100, null=True)
|
||||||
import_status = models.ForeignKey(
|
|
||||||
'Status', null=True, on_delete=models.PROTECT)
|
|
||||||
|
|
||||||
class ImportItem(models.Model):
|
class ImportItem(models.Model):
|
||||||
''' a single line of a csv being imported '''
|
''' a single line of a csv being imported '''
|
||||||
|
@ -64,13 +63,17 @@ class ImportItem(models.Model):
|
||||||
|
|
||||||
def get_book_from_isbn(self):
|
def get_book_from_isbn(self):
|
||||||
''' search by isbn '''
|
''' search by isbn '''
|
||||||
search_result = books_manager.first_search_result(self.isbn)
|
search_result = books_manager.first_search_result(
|
||||||
|
self.isbn, min_confidence=0.5
|
||||||
|
)
|
||||||
if search_result:
|
if search_result:
|
||||||
try:
|
try:
|
||||||
# don't crash the import when the connector fails
|
# don't crash the import when the connector fails
|
||||||
return books_manager.get_or_create_book(search_result.key)
|
return books_manager.get_or_create_book(search_result.key)
|
||||||
except ConnectorException:
|
except ConnectorException:
|
||||||
pass
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_book_from_title_author(self):
|
def get_book_from_title_author(self):
|
||||||
''' search by title and author '''
|
''' search by title and author '''
|
||||||
|
@ -78,12 +81,16 @@ class ImportItem(models.Model):
|
||||||
self.data['Title'],
|
self.data['Title'],
|
||||||
self.data['Author']
|
self.data['Author']
|
||||||
)
|
)
|
||||||
search_result = books_manager.first_search_result(search_term)
|
search_result = books_manager.first_search_result(
|
||||||
|
search_term, min_confidence=0.5
|
||||||
|
)
|
||||||
if search_result:
|
if search_result:
|
||||||
try:
|
try:
|
||||||
return books_manager.get_or_create_book(search_result.key)
|
return books_manager.get_or_create_book(search_result.key)
|
||||||
except ConnectorException:
|
except ConnectorException:
|
||||||
pass
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def isbn(self):
|
def isbn(self):
|
||||||
|
@ -95,6 +102,7 @@ class ImportItem(models.Model):
|
||||||
''' the goodreads shelf field '''
|
''' the goodreads shelf field '''
|
||||||
if self.data['Exclusive Shelf']:
|
if self.data['Exclusive Shelf']:
|
||||||
return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
|
return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
|
||||||
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def review(self):
|
def review(self):
|
||||||
|
@ -111,12 +119,14 @@ class ImportItem(models.Model):
|
||||||
''' when the book was added to this dataset '''
|
''' when the book was added to this dataset '''
|
||||||
if self.data['Date Added']:
|
if self.data['Date Added']:
|
||||||
return dateutil.parser.parse(self.data['Date Added'])
|
return dateutil.parser.parse(self.data['Date Added'])
|
||||||
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def date_read(self):
|
def date_read(self):
|
||||||
''' the date a book was completed '''
|
''' the date a book was completed '''
|
||||||
if self.data['Date Read']:
|
if self.data['Date Read']:
|
||||||
return dateutil.parser.parse(self.data['Date Read'])
|
return dateutil.parser.parse(self.data['Date Read'])
|
||||||
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def reads(self):
|
def reads(self):
|
||||||
|
@ -126,6 +136,7 @@ class ImportItem(models.Model):
|
||||||
return [ReadThrough(start_date=self.date_added)]
|
return [ReadThrough(start_date=self.date_added)]
|
||||||
if self.date_read:
|
if self.date_read:
|
||||||
return [ReadThrough(
|
return [ReadThrough(
|
||||||
|
start_date=self.date_added,
|
||||||
finish_date=self.date_read,
|
finish_date=self.date_read,
|
||||||
)]
|
)]
|
||||||
return []
|
return []
|
||||||
|
|
|
@ -155,51 +155,49 @@ def handle_unshelve(user, book, shelf):
|
||||||
broadcast(user, activity)
|
broadcast(user, activity)
|
||||||
|
|
||||||
|
|
||||||
def handle_import_books(user, items):
|
def handle_imported_book(user, item):
|
||||||
''' process a goodreads csv and then post about it '''
|
''' process a goodreads csv and then post about it '''
|
||||||
new_books = []
|
if isinstance(item.book, models.Work):
|
||||||
for item in items:
|
item.book = item.book.default_edition
|
||||||
|
if not item.book:
|
||||||
|
return
|
||||||
|
|
||||||
if item.shelf:
|
if item.shelf:
|
||||||
desired_shelf = models.Shelf.objects.get(
|
desired_shelf = models.Shelf.objects.get(
|
||||||
identifier=item.shelf,
|
identifier=item.shelf,
|
||||||
user=user
|
user=user
|
||||||
)
|
)
|
||||||
if isinstance(item.book, models.Work):
|
# shelve the book if it hasn't been shelved already
|
||||||
item.book = item.book.default_edition
|
|
||||||
if not item.book:
|
|
||||||
continue
|
|
||||||
shelf_book, created = models.ShelfBook.objects.get_or_create(
|
shelf_book, created = models.ShelfBook.objects.get_or_create(
|
||||||
book=item.book, shelf=desired_shelf, added_by=user)
|
book=item.book, shelf=desired_shelf, added_by=user)
|
||||||
if created:
|
if created:
|
||||||
new_books.append(item.book)
|
broadcast(user, shelf_book.to_add_activity(user))
|
||||||
activity = shelf_book.to_add_activity(user)
|
|
||||||
broadcast(user, activity)
|
# only add new read-throughs if the item isn't already shelved
|
||||||
|
for read in item.reads:
|
||||||
|
read.book = item.book
|
||||||
|
read.user = user
|
||||||
|
read.save()
|
||||||
|
|
||||||
if item.rating or item.review:
|
if item.rating or item.review:
|
||||||
review_title = 'Review of {!r} on Goodreads'.format(
|
review_title = 'Review of {!r} on Goodreads'.format(
|
||||||
item.book.title,
|
item.book.title,
|
||||||
) if item.review else ''
|
) if item.review else ''
|
||||||
|
|
||||||
models.Review.objects.create(
|
# we don't know the publication date of the review,
|
||||||
|
# but "now" is a bad guess
|
||||||
|
published_date_guess = item.date_read or item.date_added
|
||||||
|
review = models.Review.objects.create(
|
||||||
user=user,
|
user=user,
|
||||||
book=item.book,
|
book=item.book,
|
||||||
name=review_title,
|
name=review_title,
|
||||||
content=item.review,
|
content=item.review,
|
||||||
rating=item.rating,
|
rating=item.rating,
|
||||||
|
published_date=published_date_guess,
|
||||||
)
|
)
|
||||||
for read in item.reads:
|
# we don't need to send out pure activities because non-bookwyrm
|
||||||
read.book = item.book
|
# instances don't need this data
|
||||||
read.user = user
|
broadcast(user, review.to_create_activity(user))
|
||||||
read.save()
|
|
||||||
|
|
||||||
if new_books:
|
|
||||||
message = 'imported {} books'.format(len(new_books))
|
|
||||||
status = create_generated_note(user, message, mention_books=new_books)
|
|
||||||
status.save()
|
|
||||||
|
|
||||||
broadcast(user, status.to_create_activity(user))
|
|
||||||
return status
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def handle_delete_status(user, status):
|
def handle_delete_status(user, status):
|
||||||
|
|
|
@ -24,7 +24,7 @@ class ImportJob(TestCase):
|
||||||
'Number of Pages': 416,
|
'Number of Pages': 416,
|
||||||
'Year Published': 2019,
|
'Year Published': 2019,
|
||||||
'Original Publication Year': 2019,
|
'Original Publication Year': 2019,
|
||||||
'Date Read': '2019/04/09',
|
'Date Read': '2019/04/12',
|
||||||
'Date Added': '2019/04/09',
|
'Date Added': '2019/04/09',
|
||||||
'Bookshelves': '',
|
'Bookshelves': '',
|
||||||
'Bookshelves with positions': '',
|
'Bookshelves with positions': '',
|
||||||
|
@ -97,11 +97,9 @@ class ImportJob(TestCase):
|
||||||
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
||||||
|
|
||||||
def test_read_reads(self):
|
def test_read_reads(self):
|
||||||
expected = [models.ReadThrough(
|
|
||||||
finish_date=datetime.datetime(2019, 4, 9, 0, 0))]
|
|
||||||
actual = models.ImportItem.objects.get(index=2)
|
actual = models.ImportItem.objects.get(index=2)
|
||||||
self.assertEqual(actual.reads[0].start_date, expected[0].start_date)
|
self.assertEqual(actual.reads[0].start_date, datetime.datetime(2019, 4, 9, 0, 0))
|
||||||
self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
|
self.assertEqual(actual.reads[0].finish_date, datetime.datetime(2019, 4, 12, 0, 0))
|
||||||
|
|
||||||
def test_unread_reads(self):
|
def test_unread_reads(self):
|
||||||
expected = []
|
expected = []
|
||||||
|
|
|
@ -489,7 +489,8 @@ def book_page(request, book_id):
|
||||||
).values_list('identifier', flat=True)
|
).values_list('identifier', flat=True)
|
||||||
|
|
||||||
readthroughs = models.ReadThrough.objects.filter(
|
readthroughs = models.ReadThrough.objects.filter(
|
||||||
user=request.user
|
user=request.user,
|
||||||
|
book=book,
|
||||||
).order_by('start_date')
|
).order_by('start_date')
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue