Merge pull request #249 from mouse-reeve/import
Change how Goodreads import writes reviews
Commit f35ed0e555
10 changed files with 117 additions and 86 deletions
@@ -64,14 +64,14 @@ def load_more_data(book_id):
     connector.expand_book_data(book)


-def search(query):
+def search(query, min_confidence=0.1):
     ''' find books based on arbitary keywords '''
     results = []
     dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
     result_index = set()
     for connector in get_connectors():
         try:
-            result_set = connector.search(query)
+            result_set = connector.search(query, min_confidence=min_confidence)
         except HTTPError:
             continue
@@ -87,16 +87,16 @@ def search(query):
     return results


-def local_search(query):
+def local_search(query, min_confidence=0.1):
     ''' only look at local search results '''
     connector = load_connector(models.Connector.objects.get(local=True))
-    return connector.search(query)
+    return connector.search(query, min_confidence=min_confidence)


-def first_search_result(query):
+def first_search_result(query, min_confidence=0.1):
     ''' search until you find a result that fits '''
     for connector in get_connectors():
-        result = connector.search(query)
+        result = connector.search(query, min_confidence=min_confidence)
         if result:
             return result[0]
     return None
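Taken together, the two hunks above thread a min_confidence keyword from the top-level search helpers down into every connector. A minimal sketch of the resulting call pattern (it assumes a configured Django environment; the title is illustrative, and 0.5 is the threshold the import code below uses):

from bookwyrm import books_manager

# casual search: anything above the default 0.1 confidence floor comes back
results = books_manager.search('Parable of the Sower')

# automated matching, as the importer does: demand a stronger match
# before trusting the first result
match = books_manager.first_search_result(
    'Parable of the Sower', min_confidence=0.5
)
if match:
    print(match.key, match.confidence)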
@@ -1,15 +1,17 @@
 ''' functionality outline for a book data connector '''
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from dateutil import parser
 import pytz
 import requests
+from requests import HTTPError

 from django.db import transaction

 from bookwyrm import models


-class ConnectorException(Exception):
+class ConnectorException(HTTPError):
     ''' when the connector can't do what was asked '''
@@ -50,7 +52,7 @@ class AbstractConnector(ABC):
         return True


-    def search(self, query):
+    def search(self, query, min_confidence=None):
         ''' free text search '''
         resp = requests.get(
             '%s%s' % (self.search_url, query),
@@ -155,9 +157,11 @@ class AbstractConnector(ABC):
         ''' for creating a new book or syncing with data '''
         book = update_from_mappings(book, data, self.book_mappings)

+        author_text = []
         for author in self.get_authors_from_data(data):
             book.authors.add(author)
-            book.author_text = ', '.join(a.display_name for a in book.authors.all())
+            author_text.append(author.display_name)
+        book.author_text = ', '.join(author_text)
         book.save()

         if not update_cover:
@@ -287,25 +291,29 @@ def get_date(date_string):

 def get_data(url):
     ''' wrapper for request.get '''
+    try:
         resp = requests.get(
             url,
             headers={
                 'Accept': 'application/json; charset=utf-8',
             },
         )
+    except ConnectionError:
+        raise ConnectorException()
     if not resp.ok:
         resp.raise_for_status()
     data = resp.json()
     return data


+@dataclass
 class SearchResult:
     ''' standardized search result object '''
-    def __init__(self, title, key, author, year):
-        self.title = title
-        self.key = key
-        self.author = author
-        self.year = year
+    title: str
+    key: str
+    author: str
+    year: str
+    confidence: int = 1

     def __repr__(self):
         return "<SearchResult key={!r} title={!r} author={!r}>".format(
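Replacing the hand-written __init__ with a dataclass is what lets the connectors below construct results with keyword arguments. A sketch of the reconstructed class in isolation (the key URL and book details are hypothetical):

from dataclasses import dataclass

@dataclass
class SearchResult:
    ''' standardized search result object '''
    title: str
    key: str
    author: str
    year: str
    confidence: int = 1  # remote connectors leave the default

result = SearchResult(
    title='Parable of the Sower',
    key='https://example.net/book/1',  # hypothetical remote key
    author='Octavia E. Butler',
    year='1993',
)
assert result.confidence == 1  # the local connector overrides this with its rank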
@@ -129,10 +129,10 @@ class Connector(AbstractConnector):
         key = self.books_url + search_result['key']
         author = search_result.get('author_name') or ['Unknown']
         return SearchResult(
-            search_result.get('title'),
-            key,
-            ', '.join(author),
-            search_result.get('first_publish_year'),
+            title=search_result.get('title'),
+            key=key,
+            author=', '.join(author),
+            year=search_result.get('first_publish_year'),
         )
@@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult

 class Connector(AbstractConnector):
     ''' instantiate a connector '''
-    def search(self, query):
+    def search(self, query, min_confidence=0.1):
         ''' right now you can't search bookwyrm sorry, but when
             that gets implemented it will totally rule '''
         vector = SearchVector('title', weight='A') +\
@@ -28,7 +28,7 @@ class Connector(AbstractConnector):
         ).annotate(
             rank=SearchRank(vector, query)
         ).filter(
-            rank__gt=0
+            rank__gt=min_confidence
         ).order_by('-rank')
         results = results.filter(default=True) or results
@@ -42,11 +42,12 @@ class Connector(AbstractConnector):

     def format_search_result(self, search_result):
         return SearchResult(
-            search_result.title,
-            search_result.local_id,
-            search_result.author_text,
-            search_result.published_date.year if \
+            title=search_result.title,
+            key=search_result.local_id,
+            author=search_result.author_text,
+            year=search_result.published_date.year if \
                 search_result.published_date else None,
+            confidence=search_result.rank,
         )
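For context on the rank__gt change: Django's SearchRank produces small floats, so raising the cutoff from 0 to min_confidence drops barely-relevant rows outright instead of returning them last. A hedged sketch of the pattern (the field names are illustrative, not necessarily the connector's exact ones):

from django.contrib.postgres.search import SearchRank, SearchVector

def ranked_search(queryset, query, min_confidence=0.1):
    # weight='A' makes title matches count more than weight='B' fields
    vector = SearchVector('title', weight='A') + \
        SearchVector('subtitle', weight='B')
    return queryset.annotate(
        rank=SearchRank(vector, query)
    ).filter(
        rank__gt=min_confidence  # was rank__gt=0
    ).order_by('-rank')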
@@ -42,13 +42,10 @@ def import_data(job_id):
             if item.book:
                 item.save()
-                results.append(item)
+                # shelves book and handles reviews
+                outgoing.handle_imported_book(job.user, item)
             else:
-                item.fail_reason = "Could not match book on OpenLibrary"
+                item.fail_reason = "Could not find a match for book"
                 item.save()

-        status = outgoing.handle_import_books(job.user, results)
-        if status:
-            job.import_status = status
-            job.save()
     finally:
         create_notification(job.user, 'IMPORT', related_import=job)
bookwyrm/migrations/0058_remove_importjob_import_status.py (new file, 17 lines)
@@ -0,0 +1,17 @@
+# Generated by Django 3.0.7 on 2020-10-29 23:48
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('bookwyrm', '0057_auto_20201026_2131'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='importjob',
+            name='import_status',
+        ),
+    ]
@@ -40,8 +40,7 @@ class ImportJob(models.Model):
     user = models.ForeignKey(User, on_delete=models.CASCADE)
     created_date = models.DateTimeField(default=timezone.now)
     task_id = models.CharField(max_length=100, null=True)
-    import_status = models.ForeignKey(
-        'Status', null=True, on_delete=models.PROTECT)


 class ImportItem(models.Model):
     ''' a single line of a csv being imported '''
@@ -64,13 +63,17 @@ class ImportItem(models.Model):

     def get_book_from_isbn(self):
         ''' search by isbn '''
-        search_result = books_manager.first_search_result(self.isbn)
+        search_result = books_manager.first_search_result(
+            self.isbn, min_confidence=0.5
+        )
         if search_result:
-            return books_manager.get_or_create_book(search_result.key)
+            try:
+                # don't crash the import when the connector fails
+                return books_manager.get_or_create_book(search_result.key)
+            except ConnectorException:
+                pass
         return None


     def get_book_from_title_author(self):
         ''' search by title and author '''
@@ -78,12 +81,16 @@ class ImportItem(models.Model):
             self.data['Title'],
             self.data['Author']
         )
-        search_result = books_manager.first_search_result(search_term)
+        search_result = books_manager.first_search_result(
+            search_term, min_confidence=0.5
+        )
         if search_result:
-            return books_manager.get_or_create_book(search_result.key)
+            try:
+                return books_manager.get_or_create_book(search_result.key)
+            except ConnectorException:
+                pass
         return None

     @property
     def isbn(self):
@@ -95,6 +102,7 @@ class ImportItem(models.Model):
         ''' the goodreads shelf field '''
         if self.data['Exclusive Shelf']:
             return GOODREADS_SHELVES.get(self.data['Exclusive Shelf'])
+        return None

     @property
     def review(self):
@@ -111,12 +119,14 @@ class ImportItem(models.Model):
         ''' when the book was added to this dataset '''
         if self.data['Date Added']:
             return dateutil.parser.parse(self.data['Date Added'])
+        return None

     @property
     def date_read(self):
         ''' the date a book was completed '''
         if self.data['Date Read']:
             return dateutil.parser.parse(self.data['Date Read'])
+        return None

     @property
     def reads(self):
@@ -126,6 +136,7 @@ class ImportItem(models.Model):
             return [ReadThrough(start_date=self.date_added)]
         if self.date_read:
             return [ReadThrough(
                 start_date=self.date_added,
                 finish_date=self.date_read,
             )]
+        return []
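With the explicit return None / return [] fall-throughs in place, the reads property maps the two Goodreads dates onto read-throughs predictably. Roughly, per the hunks above (the dates are the ones the updated tests use):

import dateutil.parser

row = {'Date Added': '2019/04/09', 'Date Read': '2019/04/12'}
start = dateutil.parser.parse(row['Date Added'])    # 2019-04-09 00:00
finish = dateutil.parser.parse(row['Date Read'])    # 2019-04-12 00:00
# both dates   -> [ReadThrough(start_date=start, finish_date=finish)]
# only 'Added' -> [ReadThrough(start_date=start)]
# neither      -> []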
@@ -155,51 +155,49 @@ def handle_unshelve(user, book, shelf):
     broadcast(user, activity)


-def handle_import_books(user, items):
+def handle_imported_book(user, item):
     ''' process a goodreads csv and then post about it '''
-    new_books = []
-    for item in items:
+    if isinstance(item.book, models.Work):
+        item.book = item.book.default_edition
+    if not item.book:
+        return
+
     if item.shelf:
         desired_shelf = models.Shelf.objects.get(
             identifier=item.shelf,
             user=user
         )
-        if isinstance(item.book, models.Work):
-            item.book = item.book.default_edition
-        if not item.book:
-            continue
+        # shelve the book if it hasn't been shelved already
         shelf_book, created = models.ShelfBook.objects.get_or_create(
             book=item.book, shelf=desired_shelf, added_by=user)
         if created:
-            new_books.append(item.book)
-            activity = shelf_book.to_add_activity(user)
-            broadcast(user, activity)
+            broadcast(user, shelf_book.to_add_activity(user))

+            # only add new read-throughs if the item isn't already shelved
+            for read in item.reads:
+                read.book = item.book
+                read.user = user
+                read.save()

     if item.rating or item.review:
         review_title = 'Review of {!r} on Goodreads'.format(
             item.book.title,
         ) if item.review else ''

-        models.Review.objects.create(
+        # we don't know the publication date of the review,
+        # but "now" is a bad guess
+        published_date_guess = item.date_read or item.date_added
+        review = models.Review.objects.create(
             user=user,
             book=item.book,
             name=review_title,
             content=item.review,
             rating=item.rating,
+            published_date=published_date_guess,
         )
-        for read in item.reads:
-            read.book = item.book
-            read.user = user
-            read.save()
-
-    if new_books:
-        message = 'imported {} books'.format(len(new_books))
-        status = create_generated_note(user, message, mention_books=new_books)
-        status.save()
-
-        broadcast(user, status.to_create_activity(user))
-        return status
-    return None
+        # we don't need to send out pure activities because non-bookwyrm
+        # instances don't need this data
+        broadcast(user, review.to_create_activity(user))


 def handle_delete_status(user, status):
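The rewritten handler also backdates the review: federating a years-old review with today's timestamp would misplace it in timelines. The heuristic in isolation (the helper name is ours for illustration; the logic is the diff's):

def guess_published_date(date_read, date_added):
    # prefer the date the reader finished the book, fall back to when it
    # entered their Goodreads library; either beats claiming 'now'
    return date_read or date_added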
@@ -24,7 +24,7 @@ class ImportJob(TestCase):
             'Number of Pages': 416,
             'Year Published': 2019,
             'Original Publication Year': 2019,
-            'Date Read': '2019/04/09',
+            'Date Read': '2019/04/12',
             'Date Added': '2019/04/09',
             'Bookshelves': '',
             'Bookshelves with positions': '',
@@ -97,11 +97,9 @@ class ImportJob(TestCase):
         self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)

     def test_read_reads(self):
-        expected = [models.ReadThrough(
-            finish_date=datetime.datetime(2019, 4, 9, 0, 0))]
         actual = models.ImportItem.objects.get(index=2)
-        self.assertEqual(actual.reads[0].start_date, expected[0].start_date)
-        self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date)
+        self.assertEqual(actual.reads[0].start_date, datetime.datetime(2019, 4, 9, 0, 0))
+        self.assertEqual(actual.reads[0].finish_date, datetime.datetime(2019, 4, 12, 0, 0))

     def test_unread_reads(self):
         expected = []
@@ -489,7 +489,8 @@ def book_page(request, book_id):
     ).values_list('identifier', flat=True)

     readthroughs = models.ReadThrough.objects.filter(
-        user=request.user
+        user=request.user,
+        book=book,
     ).order_by('start_date')