Merge pull request #96 from cthulahoops/goodreads_import

Goodreads import
This commit is contained in:
Mouse Reeve 2020-03-27 09:10:00 -07:00 committed by GitHub
commit 072e8fe02a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 181 additions and 4 deletions

View file

@ -53,8 +53,12 @@ class AbstractConnector(ABC):
class SearchResult(object):
    ''' standardized search result object '''
    def __init__(self, title, key, author, year, raw_data=None):
        ''' store the normalized fields of one search hit

        title, key, author, year: values the connector extracted for display
            and lookup
        raw_data: the record the connector built this result from; optional
            so that code still using the four-argument form keeps working
        '''
        self.title = title
        self.key = key
        self.author = author
        self.year = year
        self.raw_data = raw_data

    def __repr__(self):
        ''' short debugging representation (raw_data is left out) '''
        return "<SearchResult key={!r} title={!r} author={!r}>".format(
            self.key, self.title, self.author)

View file

@ -31,6 +31,7 @@ class OpenLibraryConnector(AbstractConnector):
key,
author[0],
doc.get('first_publish_year'),
doc
))
return results

View file

@ -1,6 +1,7 @@
''' using django model forms '''
from django.core.validators import MaxValueValidator, MinValueValidator
from django.forms import ModelForm, PasswordInput, IntegerField
from django import forms
from fedireads import models
@ -73,3 +74,6 @@ class TagForm(ModelForm):
help_texts = {f: None for f in fields}
labels = {'name': 'Add a tag'}
class ImportForm(forms.Form):
    ''' upload form with a single file field, csv_file '''
    csv_file = forms.FileField()

View file

@ -0,0 +1,75 @@
import re
import csv
import itertools
from requests import HTTPError
from fedireads import books_manager
# Mapping goodreads -> fedireads shelf titles.
# Keys are matched against a row's 'Exclusive Shelf' column (see
# GoodreadsItem.shelf); values are fedireads shelf identifiers.
GOODREADS_SHELVES = {
'read': 'read',
'currently-reading': 'reading',
'to-read': 'to-read',
}
# Upper bound on how many csv rows GoodreadsCsv processes per import.
MAX_ENTRIES = 20
def unquote_string(text):
    ''' unwrap a value that starts with ="..." and return what is inside the
    quotes; any other text is returned unchanged '''
    quoted = re.match(r'="([^"]*)"', text)
    return quoted.group(1) if quoted else text
def construct_search_term(title, author):
    ''' build a free-text search query from a title and an author name

    title: book title; bracketed parts (usually the series name) are dropped
    author: author name; initials such as "J.R.R." are dropped
    returns: the cleaned title and author joined by a single space, with
        empty parts left out
    '''
    # Strip brackets (usually the series title) from the search term. Replace
    # with a space rather than nothing so a bracket in the middle of the
    # title does not glue the surrounding words together.
    title = re.sub(r'\s*\([^)]*\)\s*', ' ', title).strip()
    # Open library doesn't like including author initials in search term.
    # The \b keeps the pattern from eating the last letter of an abbreviation
    # such as "Jr." -- only letters that start a word count as initials.
    author = re.sub(r'\b(\w\.)+\s*', '', author).strip()
    return ' '.join(part for part in (title, author) if part)
class GoodreadsCsv(object):
    ''' iterable wrapper around a csv export that yields GoodreadsItems '''
    def __init__(self, csv_file):
        ''' csv_file: text-mode file object (or iterable of lines) whose first
        line holds the column names '''
        self.reader = csv.DictReader(csv_file)

    def __iter__(self):
        ''' yield a GoodreadsItem for each of the first MAX_ENTRIES rows, after
        trying to resolve it to a book '''
        rows = itertools.islice(self.reader, MAX_ENTRIES)
        for item in map(GoodreadsItem, rows):
            try:
                item.resolve()
            except HTTPError:
                # lookup failed: the item is still yielded, without a book
                pass
            yield item
class GoodreadsItem(object):
    ''' one row of a goodreads csv export and the book it resolves to '''
    def __init__(self, line):
        ''' line: mapping of csv column name -> value for this row '''
        self.line = line
        # filled in by resolve(); stays None when no matching book is found
        self.book = None

    def resolve(self):
        ''' look the book up by isbn first, then fall back to a title/author
        search; the result (or None) is stored on self.book '''
        self.book = self.get_book_from_isbn()
        if not self.book:
            self.book = self.get_book_from_title_author()

    def get_book_from_isbn(self):
        ''' find the book via the ISBN13 column; returns None when the row has
        no isbn or the search finds nothing '''
        raw_isbn = self.line.get('ISBN13')
        if not raw_isbn:
            return None
        isbn = unquote_string(raw_isbn)
        if not isbn:
            # the column was present but empty once unquoted; searching for
            # an empty string cannot identify this book
            return None
        return self._first_match(isbn)

    def get_book_from_title_author(self):
        ''' find the book via a search on the Title and Author columns '''
        search_term = construct_search_term(self.line['Title'], self.line['Author'])
        return self._first_match(search_term)

    @staticmethod
    def _first_match(query):
        ''' return the book for the top search result of query, or None when
        the search comes back empty '''
        search_results = books_manager.search(query)
        if not search_results:
            return None
        return books_manager.get_or_create_book(search_results[0].key)

    @property
    def shelf(self):
        ''' the fedireads shelf identifier for this row, or None when the row
        has no exclusive shelf or it has no fedireads counterpart '''
        shelf_name = self.line.get('Exclusive Shelf')
        if not shelf_name:
            return None
        # .get() so a shelf name outside the mapping leaves the book
        # unshelved instead of raising KeyError
        return GOODREADS_SHELVES.get(shelf_name)

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.line['Title'])

    def __str__(self):
        return "{} by {}".format(self.line['Title'], self.line['Author'])

View file

@ -54,6 +54,9 @@ class Book(FedireadsModel):
model_name = type(self).__name__.lower()
return '%s/%s/%s' % (base_path, model_name, self.openlibrary_key)
def __repr__(self):
    ''' debugging representation: class name, openlibrary key, title, author '''
    # use the bare class name; formatting the class object itself would nest
    # "<class '...'>" inside the angle brackets
    return "<{} key={!r} title={!r} author={!r}>".format(
        type(self).__name__,
        self.openlibrary_key,
        self.title,
        self.author,
    )
class Work(Book):
''' a work (an abstract concept of a book that manifests in an edition) '''

View file

@ -157,6 +157,31 @@ def handle_unshelve(user, book, shelf):
broadcast(user, activity, recipients)
def handle_import_books(user, items):
    ''' shelve imported books for a user and announce the additions

    user: the user the books are shelved for
    items: iterable of objects exposing .shelf (a shelf identifier, falsy to
        skip the item) and .book
    '''
    new_books = []
    for item in items:
        if not item.shelf:
            continue
        desired_shelf = models.Shelf.objects.get(
            identifier=item.shelf,
            user=user
        )
        _, created = models.ShelfBook.objects.get_or_create(
            book=item.book,
            shelf=desired_shelf,
            added_by=user
        )
        if not created:
            # the book was already on that shelf: nothing to announce
            continue
        new_books.append(item.book)
        activity = activitypub.get_add(user, item.book, desired_shelf)
        recipients = get_recipients(user, 'public')
        broadcast(user, activity, recipients)

    if not new_books:
        return

    # one summary status covering every newly shelved book
    message = 'imported {} books'.format(len(new_books))
    status = create_status(user, message, mention_books=new_books)
    status.status_type = 'Update'
    status.save()

    create_activity = activitypub.get_create(
        user,
        activitypub.get_status(status)
    )
    broadcast(user, create_activity, get_recipients(user, 'public'))
def handle_review(user, book, name, content, rating):
''' post a review '''
# validated and saves the review in the database so it has an id

View file

@ -0,0 +1,10 @@
{% extends 'layout.html' %}
{# upload page for a book import: renders import_form and posts it to /import_data/ #}
{% block content %}
<div id="content">
{# multipart/form-data so the selected file is sent along with the form #}
<form name="import" action="/import_data/" method="post" enctype="multipart/form-data">
{% csrf_token %}
{{ import_form.as_p }}
<button type="submit">Import</button>
</form>
</div>
{% endblock %}

View file

@ -0,0 +1,18 @@
{% extends 'layout.html' %}
{# import summary: lists every entry of "failures" and reports "success_count" #}
{% block content %}
<div id="content">
<div>
<h1>The following books could not be imported: </h1>
<ul>
{# each item is rendered through its string form #}
{% for item in failures %}
<li>
{{ item }}
</li>
{% endfor %}
</ul>
<p>{{ success_count }} books imported successfully</p>
</div>
</div>
{% endblock %}

View file

@ -31,6 +31,7 @@
{% endif %}
<li><a href="/#feed">Updates</a></li>
<li><a href="/books">Discover Books</a></li>
<li><a href="/import">Import Books</a></li>
</ul>
<div id="actions">

View file

@ -21,13 +21,15 @@
</h2>
{% if not hide_book and status.mention_books.count %}
{% for book in status.mention_books.all|slice:"0:3" %}
<div class="book-preview">
{% if status.status_type == 'Review' %}
{% include 'snippets/book.html' with book=status.mention_books.first %}
{% include 'snippets/book.html' with book=book %}
{% else %}
{% include 'snippets/book.html' with book=status.mention_books.first description=True %}
{% include 'snippets/book.html' with book=book description=True %}
{% endif %}
</div>
{% endfor %}
{% endif %}
{% if not hide_book and status.book%}
<div class="book-preview">

View file

@ -34,6 +34,7 @@ urlpatterns = [
re_path(r'^(?P<tab>home|local|federated)/?$', views.home_tab),
re_path(r'^notifications/?', views.notifications_page),
re_path(r'books/?$', views.books_page),
re_path(r'import/?$', views.import_page),
# should return a ui view or activitypub json blob as requested
# users
@ -81,5 +82,6 @@ urlpatterns = [
re_path(r'^accept_follow_request/?$', actions.accept_follow_request),
re_path(r'^delete_follow_request/?$', actions.delete_follow_request),
re_path(r'import_data', actions.import_data),
] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

View file

@ -1,14 +1,16 @@
''' views for actions you can take in the application '''
from io import TextIOWrapper
from django.contrib.auth import authenticate, login, logout
from django.contrib.auth.decorators import login_required
from django.http import HttpResponseBadRequest
from django.shortcuts import redirect
from django.template.response import TemplateResponse
import re
from fedireads import forms, models, books_manager, outgoing
from fedireads.settings import DOMAIN
from fedireads.views import get_user_from_username
from fedireads.goodreads_import import GoodreadsCsv
def user_login(request):
@ -288,4 +290,26 @@ def delete_follow_request(request):
outgoing.handle_outgoing_reject(requester, request.user, follow_request)
return redirect('/user/%s' % request.user.localname)
@login_required
def import_data(request):
    ''' ingest an uploaded goodreads csv export

    Responds with 400 when the form does not validate. Otherwise every row
    that resolved to a book is handed to outgoing.handle_import_books; if any
    row could not be resolved the results page is rendered, else the user is
    redirected to the front page.
    '''
    form = forms.ImportForm(request.POST, request.FILES)
    if not form.is_valid():
        return HttpResponseBadRequest()

    # request.encoding may be None, in which case TextIOWrapper would decode
    # with the host's locale encoding; fall back to utf-8 so the result does
    # not depend on the server's locale
    csv_file = TextIOWrapper(
        request.FILES['csv_file'],
        encoding=request.encoding or 'utf-8'
    )

    results = []
    failures = []
    for item in GoodreadsCsv(csv_file):
        if item.book:
            results.append(item)
        else:
            failures.append(item)

    outgoing.handle_import_books(request.user, results)
    if failures:
        return TemplateResponse(request, 'import_results.html', {
            'success_count': len(results),
            'failures': failures,
        })
    return redirect('/')

View file

@ -108,6 +108,14 @@ def books_page(request):
}
return TemplateResponse(request, 'books.html', data)
@login_required
def import_page(request):
    ''' import history from goodreads '''
    data = {'import_form': forms.ImportForm()}
    return TemplateResponse(request, 'import.html', data)
def login_page(request):
''' authentication '''