Handle uploaded CSV and match to openlibrary titles.

This commit is contained in:
Adam Kelly 2020-03-25 12:29:21 +00:00
parent 2188371f44
commit ce446d57fc
4 changed files with 68 additions and 44 deletions

View file

@ -31,6 +31,7 @@ class OpenLibraryConnector(AbstractConnector):
key, key,
author[0], author[0],
doc.get('first_publish_year'), doc.get('first_publish_year'),
doc
)) ))
return results return results

View file

@ -0,0 +1,59 @@
import re
import csv
import itertools
from requests import HTTPError
from fedireads import books_manager
def unquote_string(text):
    ''' Strip a leading ="..." wrapper from a CSV cell value.

    Returns the text between the quotes when ``text`` starts with
    ``="..."``; otherwise returns ``text`` unchanged. ``None`` (which
    csv.DictReader uses to pad rows that are shorter than the header)
    is returned as an empty string instead of raising TypeError.
    '''
    if text is None:
        return ''
    # re.match only anchors at the start, so anything after the closing
    # quote is ignored, exactly as before.
    match = re.match(r'="([^"]*)"', text)
    return match.group(1) if match else text
def construct_search_term(title, author):
    ''' Build a free-text search query from a book title and author.

    Bracketed groups are removed from the title and dotted initials are
    removed from the author; the remaining parts are joined with single
    spaces. Empty or ``None`` parts are left out, so the result never
    has leading, trailing or doubled spaces.
    '''
    # Strip brackets (usually the series title) from the search term.
    # Replace them with a space and re-split, so words on either side of
    # a mid-title bracket are not glued together.
    title = ' '.join(re.sub(r'\([^)]*\)', ' ', title or '').split())
    # Open library doesn't like including author initials in search term.
    # The \b keeps the pattern from eating the tail of an abbreviation
    # such as "Jr." (which previously became "J").
    author = re.sub(r'\b(?:\w\.)+\s*', '', author or '').strip()
    return ' '.join(part for part in (title, author) if part)
class GoodreadsCsv(object):
    ''' Iterable over a window of rows from an uploaded CSV file.

    Each row in the window is wrapped in a GoodreadsItem and resolved
    against the book search before being yielded.

    csv_file -- text-mode file object, passed to csv.DictReader.
    start, stop -- row window handed to itertools.islice. The defaults
        (20, 30) keep the previously hard-coded behaviour of skipping
        the first 20 rows and processing at most 10. Pass ``start=0,
        stop=None`` to process every row.
    '''

    def __init__(self, csv_file, start=20, stop=30):
        self.reader = csv.DictReader(csv_file)
        self.start = start
        self.stop = stop

    def __iter__(self):
        for line in itertools.islice(self.reader, self.start, self.stop):
            entry = GoodreadsItem(line)
            try:
                entry.resolve()
            except HTTPError:
                # Deliberate best-effort: one failed lookup must not
                # abort the whole import. The entry is still yielded;
                # callers can tell it failed because entry.book is unset.
                pass
            yield entry
class GoodreadsItem(object):
    ''' One CSV row plus the book it was matched to, if any.

    ``line`` is the row mapping (as produced by csv.DictReader); the
    'ISBN13', 'Title' and 'Author' keys are read from it. ``book`` stays
    None until resolve() finds a match.
    '''

    def __init__(self, line):
        self.line = line
        self.book = None

    def resolve(self):
        ''' Look the row up by ISBN, falling back to title and author.

        An HTTPError from the ISBN lookup no longer prevents the
        fallback. An HTTPError from the fallback itself still propagates
        to the caller.
        '''
        try:
            self.book = self.get_book_from_isbn()
        except HTTPError:
            self.book = None
        if not self.book:
            self.book = self.get_book_from_title_author()

    def get_book_from_isbn(self):
        ''' Return the first search hit for the row's ISBN13, or None. '''
        raw_isbn = self.line.get('ISBN13')
        if not raw_isbn:
            # Column missing, empty, or padded with None by DictReader.
            return None
        isbn = unquote_string(raw_isbn).strip()
        if not isbn:
            # A cell of ="" unquotes to nothing; searching for an empty
            # string would waste a request and could match anything.
            return None
        return self._first_match(isbn)

    def get_book_from_title_author(self):
        ''' Return the first search hit for title + author, or None. '''
        title = self.line.get('Title') or ''
        author = self.line.get('Author') or ''
        if not (title or author):
            return None
        search_term = construct_search_term(title, author)
        if not search_term.strip():
            return None
        return self._first_match(search_term)

    def _first_match(self, query):
        ''' Search for ``query`` and load the top result, or return None. '''
        search_results = books_manager.search(query)
        if not search_results:
            return None
        return books_manager.get_or_create_book(search_results[0].key)

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.line.get('Title'))

View file

@ -18,8 +18,7 @@
<ul> <ul>
{% for book in failures %} {% for book in failures %}
<li> <li>
{{ book.Title }} {{ book }}
{{ book.Author }}
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>

View file

@ -1,15 +1,16 @@
''' views for actions you can take in the application ''' ''' views for actions you can take in the application '''
from io import TextIOWrapper
from django.contrib.auth import authenticate, login, logout from django.contrib.auth import authenticate, login, logout
from django.contrib.auth.decorators import login_required from django.contrib.auth.decorators import login_required
from django.http import HttpResponseBadRequest from django.http import HttpResponseBadRequest
from django.shortcuts import redirect from django.shortcuts import redirect
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
import re
import csv
from fedireads import forms, models, books_manager, outgoing from fedireads import forms, models, books_manager, outgoing
from fedireads.settings import DOMAIN from fedireads.settings import DOMAIN
from fedireads.views import get_user_from_username from fedireads.views import get_user_from_username
from fedireads.goodreads_import import GoodreadsCsv
def user_login(request): def user_login(request):
@ -289,54 +290,18 @@ def delete_follow_request(request):
outgoing.handle_outgoing_reject(requester, request.user, follow_request) outgoing.handle_outgoing_reject(requester, request.user, follow_request)
return redirect('/user/%s' % request.user.localname) return redirect('/user/%s' % request.user.localname)
def unquote_string(text):
    ''' Strip a leading ="..." wrapper from a CSV cell value.

    Returns the text between the quotes when ``text`` starts with
    ``="..."``; otherwise returns ``text`` unchanged. ``None`` (which
    csv.DictReader uses to pad rows that are shorter than the header)
    is returned as an empty string instead of raising TypeError.
    '''
    if text is None:
        return ''
    # re.match only anchors at the start, so anything after the closing
    # quote is ignored, exactly as before.
    match = re.match(r'="([^"]*)"', text)
    return match.group(1) if match else text
def construct_search_term(title, author):
    ''' Build a free-text search query from a book title and author.

    Bracketed groups are removed from the title and dotted initials are
    removed from the author; the remaining parts are joined with single
    spaces. Empty or ``None`` parts are left out, so the result never
    has leading, trailing or doubled spaces.
    '''
    # Strip brackets (usually the series title) from the search term.
    # Replace them with a space and re-split, so words on either side of
    # a mid-title bracket are not glued together.
    title = ' '.join(re.sub(r'\([^)]*\)', ' ', title or '').split())
    # Open library doesn't like including author initials in search term.
    # The \b keeps the pattern from eating the tail of an abbreviation
    # such as "Jr." (which previously became "J").
    author = re.sub(r'\b(?:\w\.)+\s*', '', author or '').strip()
    return ' '.join(part for part in (title, author) if part)
import itertools
from io import TextIOWrapper
from requests import HTTPError
@login_required @login_required
def import_data(request): def import_data(request):
form = forms.ImportForm(request.POST, request.FILES) form = forms.ImportForm(request.POST, request.FILES)
if form.is_valid(): if form.is_valid():
reader = csv.DictReader(TextIOWrapper(request.FILES['csv_file'], encoding=request.encoding))
results = [] results = []
failures = [] failures = []
for line in itertools.islice(reader, 20): for item in GoodreadsCsv(TextIOWrapper(request.FILES['csv_file'], encoding=request.encoding)):
isbn = unquote_string(line['ISBN13']) if item.book:
print(line['Title'], isbn, line['Exclusive Shelf']) results.append(item.book)
search_results = books_manager.search(isbn)
if search_results:
book = books_manager.get_or_create_book(search_results[0].key)
print(book)
results.append(book)
else: else:
try: failures.append(item)
search_term = construct_search_term(line['Title'], line['Author'])
print("Search term: ", search_term)
search_results = books_manager.search(search_term)
if search_results:
book = books_manager.get_or_create_book(search_results[0].key)
print(book)
results.append(book)
else:
failures.append(line)
except HTTPError:
failures.append(line) #
return TemplateResponse(request, 'import_results.html', { return TemplateResponse(request, 'import_results.html', {
'results': results, 'results': results,
'failures': failures 'failures': failures