mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-01-10 17:25:35 +00:00
Handle uploaded CSV and match to openlibrary titles.
This commit is contained in:
parent
2188371f44
commit
ce446d57fc
4 changed files with 68 additions and 44 deletions
|
@ -31,6 +31,7 @@ class OpenLibraryConnector(AbstractConnector):
|
||||||
key,
|
key,
|
||||||
author[0],
|
author[0],
|
||||||
doc.get('first_publish_year'),
|
doc.get('first_publish_year'),
|
||||||
|
doc
|
||||||
))
|
))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
59
fedireads/goodreads_import.py
Normal file
59
fedireads/goodreads_import.py
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
import re
|
||||||
|
import csv
|
||||||
|
import itertools
|
||||||
|
from requests import HTTPError
|
||||||
|
|
||||||
|
from fedireads import books_manager
|
||||||
|
|
||||||
|
def unquote_string(text):
    ''' strip a leading ="..." wrapper from a CSV field value

    Returns the text between the quotes when `text` starts with the
    ="..." pattern, otherwise returns `text` unchanged. A value of None
    (which csv.DictReader supplies for fields missing from a short row)
    is returned as an empty string instead of raising TypeError.
    '''
    if text is None:
        return ''
    # re.match anchors at the start only, so anything after the closing
    # quote is ignored.
    match = re.match(r'="([^"]*)"', text)
    if match:
        return match.group(1)
    return text
|
||||||
|
|
||||||
|
def construct_search_term(title, author):
    ''' build a free-text search string from a book title and author

    Parenthesised text is removed from the title and dotted initials are
    removed from the author; the remaining non-empty parts are joined
    with a single space.
    '''
    # Strip brackets (usually the series title) from the search term.
    # Substitute a space rather than nothing so that a parenthetical in the
    # middle of a title does not glue the neighbouring words together.
    title = re.sub(r'\s*\([^)]*\)\s*', ' ', title).strip()
    # Open library doesn't like including author initials in search term.
    # The \b anchor restricts the match to stand-alone initials ("J.R.R."),
    # so the tail of an abbreviation such as "Jr." or "St." is left intact.
    author = re.sub(r'\b(?:\w\.)+\s*', '', author).strip()

    # Skip empty parts so a blank author does not leave a trailing space.
    return ' '.join(part for part in (title, author) if part)
|
||||||
|
|
||||||
|
class GoodreadsCsv(object):
    ''' iterate over the rows of a CSV file as resolved GoodreadsItems

    Only the rows in the half-open window [start, stop) are processed.
    The defaults (20, 30) reproduce the previously hard-coded window, which
    skips the first 20 data rows; pass start=0 and stop=None to process the
    whole file.
    '''

    def __init__(self, csv_file, start=20, stop=30):
        ''' csv_file: an iterable of text lines, as accepted by csv.DictReader
        start: index of the first data row to process
        stop: index after the last data row to process, or None for no limit
        '''
        self.reader = csv.DictReader(csv_file)
        self.start = start
        self.stop = stop

    def __iter__(self):
        ''' yield one GoodreadsItem per row in the configured window '''
        for line in itertools.islice(self.reader, self.start, self.stop):
            entry = GoodreadsItem(line)
            try:
                entry.resolve()
            except HTTPError:
                # Deliberate best-effort: a failed lookup must not abort the
                # whole import. The entry is still yielded so the caller can
                # inspect entry.book and report the row as unmatched.
                pass
            yield entry
|
||||||
|
|
||||||
|
class GoodreadsItem(object):
    ''' a single CSV row together with the book it was matched to, if any '''

    def __init__(self, line):
        ''' line: mapping of CSV column name to value for one row '''
        self.line = line
        # Stays None until resolve() finds a match.
        self.book = None

    def resolve(self):
        ''' look the row up by ISBN first, then fall back to title + author

        The result (or None when nothing matched) is stored in self.book.
        Exceptions raised by the underlying search are not caught here.
        '''
        self.book = self.get_book_from_isbn()
        if not self.book:
            self.book = self.get_book_from_title_author()

    def get_book_from_isbn(self):
        ''' return the book matching the row's ISBN13 column, or None '''
        raw_isbn = self.line.get('ISBN13')
        if not raw_isbn:
            # Column missing, None or blank: nothing to search for.
            return None
        isbn = unquote_string(raw_isbn)
        if not isbn:
            # The field was an empty ="" wrapper; searching for an empty
            # string would waste a request and could match an arbitrary book.
            return None
        return self._first_search_result(isbn)

    def get_book_from_title_author(self):
        ''' return the book matching the row's Title and Author, or None '''
        search_term = construct_search_term(
            self.line['Title'], self.line['Author'])
        return self._first_search_result(search_term)

    @staticmethod
    def _first_search_result(query):
        ''' return the book for the first search hit for `query`, or None '''
        search_results = books_manager.search(query)
        if search_results:
            return books_manager.get_or_create_book(search_results[0].key)
        return None

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.line['Title'])
|
|
@ -18,8 +18,7 @@
|
||||||
<ul>
|
<ul>
|
||||||
{% for book in failures %}
|
{% for book in failures %}
|
||||||
<li>
|
<li>
|
||||||
{{ book.Title }}
|
{{ book }}
|
||||||
{{ book.Author }}
|
|
||||||
</li>
|
</li>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</ul>
|
</ul>
|
||||||
|
|
|
@ -1,15 +1,16 @@
|
||||||
''' views for actions you can take in the application '''
|
''' views for actions you can take in the application '''
|
||||||
|
from io import TextIOWrapper
|
||||||
|
|
||||||
from django.contrib.auth import authenticate, login, logout
|
from django.contrib.auth import authenticate, login, logout
|
||||||
from django.contrib.auth.decorators import login_required
|
from django.contrib.auth.decorators import login_required
|
||||||
from django.http import HttpResponseBadRequest
|
from django.http import HttpResponseBadRequest
|
||||||
from django.shortcuts import redirect
|
from django.shortcuts import redirect
|
||||||
from django.template.response import TemplateResponse
|
from django.template.response import TemplateResponse
|
||||||
import re
|
|
||||||
import csv
|
|
||||||
|
|
||||||
from fedireads import forms, models, books_manager, outgoing
|
from fedireads import forms, models, books_manager, outgoing
|
||||||
from fedireads.settings import DOMAIN
|
from fedireads.settings import DOMAIN
|
||||||
from fedireads.views import get_user_from_username
|
from fedireads.views import get_user_from_username
|
||||||
|
from fedireads.goodreads_import import GoodreadsCsv
|
||||||
|
|
||||||
|
|
||||||
def user_login(request):
|
def user_login(request):
|
||||||
|
@ -289,54 +290,18 @@ def delete_follow_request(request):
|
||||||
|
|
||||||
outgoing.handle_outgoing_reject(requester, request.user, follow_request)
|
outgoing.handle_outgoing_reject(requester, request.user, follow_request)
|
||||||
return redirect('/user/%s' % request.user.localname)
|
return redirect('/user/%s' % request.user.localname)
|
||||||
|
|
||||||
def unquote_string(text):
|
|
||||||
match = re.match(r'="([^"]*)"', text)
|
|
||||||
if match:
|
|
||||||
return match.group(1)
|
|
||||||
else:
|
|
||||||
return text
|
|
||||||
|
|
||||||
def construct_search_term(title, author):
|
|
||||||
# Strip brackets (usually series title from search term)
|
|
||||||
title = re.sub(r'\s*\([^)]*\)\s*', '', title)
|
|
||||||
# Open library doesn't like including author initials in search term.
|
|
||||||
author = re.sub(r'(\w\.)+\s*', '', author)
|
|
||||||
|
|
||||||
return ' '.join([title, author])
|
|
||||||
|
|
||||||
import itertools
|
|
||||||
from io import TextIOWrapper
|
|
||||||
from requests import HTTPError
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
def import_data(request):
|
def import_data(request):
|
||||||
form = forms.ImportForm(request.POST, request.FILES)
|
form = forms.ImportForm(request.POST, request.FILES)
|
||||||
if form.is_valid():
|
if form.is_valid():
|
||||||
reader = csv.DictReader(TextIOWrapper(request.FILES['csv_file'], encoding=request.encoding))
|
|
||||||
results = []
|
results = []
|
||||||
failures = []
|
failures = []
|
||||||
for line in itertools.islice(reader, 20):
|
for item in GoodreadsCsv(TextIOWrapper(request.FILES['csv_file'], encoding=request.encoding)):
|
||||||
isbn = unquote_string(line['ISBN13'])
|
if item.book:
|
||||||
print(line['Title'], isbn, line['Exclusive Shelf'])
|
results.append(item.book)
|
||||||
search_results = books_manager.search(isbn)
|
|
||||||
if search_results:
|
|
||||||
book = books_manager.get_or_create_book(search_results[0].key)
|
|
||||||
print(book)
|
|
||||||
results.append(book)
|
|
||||||
else:
|
else:
|
||||||
try:
|
failures.append(item)
|
||||||
search_term = construct_search_term(line['Title'], line['Author'])
|
|
||||||
print("Search term: ", search_term)
|
|
||||||
search_results = books_manager.search(search_term)
|
|
||||||
if search_results:
|
|
||||||
book = books_manager.get_or_create_book(search_results[0].key)
|
|
||||||
print(book)
|
|
||||||
results.append(book)
|
|
||||||
else:
|
|
||||||
failures.append(line)
|
|
||||||
except HTTPError:
|
|
||||||
failures.append(line) #
|
|
||||||
return TemplateResponse(request, 'import_results.html', {
|
return TemplateResponse(request, 'import_results.html', {
|
||||||
'results': results,
|
'results': results,
|
||||||
'failures': failures
|
'failures': failures
|
||||||
|
|
Loading…
Reference in a new issue