bookwyrm/fedireads/goodreads_import.py

96 lines
2.6 KiB
Python
Raw Normal View History

2020-03-29 07:05:09 +00:00
''' handle reading a csv from goodreads '''
import re
import csv
import itertools
from requests import HTTPError
from fedireads import books_manager
2020-03-29 07:05:09 +00:00
2020-03-25 12:58:27 +00:00
# Lookup table: goodreads shelf name (key) -> fedireads shelf title (value).
GOODREADS_SHELVES = {
    'read': 'read',
    'currently-reading': 'reading',
    'to-read': 'to-read',
}
2020-03-29 07:05:09 +00:00
# Upper bound on the number of csv rows GoodreadsCsv reads when iterated;
# rows beyond this count are not processed.
# TODO: remove or notify about this in the UI
MAX_ENTRIES = 20
2020-03-25 12:58:27 +00:00
2020-03-29 07:05:09 +00:00
def unquote_string(text):
    ''' unwrap a csv value written as ="..." and return what is inside '''
    # Only a wrapper at the very start of the value is recognised; the
    # captured part is everything up to the next double quote.
    wrapped = re.match(r'="([^"]*)"', text)
    # Values that do not start with the wrapper are handed back untouched.
    return wrapped.group(1) if wrapped else text
2020-03-29 07:05:09 +00:00
def construct_search_term(title, author):
    ''' formulate a query for the data connector

    title: book title; any parenthesised part is removed
    author: author name; dotted initials such as "J.K." are removed
    returns the cleaned title and author joined by single spaces
    '''
    # Strip brackets (usually series title from search term). A space is left
    # in their place so the words on either side do not run together.
    title = re.sub(r'\s*\([^)]*\)\s*', ' ', title)
    # Open library doesn't like including author initials in search term.
    # The word boundary restricts the match to single-letter initials, so a
    # suffix like "Jr." is kept instead of being cut down to "J".
    author = re.sub(r'\b(?:\w\.)+\s*', '', author)
    # Splitting and re-joining removes the leading, trailing and doubled
    # spaces that the removals above (or an empty field) can leave behind.
    return ' '.join('{} {}'.format(title, author).split())
2020-03-29 07:05:09 +00:00
2020-04-01 13:58:30 +00:00
class GoodreadsCsv:
    ''' iterable wrapper around a goodreads csv file '''

    def __init__(self, csv_file):
        # Each row is read as a dict keyed by the csv header row.
        self.reader = csv.DictReader(csv_file)

    def __iter__(self):
        ''' yield a GoodreadsItem for each of the first MAX_ENTRIES rows '''
        capped_rows = itertools.islice(self.reader, MAX_ENTRIES)
        for row in capped_rows:
            item = GoodreadsItem(row)
            try:
                item.resolve()
            except HTTPError:
                # Best effort: a failed lookup does not stop the import, the
                # item is still yielded as it stands.
                pass
            yield item
2020-03-29 07:05:09 +00:00
2020-04-01 13:58:30 +00:00
class GoodreadsItem:
    ''' a processed line in a goodreads csv '''

    def __init__(self, line):
        # One csv row, as a dict keyed by column header.
        self.line = line
        # The matched book; stays None until resolve() finds one.
        self.book = None

    def resolve(self):
        ''' try various ways to lookup a book '''
        # The isbn lookup goes first; title and author are only the fallback.
        self.book = self.get_book_from_isbn()
        if not self.book:
            self.book = self.get_book_from_title_author()

    @staticmethod
    def _book_for_first_result(query):
        ''' fetch the book for the top search hit, None if nothing matched '''
        search_results = books_manager.search(query)
        if not search_results:
            return None
        return books_manager.get_or_create_book(search_results[0].key)

    def get_book_from_isbn(self):
        ''' search by isbn, returns None when the row has no isbn '''
        isbn = unquote_string(self.line['ISBN13'])
        if not isbn:
            # An empty isbn would run a search for an empty string; skip it
            # so that resolve() moves on to the title and author lookup.
            return None
        return self._book_for_first_result(isbn)

    def get_book_from_title_author(self):
        ''' search by title and author '''
        search_term = construct_search_term(
            self.line['Title'],
            self.line['Author']
        )
        return self._book_for_first_result(search_term)

    @property
    def shelf(self):
        ''' the goodreads shelf field

        returns the mapped shelf title, or None when the row has no
        exclusive shelf or names one that is not in GOODREADS_SHELVES
        '''
        shelf_name = self.line['Exclusive Shelf']
        if not shelf_name:
            return None
        # .get() rather than indexing: a shelf name missing from the mapping
        # yields None instead of raising KeyError.
        return GOODREADS_SHELVES.get(shelf_name)

    def __repr__(self):
        return "<GoodreadsItem {!r}>".format(self.line['Title'])

    def __str__(self):
        return "{} by {}".format(self.line['Title'], self.line['Author'])