moviewyrm/bookwyrm/models/book.py

226 lines
8.3 KiB
Python
Raw Normal View History

2020-02-15 22:38:46 +00:00
''' database schema for books and shelves '''
2020-10-30 19:57:31 +00:00
import re
2020-02-11 23:17:21 +00:00
from django.db import models
from django.db.models import Q
from django.utils import timezone
from model_utils.managers import InheritanceManager
2020-02-17 03:17:11 +00:00
from bookwyrm import activitypub
from bookwyrm.settings import DOMAIN
from bookwyrm.utils.fields import ArrayField
2020-02-15 22:38:46 +00:00
2020-11-04 20:55:00 +00:00
from .base_model import ActivityMapping, BookWyrmModel
from .base_model import ActivitypubMixin, OrderedCollectionPageMixin
2020-09-21 15:16:34 +00:00
class Book(ActivitypubMixin, BookWyrmModel):
    ''' a generic book, which can mean either an edition or a work '''
    origin_id = models.CharField(max_length=255, null=True, blank=True)

    # these identifiers apply to both works and editions
    openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
    librarything_key = models.CharField(max_length=255, blank=True, null=True)
    goodreads_key = models.CharField(max_length=255, blank=True, null=True)

    # info about where the data comes from and where/if to sync
    sync = models.BooleanField(default=True)
    sync_cover = models.BooleanField(default=True)
    last_sync_date = models.DateTimeField(default=timezone.now)
    connector = models.ForeignKey(
        'Connector', on_delete=models.PROTECT, null=True)

    # TODO: edit history

    # book/work metadata
    title = models.CharField(max_length=255)
    sort_title = models.CharField(max_length=255, blank=True, null=True)
    subtitle = models.CharField(max_length=255, blank=True, null=True)
    description = models.TextField(blank=True, null=True)
    languages = ArrayField(
        models.CharField(max_length=255), blank=True, default=list
    )
    series = models.CharField(max_length=255, blank=True, null=True)
    series_number = models.CharField(max_length=255, blank=True, null=True)
    subjects = ArrayField(
        models.CharField(max_length=255), blank=True, default=list
    )
    subject_places = ArrayField(
        models.CharField(max_length=255), blank=True, default=list
    )
    # TODO: include an annotation about the type of authorship (ie, translator)
    authors = models.ManyToManyField('Author')
    # preformatted authorship string for search and easier display
    author_text = models.CharField(max_length=255, blank=True, null=True)
    cover = models.ImageField(upload_to='covers/', blank=True, null=True)
    first_published_date = models.DateTimeField(blank=True, null=True)
    published_date = models.DateTimeField(blank=True, null=True)

    # InheritanceManager comes from model_utils; Book is queried through it
    # rather than through the default manager
    objects = InheritanceManager()

    @property
    def ap_authors(self):
        ''' the activitypub serialization should be a list of author ids '''
        return [a.remote_id for a in self.authors.all()]

    # each mapping pairs a camelCase activitypub key (first argument) with the
    # name of a model attribute (second argument). several of the attributes
    # named here (parent_work, isbn_10, lccn, editions_path, ...) are only
    # defined on the Work or Edition subclasses, not on Book itself.
    activity_mappings = [
        ActivityMapping('id', 'remote_id'),

        ActivityMapping('authors', 'ap_authors'),
        # the model field is first_published_date; the second argument has to
        # match that attribute name exactly
        ActivityMapping('firstPublishedDate', 'first_published_date'),
        ActivityMapping('publishedDate', 'published_date'),

        ActivityMapping('title', 'title'),
        ActivityMapping('sortTitle', 'sort_title'),
        ActivityMapping('subtitle', 'subtitle'),
        ActivityMapping('description', 'description'),
        ActivityMapping('languages', 'languages'),
        ActivityMapping('series', 'series'),
        ActivityMapping('seriesNumber', 'series_number'),
        ActivityMapping('subjects', 'subjects'),
        ActivityMapping('subjectPlaces', 'subject_places'),

        ActivityMapping('openlibraryKey', 'openlibrary_key'),
        ActivityMapping('librarythingKey', 'librarything_key'),
        ActivityMapping('goodreadsKey', 'goodreads_key'),

        ActivityMapping('work', 'parent_work'),
        ActivityMapping('isbn10', 'isbn_10'),
        ActivityMapping('isbn13', 'isbn_13'),
        ActivityMapping('oclcNumber', 'oclc_number'),
        ActivityMapping('asin', 'asin'),
        ActivityMapping('pages', 'pages'),
        ActivityMapping('physicalFormat', 'physical_format'),
        ActivityMapping('publishers', 'publishers'),

        ActivityMapping('lccn', 'lccn'),
        ActivityMapping('editions', 'editions_path'),
        ActivityMapping('cover', 'cover'),
    ]

    def save(self, *args, **kwargs):
        ''' can't be abstract for query reasons, but you shouldn't USE it

        raises ValueError when called on a plain Book that is neither an
        Edition nor a Work '''
        if not isinstance(self, (Edition, Work)):
            raise ValueError('Books should be added as Editions or Works')

        # the remote id is built from the primary key, so it can only be
        # filled in once the row already has an id
        if self.id and not self.remote_id:
            self.remote_id = self.get_remote_id()

        super().save(*args, **kwargs)

    def get_remote_id(self):
        ''' editions and works both use "book" instead of model_name '''
        return 'https://%s/book/%d' % (DOMAIN, self.id)

    def __repr__(self):
        ''' debugging representation: class, openlibrary key and title '''
        return "<{} key={!r} title={!r}>".format(
            self.__class__,
            self.openlibrary_key,
            self.title,
        )
2020-03-07 06:56:44 +00:00
2020-11-04 20:55:00 +00:00
class Work(OrderedCollectionPageMixin, Book):
    ''' a work (an abstract concept of a book that manifests in an edition) '''
    # library of congress catalog control number
    lccn = models.CharField(max_length=255, blank=True, null=True)
    # this has to be nullable but should never be null
    default_edition = models.ForeignKey(
        'Edition',
        on_delete=models.PROTECT,
        null=True
    )

    @property
    def editions_path(self):
        ''' it'd be nice to serialize the edition instead but, recursion

        returns a list of edition remote ids, with the default edition first
        when there is one '''
        default = self.default_edition
        if default is None:
            # the field is nullable, so don't crash on a work that has no
            # default edition yet; just list whatever editions exist
            return [e.remote_id for e in self.edition_set.all()]

        ed_list = [
            e.remote_id
            for e in self.edition_set.filter(~Q(id=default.id)).all()
        ]
        return [default.remote_id] + ed_list

    def to_edition_list(self, **kwargs):
        ''' activitypub serialization for this work's editions '''
        remote_id = self.remote_id + '/editions'
        return self.to_ordered_collection(
            self.edition_set,
            remote_id=remote_id,
            **kwargs
        )

    activity_serializer = activitypub.Work
2020-03-07 06:56:44 +00:00
class Edition(Book):
    ''' a specific published edition of a book '''
    # identifiers that exist for editions but not for works
    isbn_10 = models.CharField(max_length=255, blank=True, null=True)
    isbn_13 = models.CharField(max_length=255, blank=True, null=True)
    oclc_number = models.CharField(max_length=255, blank=True, null=True)
    asin = models.CharField(max_length=255, blank=True, null=True)
    pages = models.IntegerField(blank=True, null=True)
    physical_format = models.CharField(
        max_length=255, blank=True, null=True)
    publishers = ArrayField(
        models.CharField(max_length=255), blank=True, default=list
    )
    shelves = models.ManyToManyField(
        'Shelf',
        symmetrical=False,
        through='ShelfBook',
        through_fields=('book', 'shelf')
    )
    parent_work = models.ForeignKey(
        'Work', on_delete=models.PROTECT, null=True)

    activity_serializer = activitypub.Edition

    def save(self, *args, **kwargs):
        ''' fill in whichever isbn is missing from the one that is set '''
        # only a "978" isbn 13 is converted down to an isbn 10
        if self.isbn_13:
            if self.isbn_13[:3] == '978':
                if not self.isbn_10:
                    self.isbn_10 = isbn_13_to_10(self.isbn_13)

        # runs after the step above, so it sees any isbn 10 just derived
        if self.isbn_10:
            if not self.isbn_13:
                self.isbn_13 = isbn_10_to_13(self.isbn_10)

        return super().save(*args, **kwargs)
2020-10-29 19:32:37 +00:00
def isbn_10_to_13(isbn_10):
    ''' convert an isbn 10 into an isbn 13

    separators (hyphens, spaces, ...) are ignored and a lowercase "x" check
    character is accepted. returns the isbn 13 as a string, or None when the
    input is not ten characters long once cleaned up or when one of its first
    nine characters is not a digit '''
    # uppercase first so a trailing "x" survives the cleanup below
    isbn_10 = re.sub(r'[^0-9X]', '', isbn_10.upper())
    # anything that isn't exactly ten characters can't be an isbn 10;
    # converting it anyway would produce a meaningless number
    if len(isbn_10) != 10:
        return None

    # drop the last character of the isbn 10 number (the original checkdigit)
    # and add "978" to the front
    converted = '978' + isbn_10[:9]

    # add a check digit to the end
    # multiply the odd digits by 1 and the even digits by 3 and sum them
    try:
        checksum = sum(int(i) for i in converted[::2]) + \
            sum(int(i) * 3 for i in converted[1::2])
    except ValueError:
        # an "X" anywhere but the final position is not a valid isbn 10
        return None

    # the check digit brings the sum up to the next multiple of ten
    checkdigit = (10 - checksum % 10) % 10
    return converted + str(checkdigit)
def isbn_13_to_10(isbn_13):
    ''' convert isbn 13 to 10, if possible

    only isbn 13s with the "978" prefix have an isbn 10 equivalent. returns
    the isbn 10 as a string, or None when the input is not thirteen
    characters long once separators are stripped, does not start with "978",
    or contains a non-digit in the part that is converted '''
    isbn_13 = re.sub(r'[^0-9X]', '', isbn_13)
    # check the prefix on the cleaned value so stray separators or leading
    # whitespace don't hide it
    if len(isbn_13) != 13 or isbn_13[:3] != '978':
        return None

    # remove '978' and old checkdigit
    converted = isbn_13[3:-1]

    # calculate checkdigit
    # multiple each digit by 10,9,8.. successively and sum them
    try:
        checksum = sum(
            int(d) * (10 - idx) for (idx, d) in enumerate(converted)
        )
    except ValueError:
        return None

    # the outer modulo maps a remainder of 0 to check digit 0 instead of 11
    checkdigit = (11 - checksum % 11) % 11
    if checkdigit == 10:
        checkdigit = 'X'
    return converted + str(checkdigit)