moviewyrm/bookwyrm/models/book.py

295 lines
10 KiB
Python
Raw Normal View History

2021-03-08 16:49:10 +00:00
""" database schema for books and shelves """
2020-10-30 19:57:31 +00:00
import re
from django.db import models, transaction
from model_utils.managers import InheritanceManager
2020-02-17 03:17:11 +00:00
from bookwyrm import activitypub
from bookwyrm.settings import DOMAIN
2020-02-15 22:38:46 +00:00
from .activitypub_mixin import OrderedCollectionPageMixin, ObjectMixin
2020-11-30 22:40:26 +00:00
from .base_model import BookWyrmModel
from . import fields
2021-03-08 16:49:10 +00:00
2021-02-04 21:21:55 +00:00
class BookDataModel(ObjectMixin, BookWyrmModel):
    """ abstract base for editable book data (books, works, authors) """

    # the remote_id an object had before its first save (see save() below)
    origin_id = models.CharField(max_length=255, null=True, blank=True)

    # identifiers from external catalogues; each is flagged as a
    # deduplication field for the project's custom field classes
    openlibrary_key = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    inventaire_id = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    librarything_key = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    goodreads_key = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    bnf_id = fields.CharField(  # Bibliothèque nationale de France
        max_length=255, blank=True, null=True, deduplication_field=True
    )

    last_edited_by = models.ForeignKey("User", on_delete=models.PROTECT, null=True)

    class Meta:
        """ abstract: this model is never instantiated on its own """

        abstract = True

    def save(self, *args, **kwargs):
        """ keep remote_id local to this instance

        An object without an id yet has its current remote_id moved into
        origin_id and its remote_id cleared; an object that already has an
        id gets its remote_id from get_remote_id().
        """
        if not self.id:
            # first save: remember the incoming remote_id, then blank it
            self.origin_id, self.remote_id = self.remote_id, None
        else:
            self.remote_id = self.get_remote_id()
        return super().save(*args, **kwargs)

    def broadcast(self, activity, sender, software="bookwyrm"):
        """ only send book data updates to other bookwyrm instances """
        # same call as the parent class, but software defaults to "bookwyrm"
        super().broadcast(activity, sender, software=software)
class Book(BookDataModel):
    """ a generic book: concretely either an Edition or a Work """

    connector = models.ForeignKey("Connector", on_delete=models.PROTECT, null=True)

    # book/work metadata
    title = fields.CharField(max_length=255)
    sort_title = fields.CharField(max_length=255, blank=True, null=True)
    subtitle = fields.CharField(max_length=255, blank=True, null=True)
    description = fields.HtmlField(blank=True, null=True)
    languages = fields.ArrayField(
        models.CharField(max_length=255), blank=True, default=list
    )
    series = fields.CharField(max_length=255, blank=True, null=True)
    series_number = fields.CharField(max_length=255, blank=True, null=True)
    subjects = fields.ArrayField(
        models.CharField(max_length=255), blank=True, null=True, default=list
    )
    subject_places = fields.ArrayField(
        models.CharField(max_length=255), blank=True, null=True, default=list
    )
    authors = fields.ManyToManyField("Author")
    cover = fields.ImageField(
        upload_to="covers/", blank=True, null=True, alt_field="alt_text"
    )
    first_published_date = fields.DateTimeField(blank=True, null=True)
    published_date = fields.DateTimeField(blank=True, null=True)

    objects = InheritanceManager()

    @property
    def author_text(self):
        """ the names of all authors, comma separated """
        names = [author.name for author in self.authors.all()]
        return ", ".join(names)

    @property
    def latest_readthrough(self):
        """ the readthrough with the newest updated_date, if any """
        readthroughs = self.readthrough_set.order_by("-updated_date")
        return readthroughs.first()

    @property
    def edition_info(self):
        """ properties of this edition, as a string

        Joins, in order: physical format, "<language> language" (only when
        the first language is not "English"), publication year, and the
        publishers. Empty or missing pieces are left out.
        """
        parts = []
        # physical_format and publishers are not declared on Book itself,
        # so only read them when the instance actually has them
        if hasattr(self, "physical_format"):
            parts.append(self.physical_format)
        if self.languages and self.languages[0] != "English":
            parts.append(self.languages[0] + " language")
        if self.published_date:
            parts.append(str(self.published_date.year))
        if hasattr(self, "publishers"):
            parts.append(", ".join(self.publishers))
        return ", ".join(part for part in parts if part)

    @property
    def alt_text(self):
        """ image alt text: the title, plus edition info in parentheses """
        label = "%s" % self.title
        details = self.edition_info
        return label + " (%s)" % details if details else label

    def save(self, *args, **kwargs):
        """ can't be abstract for query reasons, but you shouldn't USE it """
        if isinstance(self, (Edition, Work)):
            return super().save(*args, **kwargs)
        raise ValueError("Books should be added as Editions or Works")

    def get_remote_id(self):
        """ editions and works both use "book" instead of model_name """
        return "https://%s/book/%d" % (DOMAIN, self.id)

    def __repr__(self):
        template = "<{} key={!r} title={!r}>"
        return template.format(self.__class__, self.openlibrary_key, self.title)
2020-03-07 06:56:44 +00:00
2020-11-04 20:55:00 +00:00
class Work(OrderedCollectionPageMixin, Book):
    """ a work (an abstract concept of a book that manifests in an edition) """

    # library of congress catalog control number
    lccn = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    # this has to be nullable but should never be null
    default_edition = fields.ForeignKey(
        "Edition", on_delete=models.PROTECT, null=True, load_remote=False
    )

    def save(self, *args, **kwargs):
        """ re-save every edition of this work, then save the work itself """
        # saving an edition recomputes its edition_rank
        for edition in self.editions.all():
            edition.save()
        return super().save(*args, **kwargs)

    def get_default_edition(self):
        """ the default edition, or the top-ranked edition when none is set """
        chosen = self.default_edition
        if chosen:
            return chosen
        ranked = self.editions.order_by("-edition_rank")
        return ranked.first()

    @transaction.atomic()
    def reset_default_edition(self):
        """ sets a new default edition based on computed rank """
        # clear the current default and save, which re-ranks the editions
        self.default_edition = None
        self.save()
        # with no default set, this picks the highest-ranked edition
        self.default_edition = self.get_default_edition()
        self.save()

    def to_edition_list(self, **kwargs):
        """ an ordered collection of editions, highest rank first """
        editions = self.editions.order_by("-edition_rank").all()
        collection_id = "%s/editions" % self.remote_id
        return self.to_ordered_collection(
            editions,
            remote_id=collection_id,
            **kwargs
        )

    activity_serializer = activitypub.Work
    serialize_reverse_fields = [("editions", "editions", "-edition_rank")]
    deserialize_reverse_fields = [("editions", "editions")]
2020-03-07 06:56:44 +00:00
class Edition(Book):
    """ an edition of a book """

    # these identifiers only apply to editions, not works
    isbn_10 = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    isbn_13 = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    oclc_number = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    asin = fields.CharField(
        max_length=255, blank=True, null=True, deduplication_field=True
    )
    pages = fields.IntegerField(blank=True, null=True)
    physical_format = fields.CharField(max_length=255, blank=True, null=True)
    publishers = fields.ArrayField(
        models.CharField(max_length=255), blank=True, default=list
    )
    shelves = models.ManyToManyField(
        "Shelf",
        symmetrical=False,
        through="ShelfBook",
        through_fields=("book", "shelf"),
    )
    parent_work = fields.ForeignKey(
        "Work",
        on_delete=models.PROTECT,
        null=True,
        related_name="editions",
        activitypub_field="work",
    )
    edition_rank = fields.IntegerField(default=0)

    activity_serializer = activitypub.Edition
    name_field = "title"

    def get_rank(self, ignore_default=False):
        """ calculate how complete the data is on this edition

        Returns 20 when this edition is its parent work's default edition
        (unless ignore_default is set); otherwise a score from 0 to 9 where
        a cover is worth 3 points and each other populated field 1 point.
        """
        is_default = (
            not ignore_default
            and self.parent_work
            and self.parent_work.default_edition == self
        )
        if is_default:
            # default edition has the highest rank
            return 20

        # (value, points awarded when the value is truthy); max total is 9
        weighted = (
            (self.cover, 3),
            (self.isbn_13, 1),
            (self.isbn_10, 1),
            (self.oclc_number, 1),
            (self.pages, 1),
            (self.physical_format, 1),
            (self.description, 1),
        )
        return sum(points for value, points in weighted if value)

    def save(self, *args, **kwargs):
        """ fill in a missing isbn from the other one, then update the rank """
        # calculate isbn 10/13; only "978"-prefixed isbn 13s are converted.
        # The two cases are mutually exclusive: the first needs an isbn_13,
        # the second needs it to be missing.
        if self.isbn_13 and self.isbn_13[:3] == "978" and not self.isbn_10:
            self.isbn_10 = isbn_13_to_10(self.isbn_13)
        elif self.isbn_10 and not self.isbn_13:
            self.isbn_13 = isbn_10_to_13(self.isbn_10)

        # set rank
        self.edition_rank = self.get_rank()

        return super().save(*args, **kwargs)
2020-10-29 19:32:37 +00:00
def isbn_10_to_13(isbn_10):
    """ convert an isbn 10 into an isbn 13

    Separators (hyphens, spaces, ...) are ignored. Returns the 13 character
    isbn as a string, or None when the input does not contain exactly ten
    isbn characters or its first nine characters are not all digits.
    """
    # strip separators; keep a lowercase "x" check digit so it still counts
    # towards the length check below
    isbn_10 = re.sub(r"[^0-9Xx]", "", isbn_10)
    if len(isbn_10) != 10:
        # without this, short or empty input would produce a bogus isbn 13
        return None

    # drop the last character (the original checkdigit) and add "978"
    converted = "978" + isbn_10[:9]

    # isbn 13 checksum: digits in odd positions (1st, 3rd, ...) count once,
    # digits in even positions count three times
    try:
        checksum = sum(
            int(digit) * (3 if idx % 2 else 1)
            for idx, digit in enumerate(converted)
        )
    except ValueError:
        # an "X" somewhere other than the check digit position
        return None

    # the check digit brings the checksum up to a multiple of ten
    checkdigit = (10 - checksum % 10) % 10
    return converted + str(checkdigit)
def isbn_13_to_10(isbn_13):
    """ convert isbn 13 to 10, if possible

    Only isbn 13s starting with "978" have an isbn 10 equivalent. Returns
    the 10 character isbn as a string, or None when the prefix is not
    "978", the cleaned input is not 13 characters long, or it contains a
    non-digit among the characters that are kept.
    """
    if isbn_13[:3] != "978":
        return None

    isbn_13 = re.sub(r"[^0-9X]", "", isbn_13)
    if len(isbn_13) != 13:
        # truncated or over-long input cannot yield a valid isbn 10
        return None

    # remove '978' and old checkdigit
    converted = isbn_13[3:-1]

    # calculate checkdigit
    # multiply each digit by 10,9,8.. successively and sum them
    try:
        checksum = sum(int(d) * (10 - idx) for idx, d in enumerate(converted))
    except ValueError:
        return None

    # the check digit brings the weighted sum up to a multiple of 11; the
    # outer modulo maps the "already a multiple" case to 0 instead of 11
    checkdigit = (11 - checksum % 11) % 11
    return converted + ("X" if checkdigit == 10 else str(checkdigit))