1
0
Fork 1
mirror of https://github.com/bookwyrm-social/bookwyrm.git synced 2025-04-24 03:04:10 +00:00

Merge pull request from hughrun/duplicate-authors

Duplicate authors
This commit is contained in:
Hugh Rundle 2025-04-01 18:16:38 +11:00 committed by GitHub
commit 4c764cd543
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -0,0 +1,51 @@
from django.core.management.base import BaseCommand
from django.db.models import Count
from bookwyrm import models
def find_duplicate_author_names():
    """Print a report of authors whose names are exactly identical.

    Names that are empty or NULL are ignored. Groups are listed in ascending
    order of how many authors share the name. Within a group, authors are
    listed with the highest distinct book count first, ties broken by id.
    Matching is done on the name only, so the output is a list of candidates
    for manual review, not a list of confirmed duplicates.
    """
    # Group authors by name and keep only the names used by more than one row.
    name_counts = models.Author.objects.values("name").annotate(Count("name"))
    repeated = name_counts.filter(name__count__gt=1)
    repeated = repeated.exclude(name="").exclude(name__isnull=True)

    for group in repeated.order_by("name__count"):
        print("----------")
        candidates = models.Author.objects.filter(name=group["name"])
        candidates = candidates.annotate(num_books=Count("book", distinct=True))
        candidates = candidates.order_by("-num_books", "id")
        print(
            "You could check if the following authors are actually the same and can be merged, (only checked based on name)"
        )
        for author in candidates:
            birth_year = author.born.year if author.born else ""
            death_year = author.died.year if author.died else ""
            # Only show the parenthesised years when at least one is known.
            lifespan = f" ({birth_year}-{death_year})" if birth_year or death_year else ""
            print(
                f"- {author.remote_id}, {author.name}{lifespan} book editions found:{author.num_books}"
            )
class Command(BaseCommand):
    """Management command listing authors that share a name under different ids"""

    help = "show authors with same name but different id"

    # pylint: disable=no-self-use,unused-argument
    def handle(self, *args, **options):
        """Print the duplicate-name report, then a closing reminder to verify manually"""
        find_duplicate_author_names()
        closing_lines = (
            "----------",
            "You should manually check each author id to determine if they are same author before thinking of merging",
        )
        for line in closing_lines:
            print(line)