Add identity pruning, improve post pruning

This commit is contained in:
Andrew Godwin 2023-11-12 18:01:01 -07:00
parent 9fc497f826
commit 74f69a3813
5 changed files with 70 additions and 6 deletions

View file

@ -3,6 +3,7 @@ import sys
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db.models import Q
from django.utils import timezone from django.utils import timezone
from activities.models import Post from activities.models import Post
@ -21,13 +22,21 @@ class Command(BaseCommand):
) )
def handle(self, number: int, *args, **options): def handle(self, number: int, *args, **options):
if not settings.SETUP.REMOTE_PRUNE_HORIZON:
print("Pruning has been disabled as REMOTE_PRUNE_HORIZON=0")
sys.exit(2)
# Find a set of posts that match the initial criteria # Find a set of posts that match the initial criteria
print(f"Running query to find up to {number} old posts...") print(f"Running query to find up to {number} old posts...")
posts = Post.objects.filter( posts = Post.objects.filter(
local=False, local=False,
created__lt=timezone.now() created__lt=timezone.now()
- datetime.timedelta(days=settings.SETUP.REMOTE_PRUNE_HORIZON), - datetime.timedelta(days=settings.SETUP.REMOTE_PRUNE_HORIZON),
).exclude(interactions__identity__local=True)[:number] ).exclude(
Q(interactions__identity__local=True)
| Q(visibility=Post.Visibilities.mentioned)
)[
:number
]
post_ids_and_uris = dict(posts.values_list("object_uri", "id")) post_ids_and_uris = dict(posts.values_list("object_uri", "id"))
print(f" found {len(post_ids_and_uris)}") print(f" found {len(post_ids_and_uris)}")
@ -43,9 +52,12 @@ class Command(BaseCommand):
# Delete them # Delete them
print(f" down to {len(post_ids_and_uris)} to delete") print(f" down to {len(post_ids_and_uris)} to delete")
number_deleted, _ = Post.objects.filter( print("Deleting...")
number_deleted, deleted = Post.objects.filter(
id__in=post_ids_and_uris.values() id__in=post_ids_and_uris.values()
).delete() ).delete()
print(f"Deleted {number_deleted} posts and dependencies") print("Deleted:")
for model, model_deleted in deleted.items():
print(f" {model}: {model_deleted}")
if number_deleted == 0: if number_deleted == 0:
sys.exit(1) sys.exit(1)

View file

@ -143,9 +143,9 @@ class Settings(BaseSettings):
CACHES_DEFAULT: CacheBackendUrl | None = None CACHES_DEFAULT: CacheBackendUrl | None = None
# How long to wait, in days, until remote posts/profiles are pruned from # How long to wait, in days, until remote posts/profiles are pruned from
# our database if nobody local has interacted with them. Must be in rough # our database if nobody local has interacted with them.
# multiples of two weeks. Set to zero to disable. # Set to zero to disable.
REMOTE_PRUNE_HORIZON: int = 0 REMOTE_PRUNE_HORIZON: int = 90
# Stator tuning # Stator tuning
STATOR_CONCURRENCY: int = 50 STATOR_CONCURRENCY: int = 50

View file

View file

View file

@ -0,0 +1,52 @@
import datetime
import sys

from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import Q
from django.utils import timezone

from users.models import Identity
class Command(BaseCommand):
    """
    Management command that deletes old remote identities nothing refers to.

    An identity is a candidate when it is not local, was created more than
    ``settings.SETUP.REMOTE_PRUNE_HORIZON`` days ago, and matches none of the
    exclusion lookups in ``handle`` (posts, follows, blocks, or interactions
    on local posts).

    Exit codes:
        2 -- pruning is disabled (``REMOTE_PRUNE_HORIZON`` is falsy).
        1 -- the run deleted nothing.
    """

    help = "Prunes identities that have no local interaction"

    def add_arguments(self, parser):
        """Register ``--number``/``-n``, the cap on identities per run."""
        parser.add_argument(
            "--number",
            "-n",
            type=int,
            default=1000,
            help="The maximum number of identities to prune at once",
        )

    def handle(self, number: int, *args, **options):
        """
        Find up to ``number`` prunable identities and delete them.

        Prints progress and a per-model breakdown of deleted rows. Calls
        ``sys.exit`` with 2 when pruning is disabled and with 1 when nothing
        was deleted; otherwise returns normally.
        """
        horizon_days = settings.SETUP.REMOTE_PRUNE_HORIZON
        if not horizon_days:
            print("Pruning has been disabled as REMOTE_PRUNE_HORIZON=0")
            sys.exit(2)
        # Find a set of identities that match the initial criteria
        print(f"Running query to find up to {number} unused identities...")
        # Honour the horizon so recently created identities are left alone;
        # the setting is documented as the number of days to wait before
        # remote posts/profiles become eligible for pruning.
        cutoff = timezone.now() - datetime.timedelta(days=horizon_days)
        identities = Identity.objects.filter(
            local=False,
            created__lt=cutoff,
        ).exclude(
            # Going by the relation names: keep anything that interacted with
            # a local post, has posts of its own, or takes part in a follow
            # or block in either direction.
            Q(interactions__post__local=True)
            | Q(posts__isnull=False)
            | Q(outbound_follows__isnull=False)
            | Q(inbound_follows__isnull=False)
            | Q(outbound_blocks__isnull=False)
            | Q(inbound_blocks__isnull=False)
        )[:number]
        # Materialise the ids once. Passing the lazy sliced queryset to
        # ``id__in`` would embed it as a LIMITed subquery and run the whole
        # exclusion query a second time, possibly selecting different rows
        # from the ones counted below.
        identity_ids = list(identities.values_list("id", flat=True))
        print(f" found {len(identity_ids)}")
        # Delete them
        print("Deleting...")
        # delete() returns (total rows deleted, {model label: rows deleted}).
        number_deleted, deleted = Identity.objects.filter(
            id__in=identity_ids
        ).delete()
        print("Deleted:")
        for model, model_deleted in deleted.items():
            print(f" {model}: {model_deleted}")
        # A non-zero exit status tells the caller there was nothing to prune.
        if number_deleted == 0:
            sys.exit(1)