Compare commits

...

46 commits
0.10.0 ... main

Author SHA1 Message Date
Jamie Bliss 024b01a144
Add myself to a few docs (#713)
* Add myself to a few docs

* Update conf.py
2024-05-21 14:11:53 -04:00
Henri Dickson 8f17b81912
fix 500 when saving edited announcements (#708) 2024-05-21 13:55:17 -04:00
Andrew Godwin 7c34ac78ed Write a release checklist and do a couple things on it 2024-02-06 14:49:35 -07:00
Henri Dickson 72eb6a6271
add application/activity+json to accept header to improve compatibility (#694) 2024-02-05 21:40:04 -05:00
Jamie Bliss b2223ddf42
Back out push notification changes 2024-02-05 21:18:59 -05:00
Jamie Bliss 045a499ddf
Fix docs 2024-02-05 20:59:22 -05:00
Jamie Bliss 0fa48578f2
Write release notes for 0.11.0 2024-02-05 20:53:09 -05:00
Henri Dickson f86f3a49e4
Fix when report ap message does not have content field (#689) 2024-01-08 19:48:21 -07:00
Henri Dickson 2f4daa02bd
Add missing validator to initial migration (#687) 2024-01-04 08:59:26 -07:00
Henri Dickson 798222dcdb
Post update/delete also fanout to those who liked/boosted it but not following the author (#684) 2023-12-31 11:06:30 -07:00
Henri Dickson 74b3ac551a
Fix accept/reject follow request (#683) 2023-12-27 11:48:09 -07:00
Henri Dickson 4a09379e09
Fix federating with GoToSocial (#682) 2023-12-26 10:26:03 -07:00
Henri Dickson 448092d6d9
Improve identity deletion (#678) 2023-12-16 23:49:59 +00:00
Henri Dickson 5d508a17ec
Basic protection against invalid domain names (#680) 2023-12-13 09:04:41 +00:00
Jamie Bliss d07482f5a8
Allow statusless posts (#677) 2023-12-07 16:32:18 -07:00
Henri Dickson 123c20efb1
When remote follows local, hold off sending Accept if remote identity is not fully fetched (#676) 2023-12-06 11:08:41 -07:00
Karthik Balakrishnan 83607779cd
Fix README: 0.10.1 is latest release (#675) 2023-12-01 09:11:18 -07:00
Andrew Godwin 837320f461 Invert pruning exit codes 2023-12-01 00:03:09 -07:00
Rob 5f28d702f8
Make max_media_attachments configurable by admin (#669) 2023-11-28 09:52:04 -07:00
Henri Dickson ac7fef4b28
Do not fetch webfinger when querying identity on local domain (#668) 2023-11-26 21:00:58 -07:00
Henri Dickson 6855e74c6f
Do not retry unmute if mute never expires 2023-11-26 12:46:31 -07:00
Henri Dickson a58d7ccd8f
Do not make local identities outdated (#667) 2023-11-26 11:19:18 -07:00
Rob 1a728ea023
Add s3-insecure to pydantic checker (#665) 2023-11-26 11:13:55 -07:00
Humberto Rocha b031880e41
Extract json parser to core and use in fetch_actor (#663) 2023-11-20 11:46:51 -07:00
Humberto Rocha 81d019ad0d
Improve search api json parsing (#662) 2023-11-19 11:32:35 -07:00
Henri Dickson 5267e4108c
Allow unicode characters in hashtag (#659) 2023-11-19 09:58:20 -07:00
Henri Dickson b122e2beda
Fix fetching post from another takahe by searching its url (#661) 2023-11-18 21:03:51 -07:00
Rob ae1bfc49a7
Add s3-insecure for S3 backend (#658) 2023-11-17 21:49:06 -07:00
Osma Ahvenlampi 1ceef59bec
Module-specific loggers and minor reformatting (#657) 2023-11-16 10:27:20 -07:00
Humberto Rocha 2f546dfa74
Do not canonicalise non json content in the search endpoint (#654) 2023-11-15 15:00:56 -07:00
Andrew Godwin cc9e397f60 Ensure post pruning has a random selection element 2023-11-14 00:04:18 -07:00
Andrew Godwin dc397903b2 Fix release date formatting 2023-11-13 12:18:30 -07:00
Andrew Godwin debf4670e8 Releasing 0.10.1 2023-11-13 12:16:40 -07:00
Andrew Godwin e49bfc4775 Add Stator tuning notes 2023-11-13 10:52:22 -07:00
Andrew Godwin 308dd033e1 Significantly drop the default settings for stator 2023-11-13 10:39:21 -07:00
Andrew Godwin 460d1d7e1c Don't prune replies to local, add docs 2023-11-12 18:32:38 -07:00
Andrew Godwin eb0b0d775c Don't delete mentioned people 2023-11-12 18:06:29 -07:00
Andrew Godwin 74f69a3813 Add identity pruning, improve post pruning 2023-11-12 18:01:01 -07:00
Andrew Godwin 9fc497f826 Mention that the final number includes dependencies 2023-11-12 17:12:05 -07:00
Andrew Godwin ab3648e05d Add console logging back to Stator 2023-11-12 16:49:01 -07:00
Andrew Godwin 476f817464 Only consider local replies 2023-11-12 16:31:20 -07:00
Andrew Godwin 99e7fb8639 Fix prune issues when multiple replies 2023-11-12 16:30:49 -07:00
Andrew Godwin 87344b47b5 Add manual post pruning command 2023-11-12 16:23:43 -07:00
Andrew Godwin aa39ef0571 Move remote pruning note over to 0.11 2023-11-12 14:43:45 -07:00
Andrew Godwin 110a5e64dc "a" to "our" is important meaning 2023-11-12 14:42:59 -07:00
Andrew Godwin bae76c3063 Add 0.10 to release index with date 2023-11-12 14:39:24 -07:00
47 changed files with 772 additions and 125 deletions

View file

@ -3,7 +3,7 @@
A *beta* Fediverse server for microblogging. Not fully polished yet -
we're still working towards a 1.0!
**Current version: [0.9](https://docs.jointakahe.org/en/latest/releases/0.9/)**
**Current version: [0.11.0](https://docs.jointakahe.org/en/latest/releases/0.11/)**
Key features:

View file

View file

@ -0,0 +1,83 @@
import datetime
import sys
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import Q
from django.utils import timezone
from activities.models import Post
class Command(BaseCommand):
help = "Prunes posts that are old, not local and have no local interaction"
def add_arguments(self, parser):
parser.add_argument(
"--number",
"-n",
type=int,
default=500,
help="The maximum number of posts to prune at once",
)
def handle(self, number: int, *args, **options):
if not settings.SETUP.REMOTE_PRUNE_HORIZON:
print("Pruning has been disabled as REMOTE_PRUNE_HORIZON=0")
sys.exit(2)
# Find a set of posts that match the initial criteria
print(f"Running query to find up to {number} old posts...")
posts = (
Post.objects.filter(
local=False,
created__lt=timezone.now()
- datetime.timedelta(days=settings.SETUP.REMOTE_PRUNE_HORIZON),
)
.exclude(
Q(interactions__identity__local=True)
| Q(visibility=Post.Visibilities.mentioned)
)
.order_by("?")[:number]
)
post_ids_and_uris = dict(posts.values_list("object_uri", "id"))
print(f" found {len(post_ids_and_uris)}")
# Fetch all of their replies and exclude any that have local replies
print("Excluding ones with local replies...")
replies = Post.objects.filter(
local=True,
in_reply_to__in=post_ids_and_uris.keys(),
).values_list("in_reply_to", flat=True)
for reply in replies:
if reply and reply in post_ids_and_uris:
del post_ids_and_uris[reply]
print(f" narrowed down to {len(post_ids_and_uris)}")
# Fetch all the posts that they are replies to, and don't delete ones
# that are replies to local posts
print("Excluding ones that are replies to local posts...")
in_reply_tos = (
Post.objects.filter(id__in=post_ids_and_uris.values())
.values_list("in_reply_to", flat=True)
.distinct()
)
local_object_uris = Post.objects.filter(
local=True, object_uri__in=in_reply_tos
).values_list("object_uri", flat=True)
final_post_ids = list(
Post.objects.filter(id__in=post_ids_and_uris.values())
.exclude(in_reply_to__in=local_object_uris)
.values_list("id", flat=True)
)
print(f" narrowed down to {len(final_post_ids)}")
# Delete them
if not final_post_ids:
sys.exit(0)
print("Deleting...")
_, deleted = Post.objects.filter(id__in=final_post_ids).delete()
print("Deleted:")
for model, model_deleted in deleted.items():
print(f" {model}: {model_deleted}")
sys.exit(1)
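
A typical manual invocation, using the optional batch-size flag (documented in the tuning guide):

    ./manage.py pruneposts --number=1000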

View file

@ -46,6 +46,8 @@ from users.models.identity import Identity, IdentityStates
from users.models.inbox_message import InboxMessage
from users.models.system_actor import SystemActor
logger = logging.getLogger(__name__)
class PostStates(StateGraph):
new = State(try_interval=300)
@ -581,7 +583,7 @@ class Post(StatorModel):
domain=domain,
fetch=True,
)
if identity is not None:
if identity is not None and not identity.deleted:
mentions.add(identity)
return mentions
@ -763,6 +765,9 @@ class Post(StatorModel):
targets = set()
for mention in self.mentions.all():
targets.add(mention)
if self.visibility in [Post.Visibilities.public, Post.Visibilities.unlisted]:
for interaction in self.interactions.all():
targets.add(interaction.identity)
# Then, if it's not mentions only, also deliver to followers and all hashtag followers
if self.visibility != Post.Visibilities.mentioned:
for follower in self.author.inbound_follows.filter(
@ -897,7 +902,7 @@ class Post(StatorModel):
# don't have content, but this shouldn't be a total failure
post.content = get_value_or_map(data, "content", "contentMap")
except ActivityPubFormatError as err:
logging.warning(f"{err} on {post.url}")
logger.warning("%s on %s", err, post.url)
post.content = None
# Document types have names, not summaries
post.summary = data.get("summary") or data.get("name")
@ -993,8 +998,10 @@ class Post(StatorModel):
try:
cls.ensure_object_uri(post.in_reply_to, reason=post.object_uri)
except ValueError:
logging.warning(
f"Cannot fetch ancestor of Post={post.pk}, ancestor_uri={post.in_reply_to}"
logger.warning(
"Cannot fetch ancestor of Post=%s, ancestor_uri=%s",
post.pk,
post.in_reply_to,
)
else:
parent.calculate_stats()

View file

@ -9,6 +9,8 @@ from activities.models import (
)
from users.models import Identity
logger = logging.getLogger(__name__)
class PostService:
"""
@ -99,7 +101,7 @@ class PostService:
try:
Post.ensure_object_uri(object_uri, reason=reason)
except ValueError:
logging.error(
logger.error(
f"Cannot fetch ancestor Post={self.post.pk}, ancestor_uri={object_uri}"
)
break

View file

@ -1,6 +1,7 @@
import httpx
from activities.models import Hashtag, Post
from core.json import json_from_response
from core.ld import canonicalise
from users.models import Domain, Identity, IdentityStates
from users.models.system_actor import SystemActor
@ -81,7 +82,12 @@ class SearchService:
return None
if response.status_code >= 400:
return None
document = canonicalise(response.json(), include_security=True)
json_data = json_from_response(response)
if not json_data:
return None
document = canonicalise(json_data, include_security=True)
type = document.get("type", "unknown").lower()
# Is it an identity?

View file

@ -41,7 +41,7 @@ def instance_info_v1(request):
"accounts": {},
"statuses": {
"max_characters": Config.system.post_length,
"max_media_attachments": 4,
"max_media_attachments": Config.system.max_media_attachments,
"characters_reserved_per_url": 23,
},
"media_attachments": {
@ -102,7 +102,7 @@ def instance_info_v2(request) -> dict:
"accounts": {"max_featured_tags": 0},
"statuses": {
"max_characters": Config.system.post_length,
"max_media_attachments": 4,
"max_media_attachments": Config.system.max_media_attachments,
"characters_reserved_per_url": 23,
},
"media_attachments": {

View file

@ -39,7 +39,7 @@ class PostPollSchema(Schema):
class PostStatusSchema(Schema):
status: str
status: str | None
in_reply_to_id: str | None = None
sensitive: bool = False
spoiler_text: str | None = None
@ -82,9 +82,9 @@ def post_for_id(request: HttpRequest, id: str) -> Post:
@api_view.post
def post_status(request, details: PostStatusSchema) -> schemas.Status:
# Check text length
if len(details.status) > Config.system.post_length:
if details.status and len(details.status) > Config.system.post_length:
raise ApiError(400, "Status is too long")
if len(details.status) == 0 and not details.media_ids:
if not details.status and not details.media_ids:
raise ApiError(400, "Status is empty")
# Grab attachments
attachments = [get_object_or_404(PostAttachment, pk=id) for id in details.media_ids]
@ -103,7 +103,7 @@ def post_status(request, details: PostStatusSchema) -> schemas.Status:
pass
post = Post.create_local(
author=request.identity,
content=details.status,
content=details.status or "",
summary=details.spoiler_text,
sensitive=details.sensitive,
visibility=visibility_map[details.visibility],

View file

@ -38,7 +38,7 @@ class FediverseHtmlParser(HTMLParser):
r"(^|[^\w\d\-_/])@([\w\d\-_]+(?:@[\w\d\-_\.]+[\w\d\-_]+)?)"
)
HASHTAG_REGEX = re.compile(r"\B#([a-zA-Z0-9(_)]+\b)(?!;)")
HASHTAG_REGEX = re.compile(r"\B#([\w()]+\b)(?!;)")
EMOJI_REGEX = re.compile(r"\B:([a-zA-Z0-9(_)-]+):\B")

core/json.py (new file, 32 lines)
View file

@ -0,0 +1,32 @@
import json
from httpx import Response
JSON_CONTENT_TYPES = [
"application/json",
"application/ld+json",
"application/activity+json",
]
def json_from_response(response: Response) -> dict | None:
content_type, *parameters = (
response.headers.get("Content-Type", "invalid").lower().split(";")
)
if content_type not in JSON_CONTENT_TYPES:
return None
charset = None
for parameter in parameters:
key, value = parameter.split("=")
if key.strip() == "charset":
charset = value.strip()
if charset:
return json.loads(response.content.decode(charset))
else:
# if no charset was specified, fall back to
# httpx's JSON decoding and its encoding inference
return response.json()
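
A brief usage sketch (the URL and fetch are hypothetical; only ``json_from_response`` comes from this file):

    import httpx
    from core.json import json_from_response

    response = httpx.get(
        "https://example.com/users/alice",  # hypothetical actor URL
        headers={"Accept": "application/activity+json"},
    )
    data = json_from_response(response)  # None unless Content-Type is a JSON type
    if data is None:
        raise ValueError("Remote server did not return JSON")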

View file

@ -8,6 +8,8 @@ from pyld import jsonld
from core.exceptions import ActivityPubFormatError
logger = logging.getLogger(__name__)
schemas = {
"unknown": {
"contentType": "application/ld+json",
@ -630,7 +632,7 @@ def builtin_document_loader(url: str, options={}):
# Get URL without scheme
pieces = urllib_parse.urlparse(url)
if pieces.hostname is None:
logging.info(f"No host name for json-ld schema: {url!r}")
logger.info(f"No host name for json-ld schema: {url!r}")
return schemas["unknown"]
key = pieces.hostname + pieces.path.rstrip("/")
try:
@ -641,7 +643,7 @@ def builtin_document_loader(url: str, options={}):
return schemas[key]
except KeyError:
# return an empty context instead of throwing an error
logging.info(f"Ignoring unknown json-ld schema: {url!r}")
logger.info(f"Ignoring unknown json-ld schema: {url!r}")
return schemas["unknown"]

View file

@ -214,6 +214,7 @@ class Config(models.Model):
content_warning_text: str = "Content Warning"
post_length: int = 500
max_media_attachments: int = 4
post_minimum_interval: int = 3 # seconds
identity_min_length: int = 2
identity_max_per_user: int = 5

View file

@ -27,12 +27,14 @@ if SENTRY_ENABLED:
set_context = sentry_sdk.set_context
set_tag = sentry_sdk.set_tag
start_transaction = sentry_sdk.start_transaction
start_span = sentry_sdk.start_span
else:
configure_scope = noop_context
push_scope = noop_context
set_context = noop
set_tag = noop
start_transaction = noop_context
start_span = noop_context
def set_takahe_app(name: str):

View file

@ -19,6 +19,8 @@ from pyld import jsonld
from core.ld import format_ld_date
logger = logging.getLogger(__name__)
class VerificationError(BaseException):
"""
@ -190,7 +192,7 @@ class HttpSignature:
body: dict | None,
private_key: str,
key_id: str,
content_type: str = "application/json",
content_type: str = "application/activity+json",
method: Literal["get", "post"] = "post",
timeout: TimeoutTypes = settings.SETUP.REMOTE_TIMEOUT,
):
@ -217,7 +219,7 @@ class HttpSignature:
body_bytes = b""
# GET requests get implicit accept headers added
if method == "get":
headers["Accept"] = "application/ld+json"
headers["Accept"] = "application/activity+json,application/ld+json"
# Sign the headers
signed_string = "\n".join(
f"{name.lower()}: {value}" for name, value in headers.items()
@ -259,7 +261,7 @@ class HttpSignature:
)
except SSLError as invalid_cert:
# Not our problem if the other end doesn't have proper SSL
logging.info(f"{uri} {invalid_cert}")
logger.info("Invalid cert on %s %s", uri, invalid_cert)
raise SSLCertVerificationError(invalid_cert) from invalid_cert
except InvalidCodepoint as ex:
# Convert to a more generic error we handle

View file

@ -13,7 +13,7 @@ sys.path.insert(0, str(pathlib.Path(__file__).parent / "extensions"))
project = "Takahē"
copyright = "2022, Andrew Godwin"
author = "Andrew Godwin"
author = "Andrew Godwin, Jamie Bliss"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

View file

@ -172,3 +172,37 @@ We use `HTMX <https://htmx.org/>`_ for dynamically loading content, and
`Hyperscript <https://hyperscript.org/>`_ for most interactions rather than raw
JavaScript. If you can accomplish what you need with these tools, please use them
rather than adding JS.
Cutting a release
-----------------
In order to make a release of Takahē, follow these steps:
* Create or update the release document (in ``/docs/releases``) for the
release; major versions get their own document, minor releases get a
subheading in the document for their major release.
* Go through the git commit history since the last release in order to write
a reasonable summary of features.
* Be sure to include the little paragraphs at the end about contributing and
the docker tag, and an Upgrade Notes section that at minimum mentions
migrations and whether they're normal or weird (even if there aren't any, it's
nice to call that out).
* If it's a new doc, make sure you include it in ``docs/releases/index.rst``!
* Update the version number in ``/takahe/__init__.py``
* Update the version number in ``README.md``
* Make a commit containing these changes called ``Releasing 1.23.45``.
* Tag that commit with a tag in the format ``1.23.45`` (see the example
commands after this list).
* Wait for the GitHub Actions to run and publish the docker images (around 20
minutes, as the ARM build is a bit slow).
* Post on the official account announcing the release and linking to the
now-published release notes.
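
For reference, a minimal sketch of the commit, tag, and push steps (version
number hypothetical)::

    git commit -am "Releasing 1.23.45"
    git tag 1.23.45
    git push origin main 1.23.45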

View file

@ -9,7 +9,8 @@ page!
Creator & Main Developer
------------------------
* `Andrew Godwin <https://aeracode.org>`_
* `Andrew Godwin <https://aeracode.org>`_ (Original creator)
* `Jamie Bliss <https://tacobelllabs.net/@astraluma>`_ (Current maintainer)
Core Contributors

View file

@ -167,6 +167,11 @@ If you omit the keys or the endpoint URL, then Takahē will try to use implicit
authentication for them. The keys, if included, should be urlencoded, as AWS
secret keys commonly contain eg + characters.
With the above examples, Takahē connects to an S3 bucket using **HTTPS**. If
you wish to connect to an S3 bucket using **HTTP** (for example, to connect to
an S3 API endpoint on a private network), replace ``s3`` in the examples above
with ``s3-insecure``.
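
For example, a hypothetical plain-HTTP MinIO endpoint on a private network
(host, credentials, and bucket name are placeholders) might be configured as::

    TAKAHE_MEDIA_BACKEND=s3-insecure://accesskey:secretkey@minio.internal:9000/takahe-media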
Your S3 bucket *must* be set to allow publicly-readable files, as Takahē will
set all files it uploads to be ``public-read``. We randomise uploaded file
names to prevent enumeration attacks.

View file

@ -1,7 +1,9 @@
0.10
====
*Released: Not Yet Released*
*0.10.0 Released: 2023/11/12*
*0.10.1 Released: 2023/11/13*
This release is a polish release that mostly focuses on performance, stability
and federation compatibility.
@ -35,12 +37,12 @@ Minor changes also include:
* SMTP servers that don't require authentication are now supported.
* Python 3.11 is now the minimum version required; this will not affect you at
all if you run Takahē via a docker image, as is recommended.
all if you run Takahē via our docker image, as is recommended.
A remote post pruning system, to shrink the database of old data that was no
longer needed, was in the development version but has been removed in this
release due to the extra database load it caused. An alternative approach to
this will hopefully land in a future release.
An automatic remote post pruning system, to shrink the database of old data
that was no longer needed, was in the development version but has been switched
to a set of manual commands as of 0.10.1 - you can read more below or in
:doc:`/tuning`.
If you'd like to help with code, design, or other areas, see
:doc:`/contributing` to see how to get in touch.
@ -49,20 +51,28 @@ You can download images from `Docker Hub <https://hub.docker.com/r/jointakahe/ta
or use the image name ``jointakahe/takahe:0.10``.
0.10.1
------
*Released: 2023/11/13*
This is a bugfix and small feature addition release:
* The ``runstator`` command now logs its output to the terminal again
* Two new commands, ``pruneposts`` and ``pruneidentities``, have been added to
enable pruning (deletion of old content) of Posts and Identities respectively.
You can read more about them in :doc:`/tuning`.
* Stator's default concurrency levels have been significantly reduced as it's
now way more efficient at using individual database connections, but as a
result it places way more load on them. You can read more about tuning this
in :doc:`/tuning`.
Upgrade Notes
-------------
Remote Pruning
~~~~~~~~~~~~~~
Post pruning is now in and comes *enabled by default*, as it is not directly
destructive (it will only delete content that has not been interacted with
locally and which can be re-fetched).
Nevertheless, if you want to avoid post deletion triggering on your server at
all, you should set the ``TAKAHE_REMOTE_PRUNE_HORIZON`` environment variable to
``0``.
Migrations
~~~~~~~~~~
@ -76,3 +86,13 @@ We recommend:
* Temporarily stopping all instances of the webserver and Stator
* Applying the migration (should be less than a few minutes on most installs)
* Restarting the instances of webserver and Stator
Stator
~~~~~~
Stator's new internal architecture allocates one worker thread and one
database connection per unit of concurrency; this means it is a *lot* more
efficient for a given "concurrency" number than the old system, but it also
uses a lot more database connections. We recommend you reduce your
configuration values for these by 5-10x; if you didn't set them manually, then
don't worry, we've reduced the default values by a similar amount.

View file

@ -1,21 +1,54 @@
0.11
====
*Released: Not Yet Released*
*Released: 2024-02-05*
Notes TBD.
This is largely a bugfix and catch-up release.
Some highlights:
* Python 3.10 has been dropped. The new minimum Python version is 3.11
* Jamie (`@astraluma@tacobelllabs.net <https://tacobelllabs.net/@astraluma>`_)
has officially joined the project
* If your S3 does not use TLS, you must use ``s3-insecure`` in your
configuration
* Takahē now supports unicode hashtags
* Add a Maximum Media Attachments setting
* Inverted the pruning command exit codes
* Posts are no longer required to have text content
And some interoperability bug fixes:
* Fixed a bug with GoToSocial
* Attempted to fix follows from the Misskey family
* Correctly handle when a federated report doesn't have content
In addition, there are many bugfixes and minor changes, including:
* Several JSON handling improvements
* Post pruning now has a random element to it
* More specific loggers
* Don't make local identities stale
* Don't try to unmute when there's no expiration
* Don't try to WebFinger local users
* Synchronize follow accepting and profile fetching
* Perform some basic domain validity checks
* Correctly reject more operations when the identity is deleted
* Post edit fanouts for likers/boosters
If you'd like to help with code, design, or other areas, see
:doc:`/contributing` to see how to get in touch.
You can download images from `Docker Hub <https://hub.docker.com/r/jointakahe/takahe>`_,
or use the image name ``jointakahe/takahe:0.11``.
Upgrade Notes
-------------
VAPID keys and Push notifications
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Migrations
~~~~~~~~~~
Takahē now supports push notifications if you supply a valid VAPID keypair as
the ``TAKAHE_VAPID_PUBLIC_KEY`` and ``TAKAHE_VAPID_PRIVATE_KEY`` environment
variables. You can generate a keypair via `https://web-push-codelab.glitch.me/`_.
Note that users of apps may need to sign out and in again to their accounts for
the app to notice that it can now do push notifications. Some apps, like Elk,
may cache the fact your server didn't support it for a while.
There are new database migrations; they are backwards-compatible and should
not present any major database load.

View file

@ -7,6 +7,8 @@ Versions
.. toctree::
:maxdepth: 1
0.11
0.10
0.9
0.8
0.7

docs/releases/next.rst (new file, 15 lines)
View file

@ -0,0 +1,15 @@
Upgrade Notes
-------------
VAPID keys and Push notifications
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Takahē now supports push notifications if you supply a valid VAPID keypair as
the ``TAKAHE_VAPID_PUBLIC_KEY`` and ``TAKAHE_VAPID_PRIVATE_KEY`` environment
variables. You can generate a keypair via `https://web-push-codelab.glitch.me/`_.
Note that users of apps may need to sign out and in again to their accounts for
the app to notice that it can now do push notifications. Some apps, like Elk,
may cache the fact your server didn't support it for a while.
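
A sketch of the resulting environment configuration (placeholder values;
substitute the keys produced by the generator)::

    TAKAHE_VAPID_PUBLIC_KEY="<public key from the generator>"
    TAKAHE_VAPID_PRIVATE_KEY="<matching private key>"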

View file

@ -56,10 +56,13 @@ Stator (Task Processing)
Takahē's background task processing system is called Stator, and it uses
asynchronous Python to pack loads of tasks at one time into a single process.
By default, it will try to run up to 100 tasks at once, with a maximum of 40
from any single model (FanOut will usually be the one it's doing most of).
You can tweak these with the ``TAKAHE_STATOR_CONCURRENCY`` and
``TAKAHE_STATOR_CONCURRENCY_PER_MODEL`` environment variables.
By default, it will try to run up to 20 tasks at once, with a maximum of 4 from
any single model (FanOut will usually be the one it's doing most of). You can
tweak these with the ``TAKAHE_STATOR_CONCURRENCY`` and
``TAKAHE_STATOR_CONCURRENCY_PER_MODEL`` environment variables; for every extra
element of concurrency you add, however, it will use an additional database
connection in a new worker thread. Be wary of hitting your database's
connection limits.
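
For example, to state the defaults explicitly (these match the shipped
default values)::

    TAKAHE_STATOR_CONCURRENCY=20
    TAKAHE_STATOR_CONCURRENCY_PER_MODEL=4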
The only real limits Stator can hit are CPU and memory usage; if you see your
Stator (worker) containers not using anywhere near all of their CPU or memory,
@ -88,6 +91,50 @@ servers' timeouts make the connection fail) for more than about a week, some
servers may consider it permanently unreachable and stop sending posts.
Pruning
-------
Over time, the amount of Fediverse content your server consumes will grow -
you'll see every reply to every post from every user you follow, and fetch
every identity of every author of those replies.
Obviously, you don't need all of this past a certain date, as it's unlikely
you'll want to go back to view what the timeline would have looked like months
ago. If you want to remove this data, you can run the two "pruning" commands::
./manage.py pruneposts
./manage.py pruneidentities
Each operates in batches, and takes an optional ``--number=1000`` argument
to specify the batch size. The ``TAKAHE_REMOTE_PRUNE_HORIZON`` environment
variable specifies the number of days of history you want to keep intact before
the pruning happens - this defaults to 3 months.
Post pruning removes any post that isn't:
* Written by a local identity
* Newer than ``TAKAHE_REMOTE_PRUNE_HORIZON`` days old
* Favourited, bookmarked or boosted by a local identity
* Replied to by a local identity
* A reply to a local identity's post
Identity pruning removes any identity that isn't:
* A local identity
* Newer than ``TAKAHE_REMOTE_PRUNE_HORIZON`` days old
* Mentioned by a post by a local identity
* Followed or blocked by a local identity
* Following or blocking a local identity
* A liker or booster of a local post
We recommend you run the pruning commands on a scheduled basis (e.g. via
a cronjob). They will return a ``1`` exit code if they deleted something and
a ``0`` exit code if they found nothing to delete, so you can put them in
a loop that runs until deletion is complete::
until ./manage.py pruneposts; do sleep 1; done
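
One possible schedule (paths and times are hypothetical; note that a ``1``
exit code means something was deleted, so avoid chaining the two commands
with ``&&``)::

    0 4 * * * cd /srv/takahe && ./manage.py pruneposts
    30 4 * * * cd /srv/takahe && ./manage.py pruneidentities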
Caching
-------

View file

@ -8,6 +8,8 @@ from core.models import Config
from stator.models import StatorModel
from stator.runner import StatorRunner
logger = logging.getLogger(__name__)
class Command(BaseCommand):
help = "Runs a Stator runner"
@ -62,6 +64,12 @@ class Command(BaseCommand):
):
# Cache system config
Config.system = Config.load_system()
logging.basicConfig(
format="[%(asctime)s] %(levelname)8s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO,
force=True,
)
# Resolve the models list into names
models = cast(
list[type[StatorModel]],
@ -74,7 +82,7 @@ class Command(BaseCommand):
if not models:
models = StatorModel.subclasses
models = [model for model in models if model not in excluded]
logging.info(
logger.info(
"Running for models: " + " ".join(m._meta.label_lower for m in models)
)
# Run a runner
@ -88,4 +96,4 @@ class Command(BaseCommand):
try:
runner.run()
except KeyboardInterrupt:
logging.critical("Ctrl-C received")
logger.critical("Ctrl-C received")

View file

@ -11,6 +11,8 @@ from django.utils.functional import classproperty
from stator.exceptions import TryAgainLater
from stator.graph import State, StateGraph
logger = logging.getLogger(__name__)
class StateField(models.CharField):
"""
@ -189,7 +191,7 @@ class StatorModel(models.Model):
# If it's a manual progression state don't even try
# We shouldn't really be here in this case, but it could be a race condition
if current_state.externally_progressed:
logging.warning(
logger.warning(
f"Warning: trying to progress externally progressed state {self.state}!"
)
return None
@ -203,7 +205,7 @@ class StatorModel(models.Model):
except TryAgainLater:
pass
except BaseException as e:
logging.exception(e)
logger.exception(e)
else:
if next_state:
# Ensure it's a State object

View file

@ -14,6 +14,8 @@ from core import sentry
from core.models import Config
from stator.models import StatorModel, Stats
logger = logging.getLogger(__name__)
class LoopingTimer:
"""
@ -84,7 +86,7 @@ class StatorRunner:
self.scheduling_timer = LoopingTimer(self.schedule_interval)
self.deletion_timer = LoopingTimer(self.delete_interval)
# For the first time period, launch tasks
logging.info("Running main task loop")
logger.info("Running main task loop")
try:
with sentry.configure_scope() as scope:
while True:
@ -137,18 +139,18 @@ class StatorRunner:
pass
# Wait for tasks to finish
logging.info("Waiting for tasks to complete")
logger.info("Waiting for tasks to complete")
self.executor.shutdown()
# We're done
logging.info("Complete")
logger.info("Complete")
def alarm_handler(self, signum, frame):
"""
Called when SIGALRM fires, which means we missed a schedule loop.
Just exit as we're likely deadlocked.
"""
logging.warning("Watchdog timeout exceeded")
logger.warning("Watchdog timeout exceeded")
os._exit(2)
def load_config(self):
@ -163,13 +165,14 @@ class StatorRunner:
"""
with sentry.start_transaction(op="task", name="stator.run_scheduling"):
for model in self.models:
num = self.handled.get(model._meta.label_lower, 0)
if num or settings.DEBUG:
logging.info(
f"{model._meta.label_lower}: Scheduling ({num} handled)"
)
self.submit_stats(model)
model.transition_clean_locks()
with sentry.start_span(description=model._meta.label_lower):
num = self.handled.get(model._meta.label_lower, 0)
if num or settings.DEBUG:
logger.info(
f"{model._meta.label_lower}: Scheduling ({num} handled)"
)
self.submit_stats(model)
model.transition_clean_locks()
def submit_stats(self, model: type[StatorModel]):
"""
@ -239,7 +242,7 @@ class StatorRunner:
try:
task.result()
except BaseException as e:
logging.exception(e)
logger.exception(e)
def run_single_cycle(self):
"""
@ -269,11 +272,11 @@ def task_transition(instance: StatorModel, in_thread: bool = True):
result = instance.transition_attempt()
duration = time.monotonic() - started
if result:
logging.info(
logger.info(
f"{instance._meta.label_lower}: {instance.pk}: {instance.state} -> {result} ({duration:.2f}s)"
)
else:
logging.info(
logger.info(
f"{instance._meta.label_lower}: {instance.pk}: {instance.state} unchanged ({duration:.2f}s)"
)
if in_thread:
@ -289,7 +292,7 @@ def task_deletion(model: type[StatorModel], in_thread: bool = True):
deleted = model.transition_delete_due()
if not deleted:
break
logging.info(f"{model._meta.label_lower}: Deleted {deleted} stale items")
logger.info(f"{model._meta.label_lower}: Deleted {deleted} stale items")
time.sleep(1)
if in_thread:
close_old_connections()

View file

@ -1 +1 @@
__version__ = "0.10.0"
__version__ = "0.11.0"

View file

@ -28,7 +28,7 @@ class ImplicitHostname(AnyUrl):
class MediaBackendUrl(AnyUrl):
host_required = False
allowed_schemes = {"s3", "gs", "local"}
allowed_schemes = {"s3", "s3-insecure", "gs", "local"}
def as_bool(v: str | list[str] | None):
@ -143,13 +143,13 @@ class Settings(BaseSettings):
CACHES_DEFAULT: CacheBackendUrl | None = None
# How long to wait, in days, until remote posts/profiles are pruned from
# our database if nobody local has interacted with them. Must be in rough
# multiples of two weeks. Set to zero to disable.
REMOTE_PRUNE_HORIZON: int = 0
# our database if nobody local has interacted with them.
# Set to zero to disable.
REMOTE_PRUNE_HORIZON: int = 90
# Stator tuning
STATOR_CONCURRENCY: int = 50
STATOR_CONCURRENCY_PER_MODEL: int = 15
STATOR_CONCURRENCY: int = 20
STATOR_CONCURRENCY_PER_MODEL: int = 4
# If user migration is allowed (off by default until outbound is done)
ALLOW_USER_MIGRATION: bool = False
@ -432,7 +432,7 @@ if SETUP.MEDIA_BACKEND:
if parsed.hostname is not None:
port = parsed.port or 443
GS_CUSTOM_ENDPOINT = f"https://{parsed.hostname}:{port}"
elif parsed.scheme == "s3":
elif (parsed.scheme == "s3") or (parsed.scheme == "s3-insecure"):
STORAGES["default"]["BACKEND"] = "core.uploads.TakaheS3Storage"
AWS_STORAGE_BUCKET_NAME = parsed.path.lstrip("/")
AWS_QUERYSTRING_AUTH = False
@ -441,8 +441,14 @@ if SETUP.MEDIA_BACKEND:
AWS_ACCESS_KEY_ID = parsed.username
AWS_SECRET_ACCESS_KEY = urllib.parse.unquote(parsed.password)
if parsed.hostname is not None:
port = parsed.port or 443
AWS_S3_ENDPOINT_URL = f"https://{parsed.hostname}:{port}"
if parsed.scheme == "s3-insecure":
s3_default_port = 80
s3_scheme = "http"
else:
s3_default_port = 443
s3_scheme = "https"
port = parsed.port or s3_default_port
AWS_S3_ENDPOINT_URL = f"{s3_scheme}://{parsed.hostname}:{port}"
if SETUP.MEDIA_URL is not None:
media_url_parsed = urllib.parse.urlparse(SETUP.MEDIA_URL)
AWS_S3_CUSTOM_DOMAIN = media_url_parsed.hostname

tests/api/test_search.py (new file, 116 lines)
View file

@ -0,0 +1,116 @@
import pytest
from pytest_httpx import HTTPXMock
test_account_json = r"""
{
"@context":[
"https://www.w3.org/ns/activitystreams",
"https://w3id.org/security/v1",
{
"manuallyApprovesFollowers":"as:manuallyApprovesFollowers",
"toot":"http://joinmastodon.org/ns#",
"featured":{
"@id":"toot:featured",
"@type":"@id"
},
"featuredTags":{
"@id":"toot:featuredTags",
"@type":"@id"
},
"movedTo":{
"@id":"as:movedTo",
"@type":"@id"
},
"schema":"http://schema.org#",
"PropertyValue":"schema:PropertyValue",
"value":"schema:value",
"discoverable":"toot:discoverable",
"Device":"toot:Device",
"deviceId":"toot:deviceId",
"messageType":"toot:messageType",
"cipherText":"toot:cipherText",
"suspended":"toot:suspended",
"memorial":"toot:memorial",
"indexable":"toot:indexable"
}
],
"id":"https://search.example.com/users/searchtest",
"type":"Person",
"following":"https://search.example.com/users/searchtest/following",
"followers":"https://search.example.com/users/searchtest/followers",
"inbox":"https://search.example.com/users/searchtest/inbox",
"outbox":"https://search.example.com/users/searchtest/outbox",
"featured":"https://search.example.com/users/searchtest/collections/featured",
"featuredTags":"https://search.example.com/users/searchtest/collections/tags",
"preferredUsername":"searchtest",
"name":"searchtest",
"summary":"<p>Just a test (àáâãäåæ)</p>",
"url":"https://search.example.com/@searchtest",
"manuallyApprovesFollowers":false,
"discoverable":true,
"indexable":false,
"published":"2018-05-09T00:00:00Z",
"memorial":false,
"devices":"https://search.example.com/users/searchtest/collections/devices",
"endpoints":{
"sharedInbox":"https://search.example.com/inbox"
}
}
"""
@pytest.mark.django_db
def test_search_not_found(httpx_mock: HTTPXMock, api_client):
httpx_mock.add_response(status_code=404)
response = api_client.get(
"/api/v2/search",
content_type="application/json",
data={
"q": "https://notfound.example.com",
},
).json()
assert response["accounts"] == []
assert response["statuses"] == []
assert response["hashtags"] == []
@pytest.mark.django_db
@pytest.mark.parametrize(
"encoding",
[
"utf-8",
"iso-8859-1",
],
)
@pytest.mark.parametrize(
"content_type",
[
"application/json",
"application/ld+json",
"application/activity+json",
],
)
def test_search(
content_type: str,
encoding: str,
httpx_mock: HTTPXMock,
api_client,
):
httpx_mock.add_response(
headers={"Content-Type": f"{content_type}; charset={encoding}"},
content=test_account_json.encode(encoding),
)
response = api_client.get(
"/api/v2/search",
content_type="application/json",
data={
"q": "https://search.example.com/users/searchtest",
},
).json()
assert len(response["accounts"]) == 1
assert response["accounts"][0]["acct"] == "searchtest@search.example.com"
assert response["accounts"][0]["username"] == "searchtest"
assert response["accounts"][0]["note"] == "<p>Just a test (àáâãäåæ)</p>"

View file

@ -56,6 +56,32 @@ def test_post_status(api_client, identity):
assert response.status_code == 404
@pytest.mark.django_db
def test_post_statusless(api_client, identity):
"""
Tests we can post with media but no status
"""
# Create media attachment
attachment = PostAttachment.objects.create(
mimetype="image/webp",
name=None,
state=PostAttachmentStates.fetched,
author=identity,
)
# Post new one
response = api_client.post(
"/api/v1/statuses",
content_type="application/json",
data={
"media_ids": [attachment.id],
},
)
assert 200 <= response.status_code < 300
body = response.json()
assert body["content"] == "<p></p>"
assert body["media_attachments"][0]["description"] is None
@pytest.mark.django_db
def test_mention_format(api_client, identity, remote_identity):
"""

View file

@ -1,4 +1,5 @@
import pytest
from django.template.defaultfilters import linebreaks_filter
from core.html import FediverseHtmlParser
@ -101,6 +102,16 @@ def test_parser(identity):
assert parser.plain_text == "@TeSt@ExamPle.com"
assert parser.mentions == {"test@example.com"}
# Ensure hashtags are parsed and linkified in local posts
parser = FediverseHtmlParser(
linebreaks_filter("#tag1-x,#tag2 #标签。"), find_hashtags=True
)
assert (
parser.html
== '<p><a href="/tags/tag1/" rel="tag">#tag1</a>-x,<a href="/tags/tag2/" rel="tag">#tag2</a> <a href="/tags/标签/" rel="tag">#标签</a>。</p>'
)
assert parser.hashtags == {"tag1", "tag2", "标签"}
# Ensure hashtags are linked, even through spans, but not within hrefs
parser = FediverseHtmlParser(
'<a href="http://example.com#notahashtag">something</a> <span>#</span>hashtag <a href="https://example.com/tags/hashtagtwo/">#hashtagtwo</a>',

View file

@ -3,6 +3,36 @@ import pytest
from users.models import Domain
def test_valid_domain():
"""
Tests that a valid domain is valid
"""
assert Domain.is_valid_domain("example.com")
assert Domain.is_valid_domain("xn----gtbspbbmkef.xn--p1ai")
assert Domain.is_valid_domain("underscore_subdomain.example.com")
assert Domain.is_valid_domain("something.versicherung")
assert Domain.is_valid_domain("11.com")
assert Domain.is_valid_domain("a.cn")
assert Domain.is_valid_domain("sub1.sub2.sample.co.uk")
assert Domain.is_valid_domain("somerandomexample.xn--fiqs8s")
assert not Domain.is_valid_domain("über.com")
assert not Domain.is_valid_domain("example.com:4444")
assert not Domain.is_valid_domain("example.-com")
assert not Domain.is_valid_domain("foo@bar.com")
assert not Domain.is_valid_domain("example.")
assert not Domain.is_valid_domain("example.com.")
assert not Domain.is_valid_domain("-example.com")
assert not Domain.is_valid_domain("_example.com")
assert not Domain.is_valid_domain("_example._com")
assert not Domain.is_valid_domain("example_.com")
assert not Domain.is_valid_domain("example")
assert not Domain.is_valid_domain("a......b.com")
assert not Domain.is_valid_domain("a.123")
assert not Domain.is_valid_domain("123.123")
assert not Domain.is_valid_domain("123.123.123.123")
@pytest.mark.django_db
def test_recursive_block():
"""

View file

View file

View file

@ -0,0 +1,54 @@
import sys
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import Q
from django.utils import timezone
from users.models import Identity
class Command(BaseCommand):
help = "Prunes identities that have no local interaction"
def add_arguments(self, parser):
parser.add_argument(
"--number",
"-n",
type=int,
default=500,
help="The maximum number of identities to prune at once",
)
def handle(self, number: int, *args, **options):
if not settings.SETUP.REMOTE_PRUNE_HORIZON:
print("Pruning has been disabled as REMOTE_PRUNE_HORIZON=0")
sys.exit(2)
# Find a set of identities that match the initial criteria
print(f"Running query to find up to {number} unused identities...")
identities = Identity.objects.filter(
local=False,
created__lt=timezone.now(),
).exclude(
Q(interactions__post__local=True)
| Q(posts__isnull=False)
| Q(posts_mentioning__isnull=False)
| Q(outbound_follows__isnull=False)
| Q(inbound_follows__isnull=False)
| Q(outbound_blocks__isnull=False)
| Q(inbound_blocks__isnull=False)
)[
:number
]
identity_ids = identities.values_list("id", flat=True)
print(f" found {len(identity_ids)}")
if not identity_ids:
sys.exit(0)
# Delete them
print("Deleting...")
number_deleted, deleted = Identity.objects.filter(id__in=identity_ids).delete()
print("Deleted:")
for model, model_deleted in deleted.items():
print(f" {model}: {model_deleted}")
sys.exit(1)

View file

@ -9,6 +9,7 @@ from django.db import migrations, models
import core.snowflake
import core.uploads
import stator.models
import users.models.domain
import users.models.follow
import users.models.identity
import users.models.inbox_message
@ -58,7 +59,12 @@ class Migration(migrations.Migration):
fields=[
(
"domain",
models.CharField(max_length=250, primary_key=True, serialize=False),
models.CharField(
max_length=250,
primary_key=True,
serialize=False,
validators=[users.models.domain._domain_validator],
),
),
(
"service_domain",

View file

@ -37,7 +37,7 @@ class BlockStates(StateGraph):
"""
# Mutes don't send but might need expiry
if instance.mute:
return cls.awaiting_expiry
return cls.awaiting_expiry if instance.expires else cls.sent
# Remote blocks should not be here, local blocks just work
if not instance.source.local or instance.target.local:
return cls.sent
@ -195,8 +195,7 @@ class Block(StatorModel):
raise ValueError("You cannot mute from a remote Identity")
block = cls.maybe_get(source=source, target=target, mute=True)
if block is not None:
if not block.active:
block.state = BlockStates.new # type:ignore
block.state = BlockStates.new # type:ignore
if duration:
block.expires = timezone.now() + datetime.timedelta(seconds=duration)
block.include_notifications = include_notifications

View file

@ -1,5 +1,6 @@
import json
import logging
import re
import ssl
from functools import cached_property
from typing import Optional
@ -8,12 +9,15 @@ import httpx
import pydantic
import urlman
from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import models
from core.models import Config
from stator.models import State, StateField, StateGraph, StatorModel
from users.schemas import NodeInfo
logger = logging.getLogger(__name__)
class DomainStates(StateGraph):
outdated = State(try_interval=60 * 30, force_initial=True)
@ -51,6 +55,14 @@ class DomainStates(StateGraph):
return cls.outdated
def _domain_validator(value: str):
if not Domain.is_valid_domain(value):
raise ValidationError(
"%(value)s is not a valid domain",
params={"value": value},
)
class Domain(StatorModel):
"""
Represents a domain that a user can have an account on.
@ -69,7 +81,9 @@ class Domain(StatorModel):
display domains for now, until we start doing better probing.
"""
domain = models.CharField(max_length=250, primary_key=True)
domain = models.CharField(
max_length=250, primary_key=True, validators=[_domain_validator]
)
service_domain = models.CharField(
max_length=250,
null=True,
@ -117,6 +131,19 @@ class Domain(StatorModel):
class Meta:
indexes: list = []
@classmethod
def is_valid_domain(cls, domain: str) -> bool:
"""
Check if a domain is valid; the domain must be lowercase
"""
return (
re.match(
r"^(?:[a-z0-9](?:[a-z0-9-_]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-_]{0,61}[a-z]$",
domain,
)
is not None
)
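# Illustrative results, mirroring the tests added in this release:
#   Domain.is_valid_domain("example.com")                 -> True
#   Domain.is_valid_domain("über.com")                    -> False (IDNs must be punycoded)
#   Domain.is_valid_domain("xn----gtbspbbmkef.xn--p1ai")  -> True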
@classmethod
def get_remote_domain(cls, domain: str) -> "Domain":
return cls.objects.get_or_create(domain=domain.lower(), local=False)[0]
@ -209,13 +236,14 @@ class Domain(StatorModel):
and response.status_code < 500
and response.status_code not in [401, 403, 404, 406, 410]
):
logging.warning(
f"Client error fetching nodeinfo: {str(ex)}",
logger.warning(
"Client error fetching nodeinfo: %d %s %s",
response.status_code,
nodeinfo20_url,
ex,
extra={
"code": response.status_code,
"content": response.content,
"domain": self.domain,
"nodeinfo20_url": nodeinfo20_url,
},
)
return None
@ -223,11 +251,12 @@ class Domain(StatorModel):
try:
info = NodeInfo(**response.json())
except (json.JSONDecodeError, pydantic.ValidationError) as ex:
logging.warning(
f"Client error decoding nodeinfo: {str(ex)}",
logger.warning(
"Client error decoding nodeinfo: %s %s",
nodeinfo20_url,
ex,
extra={
"domain": self.domain,
"nodeinfo20_url": nodeinfo20_url,
},
)
return None

View file

@ -11,11 +11,13 @@ from users.models.block import Block
from users.models.identity import Identity
from users.models.inbox_message import InboxMessage
logger = logging.getLogger(__name__)
class FollowStates(StateGraph):
unrequested = State(try_interval=600)
pending_approval = State(externally_progressed=True)
accepting = State(try_interval=24 * 60 * 60)
accepting = State(try_interval=600)
rejecting = State(try_interval=24 * 60 * 60)
accepted = State(externally_progressed=True)
undone = State(try_interval=24 * 60 * 60)
@ -79,7 +81,9 @@ class FollowStates(StateGraph):
except httpx.RequestError:
return
return cls.pending_approval
# local/remote follow local, check manually_approve
# local/remote follow local, check deleted & manually_approve
if instance.target.deleted:
return cls.rejecting
if instance.target.manually_approves_followers:
from activities.models import TimelineEvent
@ -90,6 +94,9 @@ class FollowStates(StateGraph):
@classmethod
def handle_accepting(cls, instance: "Follow"):
if not instance.source.local:
# Don't send Accept if remote identity wasn't fetched yet
if not instance.source.inbox_uri:
return
# send an Accept object to the source server
try:
instance.target.signed_request(
@ -275,7 +282,7 @@ class Follow(StatorModel):
"""
return {
"type": "Accept",
"id": self.uri + "#accept",
"id": f"{self.target.actor_uri}follow/{self.id}/#accept",
"actor": self.target.actor_uri,
"object": self.to_ap(),
}
@ -286,7 +293,7 @@ class Follow(StatorModel):
"""
return {
"type": "Reject",
"id": self.uri + "#reject",
"id": f"{self.target.actor_uri}follow/{self.id}/#reject",
"actor": self.target.actor_uri,
"object": self.to_ap(),
}
@ -350,7 +357,7 @@ class Follow(StatorModel):
try:
follow = cls.by_ap(data, create=True)
except Identity.DoesNotExist:
logging.info(
logger.info(
"Identity not found for incoming Follow", extra={"data": data}
)
return
@ -367,7 +374,7 @@ class Follow(StatorModel):
try:
follow = cls.by_ap(data["object"])
except (cls.DoesNotExist, Identity.DoesNotExist):
logging.info(
logger.info(
"Follow or Identity not found for incoming Accept",
extra={"data": data},
)
@ -389,7 +396,7 @@ class Follow(StatorModel):
try:
follow = cls.by_ap(data["object"])
except (cls.DoesNotExist, Identity.DoesNotExist):
logging.info(
logger.info(
"Follow or Identity not found for incoming Reject",
extra={"data": data},
)
@ -419,7 +426,7 @@ class Follow(StatorModel):
try:
follow = cls.by_ap(data["object"])
except (cls.DoesNotExist, Identity.DoesNotExist):
logging.info(
logger.info(
"Follow or Identity not found for incoming Undo", extra={"data": data}
)
return

View file

@ -14,6 +14,7 @@ from lxml import etree
from core.exceptions import ActorMismatchError
from core.html import ContentRenderer, FediverseHtmlParser
from core.json import json_from_response
from core.ld import (
canonicalise,
format_ld_date,
@ -37,6 +38,8 @@ from users.models.domain import Domain
from users.models.inbox_message import InboxMessage
from users.models.system_actor import SystemActor
logger = logging.getLogger(__name__)
class IdentityStates(StateGraph):
"""
@ -117,12 +120,47 @@ class IdentityStates(StateGraph):
@classmethod
def handle_deleted(cls, instance: "Identity"):
from activities.models import FanOut
from activities.models import (
FanOut,
Post,
PostInteraction,
PostInteractionStates,
PostStates,
TimelineEvent,
)
from users.models import Bookmark, Follow, FollowStates, HashtagFollow, Report
if not instance.local:
return cls.updated
# Delete local data
TimelineEvent.objects.filter(identity=instance).delete()
Bookmark.objects.filter(identity=instance).delete()
HashtagFollow.objects.filter(identity=instance).delete()
Report.objects.filter(source_identity=instance).delete()
# Nullify all fields and fanout
instance.name = ""
instance.summary = ""
instance.metadata = []
instance.aliases = []
instance.icon_uri = ""
instance.discoverable = False
instance.image.delete(save=False)
instance.icon.delete(save=False)
instance.save()
cls.targets_fan_out(instance, FanOut.Types.identity_edited)
# Delete all posts and interactions
Post.transition_perform_queryset(instance.posts, PostStates.deleted)
PostInteraction.transition_perform_queryset(
instance.interactions, PostInteractionStates.undone
)
# Fanout the deletion and unfollow from both directions
cls.targets_fan_out(instance, FanOut.Types.identity_deleted)
for follower in Follow.objects.filter(target=instance):
follower.transition_perform(FollowStates.rejecting)
for following in Follow.objects.filter(source=instance):
following.transition_perform(FollowStates.undone)
return cls.deleted_fanned_out
@classmethod
@ -136,7 +174,7 @@ class IdentityStates(StateGraph):
@classmethod
def handle_updated(cls, instance: "Identity"):
if instance.state_age > Config.system.identity_max_age:
if not instance.local and instance.state_age > Config.system.identity_max_age:
return cls.outdated
@ -397,6 +435,8 @@ class Identity(StatorModel):
domain = domain.domain
else:
domain = domain.lower()
domain_instance = Domain.get_domain(domain)
local = domain_instance.local if domain_instance else local
with transaction.atomic():
try:
@ -672,10 +712,11 @@ class Identity(StatorModel):
"""
Marks the identity and all of its related content as deleted.
"""
# Move all posts to deleted
from activities.models.post import Post, PostStates
from api.models import Authorization, Token
Post.transition_perform_queryset(self.posts, PostStates.deleted)
# Remove all login tokens
Authorization.objects.filter(identity=self).delete()
Token.objects.filter(identity=self).delete()
# Remove all users from ourselves and mark deletion date
self.users.set([])
self.deleted = timezone.now()
@ -872,15 +913,18 @@ class Identity(StatorModel):
# Their account got deleted, so let's do the same.
Identity.objects.filter(pk=self.pk).delete()
if status_code < 500 and status_code not in [401, 403, 404, 406, 410]:
logging.info(
logger.info(
"Client error fetching actor: %d %s", status_code, self.actor_uri
)
return False
json_data = json_from_response(response)
if not json_data:
return False
try:
document = canonicalise(response.json(), include_security=True)
document = canonicalise(json_data, include_security=True)
except ValueError:
# servers with empty or invalid responses are inevitable
logging.info(
logger.info(
"Invalid response fetching actor %s",
self.actor_uri,
extra={
@ -942,10 +986,10 @@ class Identity(StatorModel):
self.domain = Domain.get_remote_domain(webfinger_domain)
except TryAgainLater:
# continue with original domain when webfinger times out
logging.info("WebFinger timed out: %s", self.actor_uri)
logger.info("WebFinger timed out: %s", self.actor_uri)
pass
except ValueError as exc:
logging.info(
logger.info(
"Can't parse WebFinger: %s %s",
exc.args[0],
self.actor_uri,

View file

@ -172,7 +172,7 @@ class Report(StatorModel):
subject_post=subject_post,
source_domain=Domain.get_remote_domain(domain_id),
type="remote",
complaint=data.get("content"),
complaint=str(data.get("content", "")),
)
def to_ap(self):

View file

@ -19,6 +19,8 @@ from users.models import (
User,
)
logger = logging.getLogger(__name__)
class IdentityService:
"""
@ -226,7 +228,7 @@ class IdentityService:
state__in=PostInteractionStates.group_active(),
)
except MultipleObjectsReturned as exc:
logging.exception("%s on %s", exc, object_uri)
logger.exception("%s on %s", exc, object_uri)
pass
except Post.DoesNotExist:
# ignore 404s...

View file

@ -18,6 +18,8 @@ def by_handle_or_404(request, handle, local=True, fetch=False) -> Identity:
domain = domain_instance.domain
else:
username, domain = handle.split("@", 1)
if not Domain.is_valid_domain(domain):
raise Http404("Invalid domain")
# Resolve the domain to the display domain
domain_instance = Domain.get_domain(domain)
if domain_instance is None:

View file

@ -26,6 +26,8 @@ from users.models import Identity, InboxMessage, SystemActor
from users.models.domain import Domain
from users.shortcuts import by_handle_or_404
logger = logging.getLogger(__name__)
class HttpResponseUnauthorized(HttpResponse):
status_code = 401
@ -147,7 +149,7 @@ class Inbox(View):
# This ensures that the signature used for the headers matches the actor
# described in the payload.
if "actor" not in document:
logging.warning("Inbox error: unspecified actor")
logger.warning("Inbox error: unspecified actor")
return HttpResponseBadRequest("Unspecified actor")
identity = Identity.by_actor_uri(document["actor"], create=True, transient=True)
@ -167,7 +169,7 @@ class Inbox(View):
domain = Domain.get_remote_domain(actor_url_parts.hostname)
if identity.blocked or domain.recursively_blocked():
# I love to lie! Throw it away!
logging.info(
logger.info(
"Inbox: Discarded message from blocked %s %s",
"domain" if domain.recursively_blocked() else "user",
identity.actor_uri,
@ -196,21 +198,21 @@ class Inbox(View):
request,
identity.public_key,
)
logging.debug(
logger.debug(
"Inbox: %s from %s has good HTTP signature",
document_type,
identity,
)
else:
logging.info(
logger.info(
"Inbox: New actor, no key available: %s",
document["actor"],
)
except VerificationFormatError as e:
logging.warning("Inbox error: Bad HTTP signature format: %s", e.args[0])
logger.warning("Inbox error: Bad HTTP signature format: %s", e.args[0])
return HttpResponseBadRequest(e.args[0])
except VerificationError:
logging.warning("Inbox error: Bad HTTP signature from %s", identity)
logger.warning("Inbox error: Bad HTTP signature from %s", identity)
return HttpResponseUnauthorized("Bad signature")
# Mastodon advices not implementing LD Signatures, but
@ -224,18 +226,18 @@ class Inbox(View):
creator, create=True, transient=True
)
if not creator_identity.public_key:
logging.info("Inbox: New actor, no key available: %s", creator)
logger.info("Inbox: New actor, no key available: %s", creator)
# if we can't verify it, we don't keep it
document.pop("signature")
else:
LDSignature.verify_signature(document, creator_identity.public_key)
logging.debug(
logger.debug(
"Inbox: %s from %s has good LD signature",
document["type"],
creator_identity,
)
except VerificationFormatError as e:
logging.warning("Inbox error: Bad LD signature format: %s", e.args[0])
logger.warning("Inbox error: Bad LD signature format: %s", e.args[0])
return HttpResponseBadRequest(e.args[0])
except VerificationError:
# An invalid LD Signature might also indicate nothing but
@ -243,14 +245,14 @@ class Inbox(View):
# Strip it out if we can't verify it.
if "signature" in document:
document.pop("signature")
logging.info(
logger.info(
"Inbox: Stripping invalid LD signature from %s %s",
creator_identity,
document["id"],
)
if not ("signature" in request or "signature" in document):
logging.debug(
logger.debug(
"Inbox: %s from %s is unauthenticated. That's OK.",
document["type"],
identity,

View file

@ -45,6 +45,7 @@ class AnnouncementCreate(CreateView):
class AnnouncementEdit(UpdateView):
model = Announcement
template_name = "admin/announcement_edit.html"
success_url = Announcement.urls.admin_root
extra_context = {"section": "announcements"}
def get_context_data(self, **kwargs):

View file

@ -38,6 +38,10 @@ class BasicSettings(AdminSettingsPage):
"title": "Maximum Post Length",
"help_text": "The maximum number of characters allowed per post",
},
"max_media_attachments": {
"title": "Maximum Media Attachments",
"help_text": "The maximum number of media attachments allowed per post.\nA value other than 4 may be unsupported by clients.",
},
"post_minimum_interval": {
"title": "Minimum Posting Interval",
"help_text": "The minimum number of seconds a user must wait between posts",
@ -129,6 +133,7 @@ class BasicSettings(AdminSettingsPage):
],
"Posts": [
"post_length",
"max_media_attachments",
"post_minimum_interval",
"content_warning_text",
"hashtag_unreviewed_are_public",