Templatize robots.txt (#478)

This commit is contained in:
Corry Haines 2023-01-29 11:27:07 -08:00 committed by GitHub
parent aeba38b8ae
commit 93c0af992b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 38 additions and 5 deletions

View file

@ -2,6 +2,7 @@ import json
from typing import ClassVar
import markdown_it
from django.conf import settings
from django.http import HttpResponse
from django.shortcuts import redirect
from django.templatetags.static import static
@ -69,6 +70,23 @@ class StaticContentView(View):
raise NotImplementedError()
@method_decorator(cache_page(60 * 60), name="dispatch")
class RobotsTxt(TemplateView):
    """
    Serves the robots.txt for Takahē

    To specify additional user-agents to disallow, use
    TAKAHE_ROBOTS_TXT_DISALLOWED_USER_AGENTS
    """

    template_name = "robots.txt"
    content_type = "text/plain"

    def get_context_data(self, **kwargs):
        # Accept **kwargs and merge the base context, matching Django's
        # TemplateView.get_context_data contract — the original signature
        # would raise TypeError if the URLconf ever supplied kwargs.
        context = super().get_context_data(**kwargs)
        # NOTE(review): the TAKAHE_ prefix in the docstring refers to the
        # env-var name; the Django setting itself is unprefixed — presumably
        # the settings loader strips the prefix. Verify against the Settings
        # class. Default to an empty list when the setting is absent.
        context["user_agents"] = getattr(
            settings, "ROBOTS_TXT_DISALLOWED_USER_AGENTS", []
        )
        return context
@method_decorator(cache_control(max_age=60 * 15), name="dispatch")
class AppManifest(StaticContentView):
"""

View file

@ -29,11 +29,6 @@ server {
proxy_hide_header X-Takahe-User;
proxy_hide_header X-Takahe-Identity;
# Serve robots.txt from the non-collected dir as a special case.
location /robots.txt {
alias /takahe/static/robots.txt;
}
# Serves static files from the collected dir
location /static/ {
# Files in static have cache-busting hashes in the name, thus can be cached forever

View file

@ -105,6 +105,10 @@ class Settings(BaseSettings):
AUTO_ADMIN_EMAIL: EmailStr | None = None
ERROR_EMAILS: list[EmailStr] | None = None
#: If set, a list of user agents to completely disallow in robots.txt
#: List formatting must be a valid JSON list, such as `["Agent1", "Agent2"]`
ROBOTS_TXT_DISALLOWED_USER_AGENTS: list[str] = Field(default_factory=list)
MEDIA_URL: str = "/media/"
MEDIA_ROOT: str = str(BASE_DIR / "media")
MEDIA_BACKEND: MediaBackendUrl | None = None
@ -313,6 +317,8 @@ STATOR_TOKEN = SETUP.STATOR_TOKEN
STATOR_CONCURRENCY = SETUP.STATOR_CONCURRENCY
STATOR_CONCURRENCY_PER_MODEL = SETUP.STATOR_CONCURRENCY_PER_MODEL
ROBOTS_TXT_DISALLOWED_USER_AGENTS = SETUP.ROBOTS_TXT_DISALLOWED_USER_AGENTS
CORS_ORIGIN_ALLOW_ALL = True # Temporary
CORS_ORIGIN_WHITELIST = SETUP.CORS_HOSTS
CORS_ALLOW_CREDENTIALS = True

View file

@ -19,6 +19,7 @@ from users.views import (
urlpatterns = [
path("", core.homepage),
path("robots.txt", core.RobotsTxt.as_view()),
path("manifest.json", core.AppManifest.as_view()),
# Activity views
path("notifications/", timelines.Notifications.as_view(), name="notifications"),

13
templates/robots.txt Normal file
View file

User-agent: *
# Don't allow any bot to crawl tags.
Disallow: /tags/
Disallow: /tags/*
# Don't allow bots to crawl through the proxy
Disallow: /proxy/*
{% for user_agent in user_agents %}
User-agent: {{ user_agent }}
Disallow: /
{% endfor %}