From f8afdfc6a923bfd57a2669accf62a284712f7471 Mon Sep 17 00:00:00 2001 From: Willi Hohenstein Date: Thu, 27 Jan 2022 17:30:02 +0100 Subject: [PATCH] Fixes #1871 (huge amount of traffic from bots) and closes #1873 (a test leaves a file behind after running). --- bookwyrm/templates/robots.txt | 75 ++++++++++++++++++- bookwyrm/tests/views/admin/test_federation.py | 6 +- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/bookwyrm/templates/robots.txt b/bookwyrm/templates/robots.txt index dc7b6bcbb..4e7f1a4a9 100644 --- a/bookwyrm/templates/robots.txt +++ b/bookwyrm/templates/robots.txt @@ -1,5 +1,78 @@ # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file +##### AhrefsBot ##### +# see http://ahrefs.com/robot/ +User-agent: AhrefsBot +Crawl-Delay: 10 +#Disallow: / + + +##### SemrushBot ##### +# see http://www.semrush.com/bot.html +User-agent: SemrushBot +Crawl-Delay: 10 +#Disallow: / + +# To block SemrushBot from crawling your site for different SEO and technical issues: +User-agent: SiteAuditBot +Disallow: / + +#To block SemrushBot from crawling your site for Backlink Audit tool: +User-agent: SemrushBot-BA +Disallow: / + +#To block SemrushBot from crawling your site for On Page SEO Checker tool and similar tools: +User-agent: SemrushBot-SI +Disallow: / + +#To block SemrushBot from checking URLs on your site for SWA tool: +User-agent: SemrushBot-SWA +Disallow: / + +#To block SemrushBot from crawling your site for Content Analyzer and Post Tracking tools: +User-agent: SemrushBot-CT +Disallow: / + +#To block SemrushBot from crawling your site for Brand Monitoring: +User-agent: SemrushBot-BM +Disallow: / + +#To block SplitSignalBot from crawling your site for SplitSignal tool: +User-agent: SplitSignalBot +Disallow: / + +#To block SemrushBot-COUB from crawling your site for Content Outline Builder tool: +User-agent: SemrushBot-COUB +Disallow: / + + +##### DotBot ##### +# see https://opensiteexplorer.org/dotbot +User-agent: 
dotbot +Crawl-delay: 10 +#Disallow: / + + +##### BLEXBot ##### +# see http://webmeup-crawler.com/ +User-agent: BLEXBot +Crawl-delay: 10 +#Disallow: / + + +##### MJ12bot ##### +# see http://mj12bot.com/ +User-Agent: MJ12bot +Crawl-Delay: 20 +#Disallow: / + + +##### PetalBot ##### +# see https://webmaster.petalsearch.com/site/petalbot +User-agent: PetalBot +Disallow: / + + User-agent: * Disallow: /static/js/ -Disallow: /static/css/ +Disallow: /static/css/ \ No newline at end of file diff --git a/bookwyrm/tests/views/admin/test_federation.py b/bookwyrm/tests/views/admin/test_federation.py index deed5bd38..be37f0630 100644 --- a/bookwyrm/tests/views/admin/test_federation.py +++ b/bookwyrm/tests/views/admin/test_federation.py @@ -1,4 +1,5 @@ """ test for app action functionality """ +import os import json from unittest.mock import patch @@ -39,7 +40,7 @@ class FederationViews(TestCase): ) models.SiteSettings.objects.create() - + def test_federation_page(self): """there are so many views, this just makes sure it LOADS""" view = views.Federation.as_view() @@ -207,3 +208,6 @@ class FederationViews(TestCase): created = models.FederatedServer.objects.get(server_name="server.name") self.assertEqual(created.status, "blocked") self.assertEqual(created.notes, "https://explanation.url") + + # remove file.json after test + os.remove("file.json") \ No newline at end of file