From f8afdfc6a923bfd57a2669accf62a284712f7471 Mon Sep 17 00:00:00 2001 From: Willi Hohenstein Date: Thu, 27 Jan 2022 17:30:02 +0100 Subject: [PATCH 1/3] This fixes #1871 huge amount of traffic from bots and closes #1873 that leaves a file after testing. --- bookwyrm/templates/robots.txt | 75 ++++++++++++++++++- bookwyrm/tests/views/admin/test_federation.py | 6 +- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/bookwyrm/templates/robots.txt b/bookwyrm/templates/robots.txt index dc7b6bcbb..4e7f1a4a9 100644 --- a/bookwyrm/templates/robots.txt +++ b/bookwyrm/templates/robots.txt @@ -1,5 +1,78 @@ # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file +##### AhrefsBot ##### +# see http://ahrefs.com/robot/ +User-agent: AhrefsBot +Crawl-Delay: 10 +#Disallow: / + + +##### SemrushBot ##### +# see http://www.semrush.com/bot.html +User-agent: SemrushBot +Crawl-Delay: 10 +#Disallow: / + +# To block SemrushBot from crawling your site for different SEO and technical issues: +User-agent: SiteAuditBot +Disallow: / + +#To block SemrushBot from crawling your site for Backlink Audit tool: +User-agent: SemrushBot-BA +Disallow: / + +#To block SemrushBot from crawling your site for On Page SEO Checker tool and similar tools: +User-agent: SemrushBot-SI +Disallow: / + +#To block SemrushBot from checking URLs on your site for SWA tool: +User-agent: SemrushBot-SWA +Disallow: / + +#To block SemrushBot from crawling your site for Content Analyzer and Post Tracking tools: +User-agent: SemrushBot-CT +Disallow: / + +#To block SemrushBot from crawling your site for Brand Monitoring: +User-agent: SemrushBot-BM +Disallow: / + +#To block SplitSignalBot from crawling your site for SplitSignal tool: +User-agent: SplitSignalBot +Disallow: / + +#To block SemrushBot-COUB from crawling your site for Content Outline Builder tool: +User-agent: SemrushBot-COUB +Disallow: / + + +##### DotBot ##### +# see https://opensiteexplorer.org/dotbot +User-agent: 
dotbot +Crawl-delay: 10 +#Disallow: / + + +##### BLEXBot ##### +# see http://webmeup-crawler.com/ +User-agent: BLEXBot +Crawl-delay: 10 +#Disallow: / + + +##### MJ12bot ##### +# see http://mj12bot.com/ +User-Agent: MJ12bot +Crawl-Delay: 20 +#Disallow: / + + +##### PetalBot ##### +# see https://webmaster.petalsearch.com/site/petalbot +User-agent: PetalBot +Disallow: / + + User-agent: * Disallow: /static/js/ -Disallow: /static/css/ +Disallow: /static/css/ \ No newline at end of file diff --git a/bookwyrm/tests/views/admin/test_federation.py b/bookwyrm/tests/views/admin/test_federation.py index deed5bd38..be37f0630 100644 --- a/bookwyrm/tests/views/admin/test_federation.py +++ b/bookwyrm/tests/views/admin/test_federation.py @@ -1,4 +1,5 @@ """ test for app action functionality """ +import os import json from unittest.mock import patch @@ -39,7 +40,7 @@ class FederationViews(TestCase): ) models.SiteSettings.objects.create() - + def test_federation_page(self): """there are so many views, this just makes sure it LOADS""" view = views.Federation.as_view() @@ -207,3 +208,6 @@ class FederationViews(TestCase): created = models.FederatedServer.objects.get(server_name="server.name") self.assertEqual(created.status, "blocked") self.assertEqual(created.notes, "https://explanation.url") + + # remove file.json after test + os.remove("file.json") \ No newline at end of file From dd35c90042cd0b5c28a4fbedfce7a3a249362227 Mon Sep 17 00:00:00 2001 From: Willi Hohenstein Date: Thu, 27 Jan 2022 19:54:12 +0100 Subject: [PATCH 2/3] fix linting errors --- bookwyrm/templates/robots.txt | 2 +- bookwyrm/tests/views/admin/test_federation.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bookwyrm/templates/robots.txt b/bookwyrm/templates/robots.txt index 4e7f1a4a9..a328b6e90 100644 --- a/bookwyrm/templates/robots.txt +++ b/bookwyrm/templates/robots.txt @@ -75,4 +75,4 @@ Disallow: / User-agent: * Disallow: /static/js/ -Disallow: /static/css/ \ No newline at end of file 
+Disallow: /static/css/ diff --git a/bookwyrm/tests/views/admin/test_federation.py b/bookwyrm/tests/views/admin/test_federation.py index be37f0630..19c8909f0 100644 --- a/bookwyrm/tests/views/admin/test_federation.py +++ b/bookwyrm/tests/views/admin/test_federation.py @@ -40,7 +40,7 @@ class FederationViews(TestCase): ) models.SiteSettings.objects.create() - + def test_federation_page(self): """there are so many views, this just makes sure it LOADS""" view = views.Federation.as_view() @@ -210,4 +210,5 @@ class FederationViews(TestCase): self.assertEqual(created.notes, "https://explanation.url") # remove file.json after test - os.remove("file.json") \ No newline at end of file + os.remove("file.json") + \ No newline at end of file From 72c22cdcc0a5e6666f037745135b59aeb5308d8a Mon Sep 17 00:00:00 2001 From: Willi Hohenstein Date: Thu, 27 Jan 2022 21:21:28 +0100 Subject: [PATCH 3/3] reformatted file --- bookwyrm/tests/views/admin/test_federation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bookwyrm/tests/views/admin/test_federation.py b/bookwyrm/tests/views/admin/test_federation.py index 19c8909f0..340ed6052 100644 --- a/bookwyrm/tests/views/admin/test_federation.py +++ b/bookwyrm/tests/views/admin/test_federation.py @@ -211,4 +211,3 @@ class FederationViews(TestCase): # remove file.json after test os.remove("file.json") - \ No newline at end of file