commit ec356baa82d67ea36aa3a32ce99c341e89fc9960
parent f8aa717e97594405e80157674ac355e15da4b8af
Author: Natalie Pendragon <natpen@natpen.net>
Date: Wed, 3 Jun 2020 12:50:39 -0400
[crawl] Ignore some troublesome content from alexschroeder.ch
Diffstat:
1 file changed, 10 insertions(+), 0 deletions(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -100,6 +100,16 @@ EXCLUDED_URL_PREFIXES = [
# Geddit
"gemini://geddit.pitr.ca/post?",
"gemini://geddit.pitr.ca/c/",
+
+ # alexschroeder.ch, because its robots.txt isn't working...
+ "gemini://alexschroeder.ch/map/",
+ "gemini://alexschroeder.ch/do/rc",
+ "gemini://alexschroeder.ch/do/rss",
+ "gemini://alexschroeder.ch/do/new",
+ "gemini://alexschroeder.ch/do/more",
+ "gemini://alexschroeder.ch/do/tags",
+ "gemini://alexschroeder.ch//do/match",
+ "gemini://alexschroeder.ch/do/search",
]
EXCLUDED_URL_PATHS = [