commit 2b1ff38cf9aa53855ba69a9013889eaa7191407e
parent 832865d47aebd6eb4b955657f75a496963bab269
Author: Natalie Pendragon <natpen@natpen.net>
Date: Wed, 17 Jun 2020 07:33:48 -0400
[crawl] Ignore some more alexschroeder pages
Diffstat:
1 file changed, 27 insertions(+), 1 deletion(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -109,12 +109,15 @@ EXCLUDED_URL_PREFIXES = [
# Alex Schroeder's problematic stuff
"gemini://alexschroeder.ch/image_external",
- "gemini://alexschroeder.ch/comments_on",
+ "gemini://alexschroeder.ch/html/",
+ "gemini://alexschroeder.ch/diff/",
+ "gemini://alexschroeder.ch/history/",
"gemini://alexschroeder.ch/http",
"gemini://alexschroeder.ch/https",
"gemini://alexschroeder.ch/tag/",
"gemini://alexschroeder.ch/raw/",
"gemini://alexschroeder.ch/map/",
+ "gemini://alexschroeder.ch/do/comment",
"gemini://alexschroeder.ch/do/rc",
"gemini://alexschroeder.ch/do/rss",
"gemini://alexschroeder.ch/do/new",
@@ -122,6 +125,28 @@ EXCLUDED_URL_PREFIXES = [
"gemini://alexschroeder.ch/do/tags",
"gemini://alexschroeder.ch/do/match",
"gemini://alexschroeder.ch/do/search",
+
+ # communitywiki's problematic stuff (same kinds of paths as the alexschroeder.ch entries above)
+ "gemini://communitywiki.org:1966/image_external",
+ "gemini://communitywiki.org:1966/html/",
+ "gemini://communitywiki.org:1966/diff/",
+ "gemini://communitywiki.org:1966/history/",
+ "gemini://communitywiki.org:1966/http",
+ "gemini://communitywiki.org:1966/https",
+ "gemini://communitywiki.org:1966/tag/",
+ "gemini://communitywiki.org:1966/raw/",
+ "gemini://communitywiki.org:1966/map/",
+ "gemini://communitywiki.org:1966/do/comment",
+ "gemini://communitywiki.org:1966/do/rc",
+ "gemini://communitywiki.org:1966/do/rss",
+ "gemini://communitywiki.org:1966/do/new",
+ "gemini://communitywiki.org:1966/do/more",
+ "gemini://communitywiki.org:1966/do/tags",
+ "gemini://communitywiki.org:1966/do/match",
+ "gemini://communitywiki.org:1966/do/search",
+
+ # YouTube mirror (CGI script; the trailing "?" excludes every URL that carries a query string)
+ "gemini://pon.ix.tc/cgi-bin/youtube.cgi?",
]
EXCLUDED_URL_PATHS = [
@@ -135,6 +160,7 @@ EXCLUDED_URL_PATHS = [
CRAWL_DELAYS = {
"alexschroeder.ch": 5000,
+ "communitywiki.org": 5000,
}