geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit 2b1ff38cf9aa53855ba69a9013889eaa7191407e
parent 832865d47aebd6eb4b955657f75a496963bab269
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Wed, 17 Jun 2020 07:33:48 -0400

[crawl] Ignore some more alexschroeder pages

Diffstat:
M gus/crawl.py | 28 +++++++++++++++++++++++++++-
1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -109,12 +109,15 @@ EXCLUDED_URL_PREFIXES = [
     # Alex Schroeder's problematic stuff
     "gemini://alexschroeder.ch/image_external",
-    "gemini://alexschroeder.ch/comments_on",
+    "gemini://alexschroeder.ch/html/",
+    "gemini://alexschroeder.ch/diff/",
+    "gemini://alexschroeder.ch/history/",
     "gemini://alexschroeder.ch/http",
     "gemini://alexschroeder.ch/https",
     "gemini://alexschroeder.ch/tag/",
     "gemini://alexschroeder.ch/raw/",
     "gemini://alexschroeder.ch/map/",
+    "gemini://alexschroeder.ch/do/comment",
     "gemini://alexschroeder.ch/do/rc",
     "gemini://alexschroeder.ch/do/rss",
     "gemini://alexschroeder.ch/do/new",
@@ -122,6 +125,28 @@ EXCLUDED_URL_PREFIXES = [
     "gemini://alexschroeder.ch/do/tags",
     "gemini://alexschroeder.ch/do/match",
     "gemini://alexschroeder.ch/do/search",
+
+    # communitywiki's problematic stuff
+    "gemini://communitywiki.org:1966/image_external",
+    "gemini://communitywiki.org:1966/html/",
+    "gemini://communitywiki.org:1966/diff/",
+    "gemini://communitywiki.org:1966/history/",
+    "gemini://communitywiki.org:1966/http",
+    "gemini://communitywiki.org:1966/https",
+    "gemini://communitywiki.org:1966/tag/",
+    "gemini://communitywiki.org:1966/raw/",
+    "gemini://communitywiki.org:1966/map/",
+    "gemini://communitywiki.org:1966/do/comment",
+    "gemini://communitywiki.org:1966/do/rc",
+    "gemini://communitywiki.org:1966/do/rss",
+    "gemini://communitywiki.org:1966/do/new",
+    "gemini://communitywiki.org:1966/do/more",
+    "gemini://communitywiki.org:1966/do/tags",
+    "gemini://communitywiki.org:1966/do/match",
+    "gemini://communitywiki.org:1966/do/search",
+
+    # youtube mirror
+    "gemini://pon.ix.tc/cgi-bin/youtube.cgi?",
 ]
 EXCLUDED_URL_PATHS = [
@@ -135,6 +160,7 @@ EXCLUDED_URL_PATHS = [
 CRAWL_DELAYS = {
     "alexschroeder.ch": 5000,
+    "communitywiki.org": 5000,
 }