geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit ec356baa82d67ea36aa3a32ce99c341e89fc9960
parent f8aa717e97594405e80157674ac355e15da4b8af
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Wed,  3 Jun 2020 12:50:39 -0400

[crawl] Ignore some troublesome content from alexschroeder.ch

Diffstat:
M gus/crawl.py | 10 ++++++++++
1 file changed, 10 insertions(+), 0 deletions(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -100,6 +100,16 @@ EXCLUDED_URL_PREFIXES = [
     # Geddit
     "gemini://geddit.pitr.ca/post?",
     "gemini://geddit.pitr.ca/c/",
+
+    # alexschroeder.ch b/c its robots.txt isn't working...
+    "gemini://alexschroeder.ch/map/",
+    "gemini://alexschroeder.ch/do/rc",
+    "gemini://alexschroeder.ch/do/rss",
+    "gemini://alexschroeder.ch/do/new",
+    "gemini://alexschroeder.ch/do/more",
+    "gemini://alexschroeder.ch/do/tags",
+    "gemini://alexschroeder.ch//do/match",
+    "gemini://alexschroeder.ch/do/search",
 ]
 
 EXCLUDED_URL_PATHS = [