geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit cbcbcc59a100083675a04ff2114446fcfdc7fe8e
parent 9dc0120b92c51f8945fa642d1405f9a0bc6f4c39
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Mon,  2 Mar 2020 06:43:56 -0500

[crawl] Respect "indexer" robots.txt entries

Diffstat:
M gus/crawl.py | 2+-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -110,7 +110,7 @@ def crawl_url(url):
         return
     robots_file = get_robots_file(normalized_url)
     if robots_file is not None:
-        can_fetch = robots_file.can_fetch("gus", normalized_url) and robots_file.can_fetch("*", normalized_url)
+        can_fetch = robots_file.can_fetch("gus", normalized_url) and robots_file.can_fetch("*", normalized_url) and robots_file.can_fetch("indexer", normalized_url)
     if not can_fetch:
         print("ROBOTS SKIP : %s" % url)
         print("--------------------------")