[crawl] Respect "indexer" robots.txt entries - geminispace.info

commit cbcbcc59a100083675a04ff2114446fcfdc7fe8e
parent 9dc0120b92c51f8945fa642d1405f9a0bc6f4c39
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Mon,  2 Mar 2020 06:43:56 -0500

[crawl] Respect "indexer" robots.txt entries

Diffstat:
M gus/crawl.py  | 2 +-

1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -110,7 +110,7 @@ def crawl_url(url):
         return
     robots_file = get_robots_file(normalized_url)
     if robots_file is not None:
-        can_fetch = robots_file.can_fetch("gus", normalized_url) and robots_file.can_fetch("*", normalized_url)
+        can_fetch = robots_file.can_fetch("gus", normalized_url) and robots_file.can_fetch("*", normalized_url) and robots_file.can_fetch("indexer", normalized_url)
         if not can_fetch:
             print("ROBOTS SKIP  : %s" % url)
             print("--------------------------")

	geminispace.info gemini search engine
	git clone https://git.clttr.info/geminispace.info.git
	Log (Feed) | Files | Refs (Tags) | README | LICENSE