commit cbcbcc59a100083675a04ff2114446fcfdc7fe8e
parent 9dc0120b92c51f8945fa642d1405f9a0bc6f4c39
Author: Natalie Pendragon <natpen@natpen.net>
Date: Mon, 2 Mar 2020 06:43:56 -0500
[crawl] Respect "indexer" robots.txt entries
Diffstat:
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -110,7 +110,7 @@ def crawl_url(url):
return
robots_file = get_robots_file(normalized_url)
if robots_file is not None:
- can_fetch = robots_file.can_fetch("gus", normalized_url) and robots_file.can_fetch("*", normalized_url)
+ can_fetch = robots_file.can_fetch("gus", normalized_url) and robots_file.can_fetch("*", normalized_url) and robots_file.can_fetch("indexer", normalized_url)
if not can_fetch:
print("ROBOTS SKIP : %s" % url)
print("--------------------------")