commit f8aa717e97594405e80157674ac355e15da4b8af
parent bfcfec84e04a54c1c72df6858512ad6a238ec2d0
Author: Natalie Pendragon <natpen@natpen.net>
Date: Wed, 3 Jun 2020 12:50:10 -0400
[crawl] Fix default crawl delay when not specified explicitly
Diffstat:
  gus/crawl.py | 8 ++++----

1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/gus/crawl.py b/gus/crawl.py
--- a/gus/crawl.py
+++ b/gus/crawl.py
@@ -241,9 +241,9 @@ def crawl(gemini_resource):
     can_fetch = robots_file.can_fetch("gus", gr.normalized_url)
     # same approach as above - last value wins
-    crawl_delay = robots_file.crawl_delay("*") or 0
-    crawl_delay = robots_file.crawl_delay("indexer") or 0
-    crawl_delay = robots_file.crawl_delay("gus") or 0
+    crawl_delay = robots_file.crawl_delay("*")
+    crawl_delay = robots_file.crawl_delay("indexer")
+    crawl_delay = robots_file.crawl_delay("gus")
     if not can_fetch:
         print("ROBOTS SKIP : %s" % gr.fetchable_url)
@@ -258,7 +258,7 @@ def crawl(gemini_resource):
     # Crawl delay
     if gr.normalized_host in domain_hit_timings:
-        if crawl_delay is None:
+        if not crawl_delay:
             next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta(milliseconds=500)
         else:
             next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta(milliseconds=crawl_delay)
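
Before this change, each crawl_delay lookup ended with "or 0", so a host whose robots.txt declared no Crawl-delay ended up with crawl_delay == 0; the later "if crawl_delay is None" test then never matched, and the crawler applied no delay at all between hits to that host. With the coercion removed and the check relaxed to "if not crawl_delay", an unspecified (or zero) delay now falls through to the 500 ms default. A minimal sketch of the resulting behaviour follows; it uses the standard library's urllib.robotparser purely for illustration, and the helper names resolve_crawl_delay and next_allowed_hit are invented for the example, not taken from gus/crawl.py.

from datetime import datetime, timedelta
from urllib.robotparser import RobotFileParser

def resolve_crawl_delay(robots_file: RobotFileParser):
    # Same approach as the patch: each lookup overwrites the last, so the
    # most specific user agent ("gus") wins. crawl_delay() returns None
    # when robots.txt has no Crawl-delay line for that agent.
    crawl_delay = robots_file.crawl_delay("*")
    crawl_delay = robots_file.crawl_delay("indexer")
    crawl_delay = robots_file.crawl_delay("gus")
    return crawl_delay

def next_allowed_hit(last_hit: datetime, crawl_delay):
    # Post-fix behaviour: an unspecified (None) or zero crawl delay falls
    # back to the crawler's default of 500 ms between hits to one host.
    if not crawl_delay:
        return last_hit + timedelta(milliseconds=500)
    return last_hit + timedelta(milliseconds=crawl_delay)

In crawl.py the delay resolution happens right after the can_fetch check, and the next allowed hit is only computed when the host already appears in domain_hit_timings; the sketch above only isolates how the default is chosen once those values are in hand.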