geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git

commit f8aa717e97594405e80157674ac355e15da4b8af
parent bfcfec84e04a54c1c72df6858512ad6a238ec2d0
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Wed,  3 Jun 2020 12:50:10 -0400

[crawl] Fix default crawl delay when not specified explicitly

Diffstat:
M gus/crawl.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -241,9 +241,9 @@ def crawl(gemini_resource):
         can_fetch = robots_file.can_fetch("gus", gr.normalized_url)

         # same approach as above - last value wins
-        crawl_delay = robots_file.crawl_delay("*") or 0
-        crawl_delay = robots_file.crawl_delay("indexer") or 0
-        crawl_delay = robots_file.crawl_delay("gus") or 0
+        crawl_delay = robots_file.crawl_delay("*")
+        crawl_delay = robots_file.crawl_delay("indexer")
+        crawl_delay = robots_file.crawl_delay("gus")

         if not can_fetch:
             print("ROBOTS SKIP : %s" % gr.fetchable_url)
@@ -258,7 +258,7 @@ def crawl(gemini_resource):

     # Crawl delay
     if gr.normalized_host in domain_hit_timings:
-        if crawl_delay is None:
+        if not crawl_delay:
             next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta(milliseconds=500)
         else:
             next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta(milliseconds=crawl_delay)
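The net effect: with the old code, robots_file.crawl_delay("gus") returning None was coerced to 0 by the trailing "or 0", so the later "crawl_delay is None" check never matched and hosts without an explicit delay were crawled with no delay at all. After this fix, a missing delay stays None, and "not crawl_delay" applies the crawler's 500 ms default. A minimal sketch of the resulting timing logic (the helper name below is hypothetical, not part of gus/crawl.py):

from datetime import datetime, timedelta

DEFAULT_CRAWL_DELAY_MS = 500  # fallback when robots.txt specifies no delay

def compute_next_allowed_hit(last_hit: datetime, crawl_delay) -> datetime:
    # Mirrors the fixed branch: "not crawl_delay" catches both None
    # (no Crawl-delay line in robots.txt) and 0, so either case falls
    # back to the 500 ms default instead of hitting the host with no
    # delay at all.
    if not crawl_delay:
        return last_hit + timedelta(milliseconds=DEFAULT_CRAWL_DELAY_MS)
    return last_hit + timedelta(milliseconds=crawl_delay)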