commit 569baa0637e3d21d7423f28ef38f325ad2b888bc
parent 42af8b76b338baa2fc1f66e1ec5f2d3f91771849
Author: René Wagner <rwagner@rw-net.de>
Date: Wed, 10 Feb 2021 11:06:47 +0100
limit max_crawl_depth to 100 for normal crawl
There are capsules out there that kill the crawler due
to recursion exceeding the limits of Python.
Python's limit seems to be around 1000, so the value
can be increased if needed, but I don't think we
miss anything with the current value.
Signed-off-by: Natalie Pendragon <natpen@natpen.net>
Diffstat:
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -784,7 +784,7 @@ def run_crawl(should_run_destructive=False, seed_urls=[]):
     global domain_hit_timings
     domain_hit_timings = {}
     global max_crawl_depth
-    max_crawl_depth = -1
+    max_crawl_depth = 100
     expired_resources = [GeminiResource(url) for url in load_expired_urls()]
     for resource in expired_resources:
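A minimal sketch of the idea behind the cap, assuming a simplified recursive crawl; crawl and fetch_links here are placeholders, not the real gus/crawl.py functions. It shows why max_crawl_depth = -1 (unbounded) can blow CPython's default recursion limit of roughly 1000 stack frames, while a cap of 100 stays well inside it.

    import sys

    # Hypothetical, simplified version of a depth-limited crawl.
    max_crawl_depth = 100

    def fetch_links(url):
        """Placeholder for real link extraction; returns child URLs."""
        return []

    def crawl(url, current_depth=0):
        # Stop descending once the cap is exceeded (a negative cap
        # means "unbounded", like the old -1 sentinel).
        if 0 <= max_crawl_depth < current_depth:
            return
        for child in fetch_links(url):
            crawl(child, current_depth + 1)

    if __name__ == "__main__":
        print(sys.getrecursionlimit())  # typically 1000 on CPython
        crawl("gemini://example.org/")

With the old -1 value the early return never triggers, so a capsule whose pages link more than about 1000 levels deep raises RecursionError; with the new cap the crawler simply stops descending at depth 100.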