commit 0558ba7bc6139caa1fa2b94f9a93dc5c00dff347
parent 69da68b6bac395aef9c34de387b23ab987a1f651
Author: Natalie Pendragon <natpen@natpen.net>
Date: Sat, 5 Dec 2020 09:04:23 -0500
[crawl] Abort robots.txt parsing attempt if not text/plain
Python's built-in robots.txt parsing functionality breaks if the
content type of the robots.txt response is not text/plain. When the
content type is anything else, skip the parsing attempt and allow all.
Diffstat:
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gus/lib/gemini.py b/gus/lib/gemini.py
@@ -75,7 +75,7 @@ class GeminiRobotFileParser(RobotFileParser):
if response is None:
self.allow_all = True
return
- if not response.status.startswith("2"):
+ if not response.status.startswith("2") or not response.content_type == "text/plain":
self.allow_all = True
else:
self.parse(response.content.splitlines())