commit e24313a14c4ad496f418f47c1cd6162373234ef3
parent c2391751e9d842431c403c8b6221827dde4e6a99
Author: René Wagner <rwa@clttr.info>
Date: Sun, 14 Aug 2022 17:35:35 +0200
Fix URL-exclusion test, widen the gemi.dev exclusion to all of /cgi-bin/, and add a robots.txt test for gemi.dev's cgi-bin rules
Diffstat:
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/gus/excludes.py b/gus/excludes.py
@@ -10,7 +10,7 @@ EXCLUDED_URL_PREFIXES = [
# all combinations of a tictactoe board
"gemini://tictactoe.lanterne.chilliet.eu",
- "gemini://gemi.dev/cgi-bin/waffle.cgi",
+ "gemini://gemi.dev/cgi-bin/",
"gemini://auragem.space/texts/jewish",
"gemini://auragem.space/twitch/",
# serving big files and slooow capsule -> takes to long to crawl
diff --git a/tests/gus/lib/test_gemini.py b/tests/gus/lib/test_gemini.py
@@ -128,3 +128,18 @@ Disallow: /
User-agent: testbot
Allow: /""")
self._assert_fetchable(rp)
+
+ def test_disallow_gemidev_waffle(self):
+ rp = self._get_parser("""user-agent: *
+Disallow: /cgi-bin/wp.cgi/view
+Disallow: /cgi-bin/wp.cgi/media
+Disallow: /cgi-bin/wp.cgi/search
+
+Disallow: /cgi-bin/waffle.cgi/article
+Disallow: /cgi-bin/waffle.cgi/feed
+Disallow: /cgi-bin/waffle.cgi/links
+Disallow: /cgi-bin/waffle.cgi/view
+
+Disallow: /cgi-bin/witw.cgi/play
+""")
+ self._assert_fetchable(rp, "/cgi-bin/waffle.cgi/feed/link", False)
diff --git a/tests/gus/test_crawl.py b/tests/gus/test_crawl.py
@@ -31,8 +31,6 @@ class TestUrlExclusion:
@pytest.mark.parametrize("test_url,expected_result", [
("gemini://localhost", True),
("gemini://example.org", True),
- ("gus.guru", False),
- ("gus.guru/search?turkey", True),
])
def test_excluded_url_prefixes(self, test_url, expected_result):
resource = GeminiResource(test_url)