test_crawl.py (1492B)
import pytest

from gus.crawl import should_skip
from gus.lib.gemini import GeminiResource


def _skip_verdict(url):
    """Wrap *url* in a GeminiResource and return what should_skip reports."""
    return should_skip(GeminiResource(url))


class TestUrlExclusion:
    """Checks which Gemini URLs the crawler's ``should_skip`` rejects.

    Every case pairs a URL with the value ``should_skip`` is expected to
    return for the ``GeminiResource`` built from that URL.
    """

    @pytest.mark.parametrize("test_url,expected_result", [
        ("gemini://gemini.circumlunar.space/favicon.ico", True),
        ("gemini://gemini.circumlunar.space/rss.txt", True),
    ])
    def test_excluded_url_paths(self, test_url, expected_result):
        """URLs ending in ``favicon.ico`` / ``rss.txt`` are expected to be skipped."""
        verdict = _skip_verdict(test_url)
        assert verdict == expected_result

    @pytest.mark.parametrize("test_url,expected_result", [
        ("gemini://hannuhartikainen.fi/twinwiki/_revert/1594367314474", True),
        ("gemini://hannuhartikainen.fi/twinwiki/1594367314474", False),
        ("gemini://hannuhartikainen.fi/twinwiki/Sandbox/_history/1594037613712", True),
        ("gemini://hannuhartikainen.fi/twinwiki", False),
        ("gemini://123456.ch", True),
        ("gemini://123456.ch/fnord", True),
        ("gemini://almp1234.app", True),
        ("gemini://almp1234.app/fnord", True),
    ])
    def test_excluded_url_pattern(self, test_url, expected_result):
        """Pattern-style exclusions.

        The twinwiki ``_revert`` and ``_history`` URLs are expected to be
        skipped while the plain twinwiki URLs are not; every URL on the
        ``123456.ch`` and ``almp1234.app`` hosts is expected to be skipped.
        """
        verdict = _skip_verdict(test_url)
        assert verdict == expected_result

    @pytest.mark.parametrize("test_url,expected_result", [
        ("gemini://localhost", True),
        ("gemini://example.org", True),
    ])
    def test_excluded_url_prefixes(self, test_url, expected_result):
        """The ``localhost`` and ``example.org`` URLs are expected to be skipped."""
        verdict = _skip_verdict(test_url)
        assert verdict == expected_result