geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

test_crawl.py (1492B)


      1 import pytest
      2 
      3 from gus.crawl import should_skip
      4 from gus.lib.gemini import GeminiResource
      5 
      6 class TestUrlExclusion:
      7     @pytest.mark.parametrize("test_url,expected_result", [
      8         ("gemini://gemini.circumlunar.space/favicon.ico", True),
      9         ("gemini://gemini.circumlunar.space/rss.txt", True),
     10     ])
     11     def test_excluded_url_paths(self, test_url, expected_result):
     12         resource = GeminiResource(test_url)
     13         assert should_skip(resource) == expected_result
     14 
     15 
     16     @pytest.mark.parametrize("test_url,expected_result", [
     17         ("gemini://hannuhartikainen.fi/twinwiki/_revert/1594367314474", True),
     18         ("gemini://hannuhartikainen.fi/twinwiki/1594367314474", False),
     19         ("gemini://hannuhartikainen.fi/twinwiki/Sandbox/_history/1594037613712", True),
     20         ("gemini://hannuhartikainen.fi/twinwiki", False),
     21         ("gemini://123456.ch", True),
     22         ("gemini://123456.ch/fnord", True),
     23         ("gemini://almp1234.app", True),
     24         ("gemini://almp1234.app/fnord", True),
     25     ])
     26     def test_excluded_url_pattern(self, test_url, expected_result):
     27         resource = GeminiResource(test_url)
     28         assert should_skip(resource) == expected_result
     29 
     30 
     31     @pytest.mark.parametrize("test_url,expected_result", [
     32         ("gemini://localhost", True),
     33         ("gemini://example.org", True),
     34     ])
     35     def test_excluded_url_prefixes(self, test_url, expected_result):
     36         resource = GeminiResource(test_url)
     37         assert should_skip(resource) == expected_result