excludes.py (6934B)
# URL prefixes that are excluded.
#
# These are checked against normalized_url, so they should be
# prepended with the gemini:// protocol, be all lowercased, and
# not have the port specified if it is 1965.
EXCLUDED_URL_PREFIXES = [
    "gemini://localhost",
    "gemini://example.org",
    "gemini://example.com",
    "gemini://www.youtube.com/",
    # LEO generating useless URIs
    "gemini://tilde.team/~khuxkm/leo/",
    # all combinations of a tictactoe board
    "gemini://tictactoe.lanterne.chilliet.eu",

    "gemini://kennedy.gemi.dev/",
    "gemini://gemi.dev/cgi-bin/",
    "gemini://auragem.space/texts/jewish",
    "gemini://auragem.space/twitch/",
    # serving big files and slooow capsule -> takes too long to crawl
    "gemini://kamalatta.ddnss.de/",
    "gemini://tweek.zyxxyz.eu/valentina/",

    # ASCII art with emulated modem speed
    "gemini://ansi.hrtk.in/",
    "gemini://matrix.kiwifarms.net",

    # ZachDeCooks songs
    "gemini://songs.zachdecook.com/song.gmi.php/",
    "gemini://songs.zachdecook.com/chord.svg/",
    "gemini://gemini.zachdecook.com/cgi-bin/ccel.sh",

    # kwiecien gemcast
    "gemini://kwiecien.us/gemcast/",

    # breaks crawl due to recursion overflow
    "gemini://cadence.moe/chapo/",

    "gemini://nixo.xyz/reply/",
    "gemini://nixo.xyz/notify",
    "gemini://gemini.thebackupbox.net/queryresponse",
    "gemini://gemini.thebackupbox.net/cgi-bin/",
    "gemini://gem.garichankar.com/share_audio",

    # Mastodon mirrors
    "gemini://vps01.rdelaage.ovh/",
    "gemini://mastogem.picasoft.net/",
    "gemini://mastogem.remorse.us/",

    # various failing resources on runjimmyrunrunyoufuckerrun.com
    "gemini://runjimmyrunrunyoufuckerrun.com/fonts/",
    "gemini://runjimmyrunrunyoufuckerrun.com/tmp/",

    # Search providers
    "gemini://houston.coder.town/search?",
    "gemini://houston.coder.town/search/",
    "gemini://marginalia.nu/search",
    "gemini://geminispace.info",
    "gemini://tlgs.one/",
    "gemini://gus.guru/",

    # Geddit
    "gemini://geddit.pitr.ca/post?",
    "gemini://geddit.pitr.ca/c/",
    "gemini://geddit.glv.one/post?",
    "gemini://geddit.glv.one/c/",

    # Marmaladefoo calculator
    "gemini://gemini.marmaladefoo.com/cgi-bin/calc.cgi?",
    "gemini://gemini.circumlunar.space/users/fgaz/calculator/",

    # Individual weather pages
    "gemini://acidic.website/cgi-bin/weather.tcl?",
    "gemini://caolan.uk/weather/",

    # Alex Schroeder's problematic stuff
    "gemini://alexschroeder.ch/image_external",
    "gemini://alexschroeder.ch/html/",
    "gemini://alexschroeder.ch/diff/",
    "gemini://alexschroeder.ch/history/",
    "gemini://alexschroeder.ch/http",
    "gemini://alexschroeder.ch/https",
    "gemini://alexschroeder.ch/tag/",
    "gemini://alexschroeder.ch/raw/",
    "gemini://alexschroeder.ch/map/",
    "gemini://alexschroeder.ch/do/comment",
    "gemini://alexschroeder.ch/do/rc",
    "gemini://alexschroeder.ch/do/rss",
    "gemini://alexschroeder.ch/do/new",
    "gemini://alexschroeder.ch/do/more",
    "gemini://alexschroeder.ch/do/tags",
    "gemini://alexschroeder.ch/do/match",
    "gemini://alexschroeder.ch/do/search",
    "gemini://alexschroeder.ch/do/gallery/",

    # mozz mailing list linkscraper
    "gemini://mozz.us/files/gemini-links.gmi",
    "gemini://gem.benscraft.info/mailing-list",
    # gemini.techrights.org
    "gemini://gemini.techrights.org/",

    # endless stream
    "gemini://202x.moe/resonance",

    # big file
    "gemini://mirrors.apple2.org.za/active/ftp.apple.asimov.net/",

    # hackernews mirror
    "gemini://gem.graypegg.com/hn/",
    # antenna filters
    "gemini://warmedal.se/~antenna/filter",

    # youtube mirror
    "gemini://auragem.space/cgi-bin/youtube.cgi?",
    "gemini://auragem.space/youtube/",

    # news mirrors - not our business
    "gemini://teapot.styx.org",
    "gemini://taz.de/",
    "gemini://gemini.knusbaum.com/feeds",
    "gemini://guardian.shit.cx/",
    "gemini://simplynews.metalune.xyz",
    "gemini://illegaldrugs.net/cgi-bin/news.php",
    # Listed without the default port 1965, per the rule at the top of
    # this list; the port-less prefix is the form normalized URLs take.
    "gemini://illegaldrugs.net/cgi-bin/reader",
    "gemini://rawtext.club/~sloum/geminews",
    "gemini://gemini.cabestan.tk/hn",
    "gemini://hn.filiuspatris.net/",
    "gemini://schmittstefan.de/de/nachrichten/",
    "gemini://gmi.noulin.net/mobile",
    "gemini://jpfox.fr/rss/",
    "gemini://dw.schettler.net/",
    "gemini://dioskouroi.xyz/top",
    "gemini://drewdevault.com/cgi-bin/hn.py",
    "gemini://tobykurien.com/maverick/",
    "gemini://news.manuceau.net/",
    "gemini://gemini-news.com/",
    "gemini://news.tuxmachines.org/",
    "gemini://musicdir.zachdecook.com/",
    "gemini://federal.cx/news",
    "gemini://kypan.me/cgi",

    # wikipedia proxy
    "gemini://wp.pitr.ca/",
    "gemini://wp.glv.one/",
    "gemini://wikipedia.geminet.org/",
    "gemini://wikipedia.geminet.org:1966",
    "gemini://vault.transjovian.org/",

    # client torture test
    "gemini://egsam.pitr.ca/",
    "gemini://egsam.glv.one/",
    "gemini://gemini.conman.org/test",

    # mozz's chat
    "gemini://chat.mozz.us/stream",
    "gemini://chat.mozz.us/submit",

    # gempod
    "gemini://rocketcaster.xyz/share/",

    # gopher proxy
    "gemini://80h.dev/agena/",

    # astrobotany
    "gemini://astrobotany.mozz.us/",
    "gemini://carboncopy.xyz/cgi-bin/apache.gex/",

    # infinite maze
    "gemini://alexey.shpakovsky.ru/maze",

    # susa.net
    "gemini://gemini.susa.net/cgi-bin/search?",
    "gemini://gemini.susa.net/cgi-bin/twitter?",
    "gemini://gemini.susa.net/cgi-bin/vim-search?",
    "gemini://gemini.susa.net/cgi-bin/links_stu.lua?",

    "gemini://gemini.spam.works/textfiles/",
    "gemini://gemini.spam.works/mirrors/textfiles/",
    "gemini://gemini.spam.works/users/dvn/archive/",

    # streams that never end...
    "gemini://gemini.thebackupbox.net/radio",
    "gemini://higeki.jp/radio",

    # full web proxy
    "gemini://webgate.geminet.org/",
    "gemini://drewdevault.com/cgi-bin/web.sh?",
    "gemini://gemiprox.pollux.casa/",
    "gemini://gemiprox.pollux.casa:1966",
    "gemini://ecs.d2evs.net/proxy/",

    # killing crawl, I think maybe because it's too big
    # cryptocurrency capsule
    "gemini://gem.denarii.cloud/",

    # docs - not our business
    "gemini://cfdocs.wetterberg.nu/",
    "gemini://godocs.io",

    "gemini://musicbrainz.uploadedlobster.com/",

    # git repos
    "gemini://git.skyjake.fi",
    "gemini://gemini.unlimited.pizza/git",
    "gemini://r.bdr.sh/git",
    # games
    "gemini://jsreed5.org/live/",
    "gemini://gemini.thegonz.net/ski",
    "gemini://gemini.thegonz.net/gemski",
    "gemini://thegonz.net/",
    "gemini://gemlog.stargrave.org/",
    # NOULIN
    "gemini://gmi.noulin.net/stackoverflow/",
    # NOTE(review): "gitRepositories" contains an uppercase letter although
    # the rule above asks for lowercased prefixes - confirm whether
    # normalized_url lowercases the path before changing this entry.
    "gemini://gmi.noulin.net/gitRepositories/",
    "gemini://gmi.noulin.net/man/",
]

# File names that are excluded.
# NOTE(review): presumably compared against the path part of a URL - the
# matching logic is not in this file, so confirm against the caller.
EXCLUDED_URL_PATHS = [
    "favicon.ico",
    "favicon.txt",
    "robots.txt",
    "rss.txt",
    "rss.xml",
]