commit 06c0258323f11948f3d2e568d385f066c10fe38e
parent 9b21f64790e01e55a7e4a135137231abeb443fd9
Author: René Wagner <rwagner@rw-net.de>
Date: Wed, 12 May 2021 17:46:33 +0200
update 2021-05-12
Diffstat:
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -156,7 +156,6 @@ EXCLUDED_URL_PREFIXES = [
# mozz mailing list linkscraper
"gemini://mozz.us/files/gemini-links.gmi",
-
# gemini.techrights.org
"gemini://gemini.techrights.org/",
@@ -167,11 +166,13 @@ EXCLUDED_URL_PREFIXES = [
# news mirrors - not our business
"gemini://guardian.shit.cx/",
"gemini://simplynews.metalune.xyz",
+ "gemini://illegaldrugs.net/cgi-bin/news.php?",
# wikipedia proxy
"gemini://wp.pitr.ca/",
"gemini://wp.glv.one/",
-
+ "gemini://wikipedia.geminet.org/",
+
# client torture test
"gemini://egsam.pitr.ca/",
"gemini://egsam.glv.one/",
diff --git a/serve/templates/news.gmi b/serve/templates/news.gmi
@@ -2,6 +2,10 @@
## News
+### 2021-05-12
+We are back on track with crawl and index, everything is up-to-date again.
+I had to add another news mirror and a Wikipedia mirror to the exclude list. The current implementation can't handle such a huge amount of information well.
+
### 2021-05-08
Obviously this didn't work as expected. For whatever reason, indexing fails repeatedly on one page or another with a mysterious sqlite error. It may take a few days until I find enough time to search for the cause of this error.
If you are familiar with peewee and sqlite or have come across this issue earlier, let me know: