commit dfe2e581d219492491e0cbcefe2de9494e307770
parent 332a9da344f23619cd40ac02e897311f31689f51
Author: René Wagner <rwa@clttr.info>
Date: Sun, 20 Nov 2022 12:10:30 +0100
new exclude, make crawl use 3 threads
Diffstat:
3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -550,13 +550,13 @@ def run_crawl(should_run_destructive=False, seed_urls=[]):
expired_resources = [GeminiResource(url) for url in load_expired_urls()]
random.shuffle(expired_resources)
- with ThreadPoolExecutor(max_workers=2) as executor:
+ with ThreadPoolExecutor(max_workers=3) as executor:
executor.map(crawl_resource, expired_resources)
executor.shutdown(wait=True, cancel_futures=False)
submitted_resources = [GeminiResource(url) for url in load_seed_request_urls()]
random.shuffle(submitted_resources)
- with ThreadPoolExecutor(max_workers=2) as executor:
+ with ThreadPoolExecutor(max_workers=3) as executor:
executor.map(crawl_resource, submitted_resources)
executor.shutdown(wait=True, cancel_futures=False)
diff --git a/gus/excludes.py b/gus/excludes.py
@@ -178,6 +178,9 @@ EXCLUDED_URL_PREFIXES = [
# skyjakes git repos
"gemini://git.skyjake.fi",
+
+ # chess games or stuff
+ "gemini://jsreed5.org/live/",
]
EXCLUDED_URL_PATHS = [
diff --git a/infra/rebuild_index.sh b/infra/rebuild_index.sh
@@ -2,11 +2,12 @@ if [ `date +%d` != "01" ]
then
/home/gus/.poetry/bin/poetry run build_index
else
- sudo systemctl stop gusi
+ sudo systemctl stop gus
cp -r /home/gus/index /home/gus/index.new
sudo systemctl start gus
/home/gus/.poetry/bin/poetry run build_index -d
rm -rf /home/gus/index.old
mv /home/gus/index /home/gus/index.old
mv /home/gus/index.new /home/gus/index
+ rm -rf /home/gus/index/MAIN.tmp/
fi