geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit dfe2e581d219492491e0cbcefe2de9494e307770
parent 332a9da344f23619cd40ac02e897311f31689f51
Author: René Wagner <rwa@clttr.info>
Date:   Sun, 20 Nov 2022 12:10:30 +0100

new exclude, make crawl use 3 threads

Diffstat:
M gus/crawl.py | 4 ++--
M gus/excludes.py | 3 +++
M infra/rebuild_index.sh | 3 ++-
3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -550,13 +550,13 @@ def run_crawl(should_run_destructive=False, seed_urls=[]):
     expired_resources = [GeminiResource(url) for url in load_expired_urls()]
     random.shuffle(expired_resources)
-    with ThreadPoolExecutor(max_workers=2) as executor:
+    with ThreadPoolExecutor(max_workers=3) as executor:
         executor.map(crawl_resource, expired_resources)
         executor.shutdown(wait=True, cancel_futures=False)
 
     submitted_resources = [GeminiResource(url) for url in load_seed_request_urls()]
     random.shuffle(submitted_resources)
-    with ThreadPoolExecutor(max_workers=2) as executor:
+    with ThreadPoolExecutor(max_workers=3) as executor:
         executor.map(crawl_resource, submitted_resources)
         executor.shutdown(wait=True, cancel_futures=False)
 
diff --git a/gus/excludes.py b/gus/excludes.py
@@ -178,6 +178,9 @@ EXCLUDED_URL_PREFIXES = [
 
     # skyjakes git repos
     "gemini://git.skyjake.fi",
+
+    # chess games or stuff
+    "gemini://jsreed5.org/live/",
 ]
 
 EXCLUDED_URL_PATHS = [
diff --git a/infra/rebuild_index.sh b/infra/rebuild_index.sh
@@ -2,11 +2,12 @@
 if [ `date +%d` != "01" ]
 then
     /home/gus/.poetry/bin/poetry run build_index
 else
-    sudo systemctl stop gusi
+    sudo systemctl stop gus
     cp -r /home/gus/index /home/gus/index.new
     sudo systemctl start gus
     /home/gus/.poetry/bin/poetry run build_index -d
     rm -rf /home/gus/index.old
     mv /home/gus/index /home/gus/index.old
     mv /home/gus/index.new /home/gus/index
+    rm -rf /home/gus/index/MAIN.tmp/
 fi