geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit 161252e750073377625a22d1e4d8cd0748116e4f
parent 8994b21fea2d7adf1ababecfae27971ff8390fb1
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Wed, 20 May 2020 08:15:27 -0400

[serve] Update the loading of statistics

Load the statistics more dynamically, so that seed requests submitted by
users show up immediately on the /known-hosts page.

Diffstat:
M gus/serve.py | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/gus/serve.py b/gus/serve.py @@ -15,12 +15,27 @@ from jetforce import Response, Status from whoosh.index import open_dir from whoosh.qparser import MultifieldParser -from gus.lib.index_statistics import load_last_statistics_from_file +from gus.lib.index_statistics import compute_index_statistics, load_last_statistics_from_file from gus.crawl import run_crawl -last_statistics = load_last_statistics_from_file("statistics.csv") +INDEX_DIR = "index" app = jetforce.JetforceApplication() +def load_and_compute_statistics(filename): + statistics = load_last_statistics_from_file(filename) + + # we want fresh data for the below figures, and they aren't persisted to file + # during non-destructive crawls, so recompute them! + index_statistics = compute_index_statistics(INDEX_DIR) + statistics["index_modification_time"] = index_statistics["index_modification_time"] + statistics["page_count"] = index_statistics["page_count"] + statistics["domain_count"] = index_statistics["domain_count"] + statistics["content_type_frequencies"] = index_statistics["content_type_frequencies"] + statistics["domains"] = index_statistics["domains"] + return statistics + + +last_statistics = load_and_compute_statistics("statistics.csv") def _render_header(): return [ @@ -36,7 +51,7 @@ def _render_footer(): "", "=> /add-seed See any missing results? 
Let GUS know your gemini URL exists.", "", - "Index generation date: {:%Y-%m-%d}".format(index_modification_time) + "Index updated on: {:%Y-%m-%d}".format(index_modification_time) ] @@ -47,7 +62,7 @@ def _render_index_statistics(): "", "## Overall", "", - "These figures are representative of the aggregate size of Geminispace at the time the current index was generated on {}.".format(last_statistics["index_modification_time"]), + "These figures are representative of the aggregate size of Geminispace at the time the current index was last updated on {}.".format(last_statistics["index_modification_time"]), "", "Page Count : {:>5}".format(last_statistics["page_count"]), "Domain Count : {:>5}".format(last_statistics["domain_count"]), @@ -56,7 +71,7 @@ def _render_index_statistics(): "", "## By Content Type", "", - "These figures are representative of the number of pages seen per content type at the time the current index was generated on {}.".format(last_statistics["index_modification_time"]), + "These figures are representative of the number of pages seen per content type at the time the current index was last updated on {}.".format(last_statistics["index_modification_time"]), "", ] for pair in last_statistics["content_type_frequencies"]: @@ -369,7 +384,7 @@ def main(): app=app, ) global ix - ix = open_dir("index") + ix = open_dir(INDEX_DIR) global index_modification_time index_modification_time = _get_index_modification_time() global searcher