commit 161252e750073377625a22d1e4d8cd0748116e4f
parent 8994b21fea2d7adf1ababecfae27971ff8390fb1
Author: Natalie Pendragon <natpen@natpen.net>
Date: Wed, 20 May 2020 08:15:27 -0400
[serve] Update the loading of statistics
Recompute the index-derived figures from the index itself instead of relying
only on the statistics file, so that after users submit seed requests, the
new hosts will show up immediately on the /known-hosts page.
Diffstat:
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/gus/serve.py b/gus/serve.py
@@ -15,12 +15,27 @@ from jetforce import Response, Status
from whoosh.index import open_dir
from whoosh.qparser import MultifieldParser
-from gus.lib.index_statistics import load_last_statistics_from_file
+from gus.lib.index_statistics import compute_index_statistics, load_last_statistics_from_file
from gus.crawl import run_crawl
-last_statistics = load_last_statistics_from_file("statistics.csv")
+INDEX_DIR = "index"
app = jetforce.JetforceApplication()
+def load_and_compute_statistics(filename):
+ statistics = load_last_statistics_from_file(filename)
+
+ # we want fresh data for the below figures, and they aren't persisted to file
+ # during non-destructive crawls, so recompute them!
+ index_statistics = compute_index_statistics(INDEX_DIR)
+ statistics["index_modification_time"] = index_statistics["index_modification_time"]
+ statistics["page_count"] = index_statistics["page_count"]
+ statistics["domain_count"] = index_statistics["domain_count"]
+ statistics["content_type_frequencies"] = index_statistics["content_type_frequencies"]
+ statistics["domains"] = index_statistics["domains"]
+ return statistics
+
+
+last_statistics = load_and_compute_statistics("statistics.csv")
def _render_header():
return [
@@ -36,7 +51,7 @@ def _render_footer():
"",
"=> /add-seed See any missing results? Let GUS know your gemini URL exists.",
"",
- "Index generation date: {:%Y-%m-%d}".format(index_modification_time)
+ "Index updated on: {:%Y-%m-%d}".format(index_modification_time)
]
@@ -47,7 +62,7 @@ def _render_index_statistics():
"",
"## Overall",
"",
- "These figures are representative of the aggregate size of Geminispace at the time the current index was generated on {}.".format(last_statistics["index_modification_time"]),
+ "These figures are representative of the aggregate size of Geminispace at the time the current index was last updated on {}.".format(last_statistics["index_modification_time"]),
"",
"Page Count : {:>5}".format(last_statistics["page_count"]),
"Domain Count : {:>5}".format(last_statistics["domain_count"]),
@@ -56,7 +71,7 @@ def _render_index_statistics():
"",
"## By Content Type",
"",
- "These figures are representative of the number of pages seen per content type at the time the current index was generated on {}.".format(last_statistics["index_modification_time"]),
+ "These figures are representative of the number of pages seen per content type at the time the current index was last updated on {}.".format(last_statistics["index_modification_time"]),
"",
]
for pair in last_statistics["content_type_frequencies"]:
@@ -369,7 +384,7 @@ def main():
app=app,
)
global ix
- ix = open_dir("index")
+ ix = open_dir(INDEX_DIR)
global index_modification_time
index_modification_time = _get_index_modification_time()
global searcher