commit 823e8d6a2c764cd58aa77b69b3bb2bc9da5720ca
parent 015279c14108635a1f7639798d19589537ed5a42
Author: René Wagner <rwa@clttr.info>
Date: Sat, 5 Feb 2022 10:37:07 +0100
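Precompute hosts statistics
Run the known-hosts and newest-hosts queries once while the GUS object sets up its database and statistics, and keep the results in `gus.hosts` and `gus.newest_hosts`. The views now read these attributes instead of calling `get_hosts()` / `get_newest_hosts()`, which queried the database on every request and are removed.
Relates to #42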
Diffstat:
2 files changed, 26 insertions(+), 30 deletions(-)
diff --git a/serve/models.py b/serve/models.py
@@ -20,8 +20,31 @@ class GUS:
self.db = init_db(f"{constants.INDEX_DIR}/{constants.DB_FILENAME}")
self.statistics = compute_index_statistics(self.db)
self.statistics_historical_overall = load_all_statistics_from_file(
- constants.STATISTICS_FILE
+ constants.STATISTICS_FILE)
+ hosts_query = Page.raw(
+ """
+ SELECT DISTINCT p.domain
+ FROM page AS p
+ WHERE last_crawl_success_at IS NOT NULL
+ AND last_status = 20
+ ORDER BY p.domain
+ """
+ )
+ self.hosts = hosts_query.execute()
+ newest_hosts_query = Page.raw(
+ """
+ SELECT p.domain, p.first_seen_at
+ FROM page AS p
+ WHERE last_crawl_success_at IS NOT NULL
+ AND first_seen_at IS NOT NULL
+ AND last_status = 20
+ GROUP BY p.domain
+ ORDER BY first_seen_at DESC
+ LIMIT 30
+ """
)
+
+ self.newest_hosts = newest_hosts_query.execute()
def search_index(self, query, requested_page):
query = self.index.parse_query(query)
@@ -98,33 +121,6 @@ AND p.last_crawl_success_at IS NOT NULL"""
)
return feeds_query.execute()
- def get_hosts(self):
- hosts_query = Page.raw(
- """
- SELECT DISTINCT p.domain
- FROM page AS p
- WHERE last_crawl_success_at IS NOT NULL
- AND last_status = 20
- ORDER BY p.domain
- """
- )
- return hosts_query.execute()
-
- def get_newest_hosts(self):
- newest_hosts_query = Page.raw(
- """
- SELECT p.domain, p.first_seen_at
- FROM page AS p
- WHERE last_crawl_success_at IS NOT NULL
- AND first_seen_at IS NOT NULL
- AND last_status = 20
- GROUP BY p.domain
- ORDER BY first_seen_at DESC
- LIMIT 30
- """
- )
- return newest_hosts_query.execute()
-
def get_newest_pages(self):
newest_pages_query = Page.raw(
"""SELECT p.url, p.fetchable_url, p.first_seen_at FROM page as p
diff --git a/serve/views.py b/serve/views.py
@@ -109,7 +109,7 @@ def statistics(request):
def known_hosts(request):
body = render_template(
"known_hosts.gmi",
- known_hosts=gus.get_hosts(),
+ known_hosts=gus.hosts,
index_modification_time=gus.statistics["index_modification_time"]
)
return Response(Status.SUCCESS, "text/gemini", body)
@@ -119,7 +119,7 @@ def known_hosts(request):
def newest_hosts(request):
body = render_template(
"newest_hosts.gmi",
- newest_hosts=gus.get_newest_hosts(),
+ newest_hosts=gus.newest_hosts,
index_modification_time=gus.statistics["index_modification_time"]
)
return Response(Status.SUCCESS, "text/gemini", body)