commit 0b4062fe6319107c42778b324aced2bee6b94444
parent 823e8d6a2c764cd58aa77b69b3bb2bc9da5720ca
Author: René Wagner <rwa@clttr.info>
Date: Sat, 5 Feb 2022 16:17:35 +0100
precompute feeds and pages
related to #42
Diffstat:
2 files changed, 27 insertions(+), 33 deletions(-)
diff --git a/serve/models.py b/serve/models.py
@@ -43,9 +43,26 @@ class GUS:
LIMIT 30
"""
)
-
self.newest_hosts = newest_hosts_query.execute()
+ newest_pages_query = Page.raw(
+ """SELECT p.url, p.fetchable_url, p.first_seen_at FROM page as p
+ WHERE last_crawl_success_at IS NOT NULL AND first_seen_at IS NOT NULL
+ ORDER BY first_seen_at DESC
+ LIMIT 50""")
+ self.newest_pages = newest_pages_query.execute()
+ feeds_query = Page.raw(
+ """SELECT DISTINCT p.*
+ FROM page AS p
+ WHERE (p.url LIKE '%atom.xml'
+ OR p.url LIKE '%feed.xml'
+ OR p.url LIKE '%.rss'
+ OR p.url LIKE '%.atom'
+ OR p.content_type IN ('application/atom+xml', 'application/rss+xml'))
+ AND p.last_crawl_success_at IS NOT NULL""")
+ self.feeds = feeds_query.execute()
+
+
def search_index(self, query, requested_page):
query = self.index.parse_query(query)
results = self.index.search(query, requested_page, pagelen=10)
@@ -77,13 +94,13 @@ class GUS:
u = resource.normalized_url.rstrip("/")
backlinks_query = Page.raw(
"""SELECT p_from.url, l.is_cross_host_like
-FROM page AS p_from
-JOIN link as l ON l.from_page_id == p_from.id
-JOIN page as p_to ON p_to.id == l.to_page_id
-WHERE p_to.url IN (?, ?)
-AND p_from.url != ?
-GROUP BY p_from.url
-ORDER BY l.is_cross_host_like, p_from.url ASC""",
+ FROM page AS p_from
+ JOIN link as l ON l.from_page_id == p_from.id
+ JOIN page as p_to ON p_to.id == l.to_page_id
+ WHERE p_to.url IN (?, ?)
+ AND p_from.url != ?
+ GROUP BY p_from.url
+ ORDER BY l.is_cross_host_like, p_from.url ASC""",
u,
f"{u}/",
resource.normalized_url,
@@ -108,29 +125,6 @@ ORDER BY l.is_cross_host_like, p_from.url ASC""",
)
return link_text
- def get_feeds(self):
- feeds_query = Page.raw(
- """SELECT DISTINCT p.*
-FROM page AS p
-WHERE (p.url LIKE '%atom.xml'
-OR p.url LIKE '%feed.xml'
-OR p.url LIKE '%.rss'
-OR p.url LIKE '%.atom'
-OR p.content_type IN ('application/atom+xml', 'application/rss+xml'))
-AND p.last_crawl_success_at IS NOT NULL"""
- )
- return feeds_query.execute()
-
- def get_newest_pages(self):
- newest_pages_query = Page.raw(
- """SELECT p.url, p.fetchable_url, p.first_seen_at FROM page as p
- WHERE last_crawl_success_at IS NOT NULL AND first_seen_at IS NOT NULL
-ORDER BY first_seen_at DESC
-LIMIT 50
-"""
- )
- return newest_pages_query.execute()
-
def get_search_suggestions(self, query):
return self.index.suggestions(query)
diff --git a/serve/views.py b/serve/views.py
@@ -129,7 +129,7 @@ def newest_hosts(request):
def newest_pages(request):
body = render_template(
"newest_pages.gmi",
- newest_pages=gus.get_newest_pages(),
+ newest_pages=gus.newest_pages,
index_modification_time=gus.statistics["index_modification_time"]
)
return Response(Status.SUCCESS, "text/gemini", body)
@@ -139,7 +139,7 @@ def newest_pages(request):
def known_feeds(request):
body = render_template(
"known_feeds.gmi",
- known_feeds=gus.get_feeds(),
+ known_feeds=gus.feeds,
index_modification_time=gus.statistics["index_modification_time"]
)