geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit 0b4062fe6319107c42778b324aced2bee6b94444
parent 823e8d6a2c764cd58aa77b69b3bb2bc9da5720ca
Author: René Wagner <rwa@clttr.info>
Date:   Sat,  5 Feb 2022 16:17:35 +0100

precompute feeds and pages

related to #42

Diffstat:
M serve/models.py | 56 +++++++++++++++++++++++++-------------------------------
M serve/views.py  | 4 ++--
2 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/serve/models.py b/serve/models.py @@ -43,9 +43,26 @@ class GUS: LIMIT 30 """ ) - self.newest_hosts = newest_hosts_query.execute() + newest_pages_query = Page.raw( + """SELECT p.url, p.fetchable_url, p.first_seen_at FROM page as p + WHERE last_crawl_success_at IS NOT NULL AND first_seen_at IS NOT NULL + ORDER BY first_seen_at DESC + LIMIT 50""") + self.newest_pages = newest_pages_query.execute() + feeds_query = Page.raw( + """SELECT DISTINCT p.* + FROM page AS p + WHERE (p.url LIKE '%atom.xml' + OR p.url LIKE '%feed.xml' + OR p.url LIKE '%.rss' + OR p.url LIKE '%.atom' + OR p.content_type IN ('application/atom+xml', 'application/rss+xml')) + AND p.last_crawl_success_at IS NOT NULL""") + self.feeds = feeds_query.execute() + + def search_index(self, query, requested_page): query = self.index.parse_query(query) results = self.index.search(query, requested_page, pagelen=10) @@ -77,13 +94,13 @@ class GUS: u = resource.normalized_url.rstrip("/") backlinks_query = Page.raw( """SELECT p_from.url, l.is_cross_host_like -FROM page AS p_from -JOIN link as l ON l.from_page_id == p_from.id -JOIN page as p_to ON p_to.id == l.to_page_id -WHERE p_to.url IN (?, ?) -AND p_from.url != ? -GROUP BY p_from.url -ORDER BY l.is_cross_host_like, p_from.url ASC""", + FROM page AS p_from + JOIN link as l ON l.from_page_id == p_from.id + JOIN page as p_to ON p_to.id == l.to_page_id + WHERE p_to.url IN (?, ?) + AND p_from.url != ? 
+ GROUP BY p_from.url + ORDER BY l.is_cross_host_like, p_from.url ASC""", u, f"{u}/", resource.normalized_url, @@ -108,29 +125,6 @@ ORDER BY l.is_cross_host_like, p_from.url ASC""", ) return link_text - def get_feeds(self): - feeds_query = Page.raw( - """SELECT DISTINCT p.* -FROM page AS p -WHERE (p.url LIKE '%atom.xml' -OR p.url LIKE '%feed.xml' -OR p.url LIKE '%.rss' -OR p.url LIKE '%.atom' -OR p.content_type IN ('application/atom+xml', 'application/rss+xml')) -AND p.last_crawl_success_at IS NOT NULL""" - ) - return feeds_query.execute() - - def get_newest_pages(self): - newest_pages_query = Page.raw( - """SELECT p.url, p.fetchable_url, p.first_seen_at FROM page as p - WHERE last_crawl_success_at IS NOT NULL AND first_seen_at IS NOT NULL -ORDER BY first_seen_at DESC -LIMIT 50 -""" - ) - return newest_pages_query.execute() - def get_search_suggestions(self, query): return self.index.suggestions(query) diff --git a/serve/views.py b/serve/views.py @@ -129,7 +129,7 @@ def newest_hosts(request): def newest_pages(request): body = render_template( "newest_pages.gmi", - newest_pages=gus.get_newest_pages(), + newest_pages=gus.newest_pages, index_modification_time=gus.statistics["index_modification_time"] ) return Response(Status.SUCCESS, "text/gemini", body) @@ -139,7 +139,7 @@ def newest_pages(request): def known_feeds(request): body = render_template( "known_feeds.gmi", - known_feeds=gus.get_feeds(), + known_feeds=gus.feeds, index_modification_time=gus.statistics["index_modification_time"] )