commit e1d7853bbc6abda3d8c3ec8f59768eb31458c136
parent f0c091b153051a042f4763fd0eb5da97bc41bd27
Author: René Wagner <rwa@clttr.info>
Date: Sun, 6 Feb 2022 19:53:28 +0100
add index for speedup
Diffstat:
3 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/gus/lib/db_model.py b/gus/lib/db_model.py
@@ -48,6 +48,11 @@ class Page(Model):
last_status_message = TextField(null=True)
last_success_status = IntegerField(null=True)
first_seen_at = DateTimeField(null=True)
+ class Meta:
+ indexes=(
+ (('last_crawl_success_at', 'last_success_status'), False),
+ )
+
class Link(Model):
"""
diff --git a/serve/models.py b/serve/models.py
@@ -26,17 +26,18 @@ class GUS:
SELECT DISTINCT p.domain
FROM page AS p
WHERE last_crawl_success_at IS NOT NULL
- AND last_status = 20
+ AND last_success_status = 20
ORDER BY p.domain
"""
)
self.hosts = hosts_query.execute()
+
newest_hosts_query = Page.raw(
"""
SELECT p.domain, p.first_seen_at
FROM page AS p
WHERE last_crawl_success_at IS NOT NULL
- AND last_status = 20
+ AND last_success_status = 20
AND first_seen_at IS NOT NULL
GROUP BY p.domain
ORDER BY first_seen_at DESC
@@ -48,17 +49,18 @@ class GUS:
newest_pages_query = Page.raw(
"""SELECT p.url, p.fetchable_url, p.first_seen_at FROM page as p
WHERE last_crawl_success_at IS NOT NULL
- AND last_status = 20
+ AND last_success_status = 20
AND first_seen_at IS NOT NULL
ORDER BY first_seen_at DESC
LIMIT 50""")
self.newest_pages = newest_pages_query.execute()
+
feeds_query = Page.raw(
"""SELECT DISTINCT p.url, p.fetchable_url
FROM page AS p
WHERE p.last_crawl_success_at IS NOT NULL
- AND last_status = 20
- AND p.url LIKE '%atom.xml'
+ AND last_success_status = 20
+ AND (p.url LIKE '%atom.xml'
OR p.url LIKE '%feed.xml'
OR p.url LIKE '%.rss'
OR p.url LIKE '%.atom'
diff --git a/serve/templates/news.gmi b/serve/templates/news.gmi
@@ -2,6 +2,11 @@
## News
+### 2022-02-06 filtering clients
+I've blocked two IP addresses for sending the same stupid requests again and again:
+2001:41d0:302:2200::180
+::ffff:193.70.85.11
+
### 2022-01-25 a year after
Today one year ago geminispace.info has been set up. You probably guess what happened: the cert for the capsule expired today... :-D
A new cert is in place which now lasts for ten years...