commit 39c6540bc6a851f97493bbd8790a22451aa9af57
parent e1b3ac8ab4740ef176a3995551bcacae6244588d
Author: René Wagner <rwa@clttr.info>
Date: Tue, 13 Jul 2021 17:20:53 +0200
remove Search model
We don't store search queries: although they are not personalized,
this is not information we want to have.
Diffstat:
3 files changed, 4 insertions(+), 16 deletions(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -373,7 +373,7 @@ def crawl_page(
# crawl_delay = robots_file.crawl_delay("indexer")
if not can_fetch:
- logging.info(
+ logging.debug(
"Blocked by robots.txt, skipping: %s",
gus.lib.logging.strip_control_chars(url),
)
@@ -517,9 +517,7 @@ def load_expired_urls():
expired_pages = Page.raw(
"""SELECT url
FROM page as p
-WHERE datetime(last_crawl_at, REPLACE('fnord hours', 'fnord', change_frequency)) < datetime('now')
-"""
- )
+WHERE datetime(last_crawl_at, REPLACE('fnord hours', 'fnord', change_frequency)) < datetime('now') OR last_crawl_at IS NULL""" )
return [page.url for page in expired_pages.execute()]
diff --git a/gus/lib/db_model.py b/gus/lib/db_model.py
@@ -17,7 +17,7 @@ def init_db(filename=":memory:"):
"""
Bind an SQLite database to the Peewee ORM models.
"""
- models = [Link, Page, Search, Thread, ThreadPage]
+ models = [Link, Page, Thread, ThreadPage]
db = SqliteDatabase(filename)
db.bind(models)
db.create_tables(models)
@@ -60,15 +60,6 @@ class Link(Model):
def get_is_cross_host_like(from_resource, to_resource):
return from_resource.normalized_host_like != to_resource.normalized_host_like
-class Search(Model):
- """
- A log of performed searches
- """
-
- query = TextField()
- timestamp = DateTimeField()
-
-
class Thread(Model):
"""
Thread definitions.
diff --git a/serve/models.py b/serve/models.py
@@ -2,7 +2,7 @@ import re
from datetime import datetime
from . import constants
-from gus.lib.db_model import init_db, Page, Search, Thread
+from gus.lib.db_model import init_db, Page, Thread
from gus.lib.gemini import GeminiResource
from gus.lib.index_statistics import (
compute_index_statistics,
@@ -24,7 +24,6 @@ class GUS:
)
def search_index(self, query, requested_page):
- #Search.create(query=query, timestamp=datetime.utcnow())
query = self.index.parse_query(query)
results = self.index.search(query, requested_page, pagelen=10)
return (