commit 86bf28edff91ef55461ae8f27492dfadd8df8b22
parent 8a0c456fb9c75aa7c0930da9ecd67a53bc623749
Author: Natalie Pendragon <natpen@natpen.net>
Date: Tue, 11 Aug 2020 08:30:50 -0400
[crawl] Print change_frequency
Diffstat:
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -325,8 +325,6 @@ def crawl_page(gemini_resource, current_depth, should_check_if_expired=True, red
return
if should_check_if_expired:
- # expired_pages = Page.raw("SELECT p.* FROM page as p JOIN crawl as c ON p.id == c.page_id WHERE datetime(c.timestamp, REPLACE('fnord hours', 'fnord', p.change_frequency)) < datetime('now');")
- # return [page.url for page in expired_pages.execute()]
existing_page = Page.get_or_none(url=gr.indexable_url)
if existing_page and existing_page.change_frequency is not None:
most_recent_crawl = Crawl.select(fn.MAX(Crawl.timestamp)).where(Crawl.page == existing_page).scalar()
@@ -334,6 +332,8 @@ def crawl_page(gemini_resource, current_depth, should_check_if_expired=True, red
print("TOO SOON : %s" % gr.fetchable_url)
print("--------------------------")
return
+ else:
+ print("Recrawling after %s hours" % existing_page.change_frequency)
# ROBOTS
robots_file = get_robots_file(gr.normalized_host)