geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit c172c20e952abe9f34c2c78446b7bcdb81dbbd71
parent d012217757ba8dfb3f2e6dff0dbdc79aa867ca4e
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Sun, 19 Jul 2020 09:23:46 -0400

[crawl] Update db model, and delete links before recreating

Diffstat:
M gus/crawl.py | 1 +
M gus/lib/db_model.py | 6 +++---
2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gus/crawl.py b/gus/crawl.py
@@ -232,6 +232,7 @@ def index_content(resource, response):
 
 def index_links(from_resource, contained_resources):
     from_page, created = Page.get_or_create(url=from_resource.indexable_url)
+    Link.delete().where(Link.from_page == from_page).execute()
     data = []
     for cr in contained_resources:
         should_skip = False
diff --git a/gus/lib/db_model.py b/gus/lib/db_model.py
@@ -44,15 +44,15 @@ class Link(Model):
     Hyperlinks between pages in Geminispace
     """
 
-    from_page = ForeignKeyField(Page, backref="outbound_links")
-    to_page = ForeignKeyField(Page, backref="backlinks")
+    from_page = ForeignKeyField(Page, backref="outbound_links", on_delete='CASCADE')
+    to_page = ForeignKeyField(Page, backref="backlinks", on_delete='CASCADE')
 
 class Crawl(Model):
     """
     Attempts to crawl a page.
     """
 
-    page = ForeignKeyField(Page, backref="crawls")
+    page = ForeignKeyField(Page, backref="crawls", on_delete='CASCADE')
     status = IntegerField()
     is_different = BooleanField()
     timestamp = DateTimeField()