commit c172c20e952abe9f34c2c78446b7bcdb81dbbd71
parent d012217757ba8dfb3f2e6dff0dbdc79aa867ca4e
Author: Natalie Pendragon <natpen@natpen.net>
Date: Sun, 19 Jul 2020 09:23:46 -0400
[crawl] Update db model, and delete links before recreating
Diffstat:
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -232,6 +232,7 @@ def index_content(resource, response):
def index_links(from_resource, contained_resources):
from_page, created = Page.get_or_create(url=from_resource.indexable_url)
+ Link.delete().where(Link.from_page == from_page).execute()
data = []
for cr in contained_resources:
should_skip = False
diff --git a/gus/lib/db_model.py b/gus/lib/db_model.py
@@ -44,15 +44,15 @@ class Link(Model):
Hyperlinks between pages in Geminispace
"""
- from_page = ForeignKeyField(Page, backref="outbound_links")
- to_page = ForeignKeyField(Page, backref="backlinks")
+ from_page = ForeignKeyField(Page, backref="outbound_links", on_delete='CASCADE')
+ to_page = ForeignKeyField(Page, backref="backlinks", on_delete='CASCADE')
class Crawl(Model):
"""
Attempts to crawl a page.
"""
- page = ForeignKeyField(Page, backref="crawls")
+ page = ForeignKeyField(Page, backref="crawls", on_delete='CASCADE')
status = IntegerField()
is_different = BooleanField()
timestamp = DateTimeField()