geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

db_model.py (2302B)


      1 from peewee import (
      2     BooleanField,
      3     DateTimeField,
      4     DoesNotExist,
      5     FloatField,
      6     ForeignKeyField,
      7     IntegerField,
      8     Model,
      9     SqliteDatabase,
     10     TextField,
     11 )
     12 
     13 from gus.lib.gemini import GeminiResource
     14 
     15 
     16 def init_db(filename=":memory:"):
     17     """
     18     Bind an SQLite database to the Peewee ORM models.
     19     """
     20     models = [Link, Page, PageContent]
     21     db = SqliteDatabase(filename, pragmas={ 
     22         'journal_mode': 'wal',
     23         'cache_size': -256 * 1000,
     24         'synchronous': 'normal',
     25         'foreign_keys': 1,
     26         'ignore_check_constraints': 0})
     27     db.bind(models)
     28     db.create_tables(models)
     29     return db
     30 
     31 
     32 class Page(Model):
     33     """
     34     Metadata of all the pages
     35     """
     36 
     37     url = TextField(unique=True, index=True)
     38     domain = TextField(null=True, index=True)
     39     port = IntegerField(null=True)
     40     content_type = TextField(null=True)
     41     charset = TextField(null=True)
     42     # TODO: normalize lang out to handle multiple values better
     43     lang = TextField(null=True)
     44     size = IntegerField(null=True)  # in bytes
     45     change_frequency = IntegerField(null=True)  # in hours
     46     indexed_at = DateTimeField(null=True)
     47     last_crawl_at = DateTimeField(null=True)
     48     last_crawl_success_at = DateTimeField(null=True)
     49     last_status = IntegerField(null=True)
     50     last_status_message = TextField(null=True)
     51     last_success_status = IntegerField(null=True)
     52     first_seen_at = DateTimeField(null=True)
     53     class Meta:
     54         indexes=(
     55              (('last_success_status', 'first_seen_at', 'indexed_at', 'domain', 'url', 'content_type'), False),
     56              (('last_crawl_at', 'last_crawl_success_at'), False)
     57         )
     58 
     59 class PageContent(Model):
     60     """
     61     Content of all pages
     62     """
     63     page = ForeignKeyField(Page, backref="page_content", on_delete="CASCADE")
     64     content = TextField(null=True)
     65     prompt = TextField(null=True)
     66 
     67 class Link(Model):
     68     """
     69     Hyperlinks between pages in Geminispace
     70     """
     71 
     72     from_page = ForeignKeyField(Page, backref="outbound_links", on_delete="CASCADE")
     73     to_page = ForeignKeyField(Page, backref="backlinks", on_delete="CASCADE")
     74     is_cross_host_like = BooleanField()
     75 
     76     def get_is_cross_host_like(from_resource, to_resource):
     77         return from_resource.normalized_host_like != to_resource.normalized_host_like