db_model.py (2302B)
from peewee import (
    BooleanField,
    DateTimeField,
    DoesNotExist,
    FloatField,
    ForeignKeyField,
    IntegerField,
    Model,
    SqliteDatabase,
    TextField,
)

from gus.lib.gemini import GeminiResource


def init_db(filename=":memory:"):
    """
    Bind an SQLite database to the Peewee ORM models.

    The ``Link``, ``Page`` and ``PageContent`` models are bound to the
    database and their tables are created.

    :param filename: path of the SQLite database file. The default,
        ``":memory:"``, is SQLite's name for an in-memory database.
    :return: the ``SqliteDatabase`` instance the models were bound to.
    """
    models = [Link, Page, PageContent]
    db = SqliteDatabase(
        filename,
        pragmas={
            'journal_mode': 'wal',
            # A negative cache_size is interpreted by SQLite as a size in
            # KiB rather than a number of pages.
            'cache_size': -256 * 1000,
            'synchronous': 'normal',
            # SQLite only enforces foreign keys (and therefore the
            # ON DELETE CASCADE rules declared below) when this is enabled.
            'foreign_keys': 1,
            'ignore_check_constraints': 0,
        },
    )
    db.bind(models)
    db.create_tables(models)
    return db


class Page(Model):
    """
    Metadata of all the pages
    """

    url = TextField(unique=True, index=True)
    domain = TextField(null=True, index=True)
    port = IntegerField(null=True)
    content_type = TextField(null=True)
    charset = TextField(null=True)
    # TODO: normalize lang out to handle multiple values better
    lang = TextField(null=True)
    size = IntegerField(null=True)  # in bytes
    change_frequency = IntegerField(null=True)  # in hours
    indexed_at = DateTimeField(null=True)
    last_crawl_at = DateTimeField(null=True)
    last_crawl_success_at = DateTimeField(null=True)
    last_status = IntegerField(null=True)
    last_status_message = TextField(null=True)
    last_success_status = IntegerField(null=True)
    first_seen_at = DateTimeField(null=True)

    class Meta:
        # Each entry is (column names, unique); both composite indexes
        # below are non-unique.
        indexes = (
            (
                (
                    'last_success_status',
                    'first_seen_at',
                    'indexed_at',
                    'domain',
                    'url',
                    'content_type',
                ),
                False,
            ),
            (('last_crawl_at', 'last_crawl_success_at'), False),
        )


class PageContent(Model):
    """
    Content of all pages
    """

    # Rows are removed together with the Page they belong to.
    page = ForeignKeyField(Page, backref="page_content", on_delete="CASCADE")
    content = TextField(null=True)
    prompt = TextField(null=True)


class Link(Model):
    """
    Hyperlinks between pages in Geminispace
    """

    # Both ends reference Page; a link row is removed when either page is.
    from_page = ForeignKeyField(Page, backref="outbound_links", on_delete="CASCADE")
    to_page = ForeignKeyField(Page, backref="backlinks", on_delete="CASCADE")
    is_cross_host_like = BooleanField()

    @staticmethod
    def get_is_cross_host_like(from_resource, to_resource):
        """
        Tell whether a link connects two different "host-like" locations.

        Declared as a static method because it takes no ``self``: it can
        be called as ``Link.get_is_cross_host_like(a, b)`` as well as on
        a ``Link`` instance.

        :param from_resource: object exposing ``normalized_host_like``
            (presumably a ``GeminiResource`` - confirm against callers).
        :param to_resource: object exposing ``normalized_host_like``.
        :return: ``True`` when the two ``normalized_host_like`` values
            differ, ``False`` otherwise.
        """
        return from_resource.normalized_host_like != to_resource.normalized_host_like