geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git

commit 43397bdda337c7a0f31019358bd56eb0ae87a993
parent 5eebbbfc00555da619054e8129ad70bf3de99fd5
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Fri,  6 Nov 2020 08:42:57 -0500

Reformat code with Black
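
Black is Python's "uncompromising" autoformatter: it rewrites source into one deterministic style (double-quoted strings, long calls wrapped at its default 88-column limit, normalized blank lines) without changing runtime behavior. As a minimal illustration of the transformation applied throughout this commit, here is one logging call from gus/crawl.py, reconstructed from the diff below:

    # before: a single hand-wrapped line using single quotes
    logging.debug('Indexing binary for: %s', gus.lib.logging.strip_control_chars(resource.indexable_url))

    # after: Black switches to double quotes and explodes the over-long call,
    # one argument per line with a trailing comma
    logging.debug(
        "Indexing binary for: %s",
        gus.lib.logging.strip_control_chars(resource.indexable_url),
    )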

Diffstat:
M gus/__init__.py | 2 +-
M gus/build_index.py | 104 +++++++++++++++++++++++++++++++++++++-------------------------------------------
M gus/crawl.py | 346 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
M gus/lib/db_model.py | 25 +++++++++++++++++--------
M gus/lib/domain.py | 1 +
M gus/lib/gemini.py | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M gus/lib/logging.py | 15 +++++++--------
M gus/lib/misc.py | 25 +++++++++++++++++--------
M gus/lib/whoosh_extensions.py | 23 +++++++++++++++--------
M serve/constants.py | 6 +-----
M serve/main.py | 20 ++++----------------
M serve/models.py | 156 ++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
M serve/views.py | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M tests/gus/lib/test_gemini.py | 29 +++++++++++++++++------------
14 files changed, 685 insertions(+), 404 deletions(-)
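
One stylistic tell before reading the diff: calls that already carried a trailing comma, such as the Whoosh Schema fields in gus/build_index.py, were collapsed onto single lines. Black 20.8b0 and later treat such a trailing comma as "magic" and keep the call expanded, so this formatting likely came from an earlier Black release; that is an inference from the output style, not anything recorded in the commit. A sketch of the difference, using one field from this diff:

    # input, with a trailing comma inside the call:
    url_id=ID(
        unique=True,
    ),

    # pre-20.8b0 Black collapses it; this is what the diff below shows:
    url_id=ID(unique=True,),

    # Black 20.8b0+ honors the "magic trailing comma" and keeps the call expanded.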

diff --git a/gus/__init__.py b/gus/__init__.py @@ -1 +1 @@ -__version__ = '0.1.0' +__version__ = "0.1.0" diff --git a/gus/build_index.py b/gus/build_index.py @@ -13,7 +13,11 @@ from whoosh.index import open_dir from gus.crawl import EXCLUDED_URL_PREFIXES from gus.lib.db_model import init_db, Page -from gus.lib.index_statistics import compute_index_statistics, persist_statistics, log_index_statistics +from gus.lib.index_statistics import ( + compute_index_statistics, + persist_statistics, + log_index_statistics, +) from gus.lib.whoosh_extensions import UrlAnalyzer import gus.lib.logging @@ -39,42 +43,16 @@ def create_index(index_dir): # shutil.rmtree(index_dir, ignore_errors=True) pathlib.Path(index_dir).mkdir(parents=True, exist_ok=True) schema = Schema( - url_id=ID( - unique=True, - ), - url=TEXT( - field_boost=2.0, - stored=True, - analyzer=UrlAnalyzer(), - ), + url_id=ID(unique=True,), + url=TEXT(field_boost=2.0, stored=True, analyzer=UrlAnalyzer(),), fetchable_url=STORED(), - domain=TEXT( - analyzer=UrlAnalyzer(), - ), - port=NUMERIC( - int, - 32, - signed=False, - stored=True, - ), - content_type=TEXT( - stored=True, - ), - charset=ID( - stored=True, - ), - lang=ID( - stored=True, - ), - content=TEXT( - analyzer=FancyAnalyzer(), - spelling=True, - stored=True, - ), - prompt=TEXT( - analyzer=FancyAnalyzer(), - stored=True, - ), + domain=TEXT(analyzer=UrlAnalyzer(),), + port=NUMERIC(int, 32, signed=False, stored=True,), + content_type=TEXT(stored=True,), + charset=ID(stored=True,), + lang=ID(stored=True,), + content=TEXT(analyzer=FancyAnalyzer(), spelling=True, stored=True,), + prompt=TEXT(analyzer=FancyAnalyzer(), stored=True,), size=NUMERIC( int, # this means GUS will have problems indexing responses over ~2GB @@ -83,14 +61,9 @@ def create_index(index_dir): stored=True, ), backlink_count=NUMERIC( - int, - 16, # num bits, so max value is 65k - signed=False, - stored=True, - ), - indexed_at=DATETIME( - stored=True, + int, 16, signed=False, stored=True, # num bits, so max value is 65k ), + indexed_at=DATETIME(stored=True,), ) index_storage.create_index(schema) @@ -102,16 +75,23 @@ def index_page(page, indexed_urls): should_skip = True break if should_skip: - logging.debug('URL prefix matches exclusion list, skipping: %s', gus.lib.logging.strip_control_chars(page.url)) + logging.debug( + "URL prefix matches exclusion list, skipping: %s", + gus.lib.logging.strip_control_chars(page.url), + ) return False if page.fetchable_url in indexed_urls: - logging.debug('Page already indexed, skipping: %s', gus.lib.logging.strip_control_chars(page.url)) + logging.debug( + "Page already indexed, skipping: %s", + gus.lib.logging.strip_control_chars(page.url), + ) return False logging.info("Indexing page: %s", gus.lib.logging.strip_control_chars(page.url)) u = page.url.rstrip("/") - external_backlinks = Page.raw("""SELECT p_from.url + external_backlinks = Page.raw( + """SELECT p_from.url FROM page AS p_from JOIN indexable_crawl AS ic ON ic.page_id == p_from.id @@ -121,7 +101,10 @@ JOIN page as p_to ON p_to.id == l.to_page_id WHERE p_to.url IN (?, ?) 
AND l.is_cross_host_like == 1 -GROUP BY p_from.normalized_url""", u, f"{u}/") +GROUP BY p_from.normalized_url""", + u, + f"{u}/", + ) backlink_urls = [b.url for b in external_backlinks.execute()] backlink_count = len(backlink_urls) @@ -146,9 +129,12 @@ GROUP BY p_from.normalized_url""", u, f"{u}/") index_writer.add_document(**document) return True except: - logging.warn("Failed to index page: %s", gus.lib.logging.strip_control_chars(page.url)) + logging.warn( + "Failed to index page: %s", gus.lib.logging.strip_control_chars(page.url) + ) return False + def load_indexed_urls(index_dir): indexed_urls = [] ix = open_dir(index_dir) @@ -162,7 +148,9 @@ def load_indexed_urls(index_dir): def invalidate_recent_results(invalidation_window): recency_minimum = datetime.now() - timedelta(hours=invalidation_window) - pages = Page.select().where(Page.indexed_at.is_null(False), Page.indexed_at > recency_minimum) + pages = Page.select().where( + Page.indexed_at.is_null(False), Page.indexed_at > recency_minimum + ) for page in pages: index_writer.delete_by_term("url_id", page.url, searcher=None) @@ -183,13 +171,17 @@ def build_index(should_run_destructive=False, invalidation_window=0): index_writer = ix.writer() invalidate_recent_results(invalidation_window) - indexed_urls = [] if should_run_destructive else load_indexed_urls(INDEX_DIR_CURRENT) + indexed_urls = ( + [] if should_run_destructive else load_indexed_urls(INDEX_DIR_CURRENT) + ) - pages = Page.raw("""SELECT p.*, MAX(c.timestamp) AS crawl_timestamp + pages = Page.raw( + """SELECT p.*, MAX(c.timestamp) AS crawl_timestamp FROM indexable_crawl AS c JOIN page AS p ON p.id == c.page_id -GROUP BY p.normalized_url""") +GROUP BY p.normalized_url""" + ) i = 0 for page in pages.iterator(): @@ -202,10 +194,10 @@ GROUP BY p.normalized_url""") # it to flush segments to disk every 5000 documents, which # should scale well with Geminispace going forward. 
if i % 5000 == 0: - logging.debug('Committing index.') + logging.debug("Committing index.") index_writer.commit() index_writer = ix.writer() - logging.debug('Committing index for the last time.') + logging.debug("Committing index for the last time.") index_writer.commit() index_statistics = compute_index_statistics(db) @@ -216,7 +208,7 @@ GROUP BY p.normalized_url""") # shutil.rmtree(INDEX_DIR_CURRENT, ignore_errors=True) # shutil.move(INDEX_DIR_NEW, INDEX_DIR_CURRENT) - logging.info('Finished!') + logging.info("Finished!") def main(): @@ -226,7 +218,7 @@ def main(): def parse_args(): - parser = argparse.ArgumentParser(description='Crawl Geminispace.') + parser = argparse.ArgumentParser(description="Crawl Geminispace.") parser.add_argument( "--destructive", "-d", diff --git a/gus/crawl.py b/gus/crawl.py @@ -155,7 +155,10 @@ CRAWL_DELAYS = { def index_binary(resource, response): - logging.debug('Indexing binary for: %s', gus.lib.logging.strip_control_chars(resource.indexable_url)) + logging.debug( + "Indexing binary for: %s", + gus.lib.logging.strip_control_chars(resource.indexable_url), + ) doc = { "url": resource.indexable_url, @@ -171,15 +174,23 @@ def index_binary(resource, response): existing_page = Page.get_or_none(url=resource.indexable_url) if existing_page: doc["id"] = existing_page.id - existing_change_frequency = existing_page.change_frequency or resource.get_default_change_frequency("binary") - doc["change_frequency"] = resource.increment_change_frequency(existing_change_frequency, "binary") + existing_change_frequency = ( + existing_page.change_frequency + or resource.get_default_change_frequency("binary") + ) + doc["change_frequency"] = resource.increment_change_frequency( + existing_change_frequency, "binary" + ) page = Page(**doc) page.save() return page def index_redirect(resource): - logging.debug('Indexing redirect for: %s', gus.lib.logging.strip_control_chars(resource.indexable_url)) + logging.debug( + "Indexing redirect for: %s", + gus.lib.logging.strip_control_chars(resource.indexable_url), + ) doc = { "url": resource.indexable_url, @@ -192,15 +203,23 @@ def index_redirect(resource): existing_page = Page.get_or_none(url=resource.indexable_url) if existing_page: doc["id"] = existing_page.id - existing_change_frequency = existing_page.change_frequency or resource.get_default_change_frequency("redirect") - doc["change_frequency"] = resource.increment_change_frequency(existing_change_frequency, "redirect") + existing_change_frequency = ( + existing_page.change_frequency + or resource.get_default_change_frequency("redirect") + ) + doc["change_frequency"] = resource.increment_change_frequency( + existing_change_frequency, "redirect" + ) page = Page(**doc) page.save() return page def index_error(resource, is_temporary): - logging.debug('Indexing error for: %s', gus.lib.logging.strip_control_chars(resource.indexable_url)) + logging.debug( + "Indexing error for: %s", + gus.lib.logging.strip_control_chars(resource.indexable_url), + ) category = "temp_error" if is_temporary else "perm_error" default_change_frequency = resource.get_default_change_frequency(category) @@ -215,16 +234,22 @@ def index_error(resource, is_temporary): existing_page = Page.get_or_none(url=resource.indexable_url) if existing_page: doc["id"] = existing_page.id - existing_change_frequency = existing_page.change_frequency or default_change_frequency - doc["change_frequency"] = resource.increment_change_frequency(existing_change_frequency, category) + existing_change_frequency = ( + 
existing_page.change_frequency or default_change_frequency + ) + doc["change_frequency"] = resource.increment_change_frequency( + existing_change_frequency, category + ) page = Page(**doc) page.save() return page - def index_prompt(resource, response): - logging.debug('Indexing prompt for: %s', gus.lib.logging.strip_control_chars(resource.indexable_url)) + logging.debug( + "Indexing prompt for: %s", + gus.lib.logging.strip_control_chars(resource.indexable_url), + ) doc = { "url": resource.indexable_url, @@ -241,15 +266,23 @@ def index_prompt(resource, response): existing_page = Page.get_or_none(url=resource.indexable_url) if existing_page: doc["id"] = existing_page.id - existing_change_frequency = existing_page.change_frequency or resource.get_default_change_frequency("prompt") - doc["change_frequency"] = resource.increment_change_frequency(existing_change_frequency, "prompt") + existing_change_frequency = ( + existing_page.change_frequency + or resource.get_default_change_frequency("prompt") + ) + doc["change_frequency"] = resource.increment_change_frequency( + existing_change_frequency, "prompt" + ) page = Page(**doc) page.save() return page def index_content(resource, response): - logging.debug('Indexing content for: %s', gus.lib.logging.strip_control_chars(resource.indexable_url)) + logging.debug( + "Indexing content for: %s", + gus.lib.logging.strip_control_chars(resource.indexable_url), + ) doc = { "url": resource.indexable_url, @@ -264,7 +297,7 @@ def index_content(resource, response): "change_frequency": resource.get_default_change_frequency("content"), } if response.content_type == "text/gemini": - doc["lang"] = response.lang or "none", + doc["lang"] = (response.lang or "none",) existing_page = Page.get_or_none(url=resource.indexable_url) is_different = False if existing_page: @@ -272,10 +305,17 @@ def index_content(resource, response): if existing_page.content: is_different = doc["content"] != existing_page.content if is_different: - doc["change_frequency"] = resource.get_default_change_frequency("content") + doc["change_frequency"] = resource.get_default_change_frequency( + "content" + ) else: - existing_change_frequency = existing_page.change_frequency or resource.get_default_change_frequency("content") - doc["change_frequency"] = resource.increment_change_frequency(existing_change_frequency, "content") + existing_change_frequency = ( + existing_page.change_frequency + or resource.get_default_change_frequency("content") + ) + doc["change_frequency"] = resource.increment_change_frequency( + existing_change_frequency, "content" + ) page = Page(**doc) page.save() return page, is_different @@ -301,17 +341,21 @@ def index_links(from_resource, contained_resources): domain=cr.normalized_host, port=cr.urlsplit.port or 1965, ) - data.append({ - "from_page": from_page, - "to_page": to_page, - "is_cross_host_like": Link.get_is_cross_host_like(from_resource, cr), - }) + data.append( + { + "from_page": from_page, + "to_page": to_page, + "is_cross_host_like": Link.get_is_cross_host_like(from_resource, cr), + } + ) Link.insert_many(data).execute() def fetch_robots_file(robot_host): robot_url = urljoin("gemini://{}".format(robot_host), "/robots.txt") - logging.info('Fetching robots file: %s', gus.lib.logging.strip_control_chars(robot_url)) + logging.info( + "Fetching robots file: %s", gus.lib.logging.strip_control_chars(robot_url) + ) rp = GeminiRobotFileParser(robot_url) rp.read() @@ -322,30 +366,52 @@ def get_robots_file(robot_host): return robot_file_map[robot_host] -def 
crawl_page(gemini_resource, current_depth, should_check_if_expired=True, redirect_chain=[]): +def crawl_page( + gemini_resource, current_depth, should_check_if_expired=True, redirect_chain=[] +): gr = gemini_resource url = gr.fetchable_url if max_crawl_depth >= 0 and current_depth > max_crawl_depth: - logging.warn('Going too deep, skipping: %s', gus.lib.logging.strip_control_chars(url)) + logging.warn( + "Going too deep, skipping: %s", gus.lib.logging.strip_control_chars(url) + ) return if not gemini_resource.is_valid: - logging.warn('Not a valid gemini resource, skipping: %s', gus.lib.logging.strip_control_chars(url)) + logging.warn( + "Not a valid gemini resource, skipping: %s", + gus.lib.logging.strip_control_chars(url), + ) return for excluded_prefix in EXCLUDED_URL_PREFIXES: if gr.normalized_url.startswith(excluded_prefix): - logging.info('URL prefix matches exclusion list, skipping: %s', gus.lib.logging.strip_control_chars(url)) + logging.info( + "URL prefix matches exclusion list, skipping: %s", + gus.lib.logging.strip_control_chars(url), + ) return for excluded_path in EXCLUDED_URL_PATHS: if gr.urlsplit.path.lower().endswith(excluded_path): - logging.info('URL on exclusion list, skipping: %s', gus.lib.logging.strip_control_chars(url)) + logging.info( + "URL on exclusion list, skipping: %s", + gus.lib.logging.strip_control_chars(url), + ) return if should_check_if_expired: existing_page = Page.get_or_none(url=gr.indexable_url) if existing_page and existing_page.change_frequency is not None: - most_recent_crawl = Crawl.select(peewee.fn.MAX(Crawl.timestamp)).where(Crawl.page == existing_page).scalar() - if most_recent_crawl and datetime.now() < most_recent_crawl + timedelta(hours=existing_page.change_frequency): - logging.debug('Recrawling too soon, skipping: %s', gus.lib.logging.strip_control_chars(gr.fetchable_url)) + most_recent_crawl = ( + Crawl.select(peewee.fn.MAX(Crawl.timestamp)) + .where(Crawl.page == existing_page) + .scalar() + ) + if most_recent_crawl and datetime.now() < most_recent_crawl + timedelta( + hours=existing_page.change_frequency + ): + logging.debug( + "Recrawling too soon, skipping: %s", + gus.lib.logging.strip_control_chars(gr.fetchable_url), + ) return # ROBOTS @@ -365,120 +431,188 @@ def crawl_page(gemini_resource, current_depth, should_check_if_expired=True, red crawl_delay = robots_file.crawl_delay("gus") if not can_fetch: - logging.debug('Blocked by robots files, skipping: %s', gus.lib.logging.strip_control_chars(url)) + logging.debug( + "Blocked by robots files, skipping: %s", + gus.lib.logging.strip_control_chars(url), + ) return # Crawl delay if gr.normalized_host in domain_hit_timings: if gr.normalized_host in CRAWL_DELAYS: - next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta(milliseconds=CRAWL_DELAYS[gr.normalized_host]) + next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta( + milliseconds=CRAWL_DELAYS[gr.normalized_host] + ) elif not crawl_delay: - next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta(milliseconds=500) + next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta( + milliseconds=500 + ) else: - next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta(milliseconds=crawl_delay) + next_allowed_hit = domain_hit_timings[gr.normalized_host] + timedelta( + milliseconds=crawl_delay + ) sleep_duration = max((next_allowed_hit - datetime.now()).total_seconds(), 0) time.sleep(sleep_duration) domain_hit_timings[gr.normalized_host] = datetime.now() # Actually fetch! 
- logging.info('Fetching resource: %s', gus.lib.logging.strip_control_chars(url)) + logging.info("Fetching resource: %s", gus.lib.logging.strip_control_chars(url)) if gr.fully_qualified_parent_url is not None: - logging.debug('with parent: %s', gus.lib.logging.strip_control_chars(gr.fully_qualified_parent_url)) + logging.debug( + "with parent: %s", + gus.lib.logging.strip_control_chars(gr.fully_qualified_parent_url), + ) response = gr.fetch() if response is None: # problem before getting a response - logging.warn('Failed to fetch: %s', gus.lib.logging.strip_control_chars(url)) + logging.warn("Failed to fetch: %s", gus.lib.logging.strip_control_chars(url)) page = index_error(gr, True) - page_crawl = Crawl(page=page, - status=0, - is_different=False, - timestamp=datetime.utcnow()) + page_crawl = Crawl( + page=page, status=0, is_different=False, timestamp=datetime.utcnow() + ) page_crawl.save() elif response.status.startswith("4"): # temporary error status - logging.debug('Got temporary error: %s: %s %s', - gus.lib.logging.strip_control_chars(url), - response.status, - response.error_message) + logging.debug( + "Got temporary error: %s: %s %s", + gus.lib.logging.strip_control_chars(url), + response.status, + response.error_message, + ) page = index_error(gr, True) - page_crawl = Crawl(page=page, - status=response.status, - is_different=False, - error_message=response.error_message, - timestamp=datetime.utcnow()) + page_crawl = Crawl( + page=page, + status=response.status, + is_different=False, + error_message=response.error_message, + timestamp=datetime.utcnow(), + ) page_crawl.save() elif response.status.startswith("5"): # permanent error status - logging.debug('Got permanent error: %s: %s %s', - gus.lib.logging.strip_control_chars(url), - response.status, - response.error_message) + logging.debug( + "Got permanent error: %s: %s %s", + gus.lib.logging.strip_control_chars(url), + response.status, + response.error_message, + ) page = index_error(gr, False) - page_crawl = Crawl(page=page, - status=response.status, - is_different=False, - error_message=response.error_message, - timestamp=datetime.utcnow()) + page_crawl = Crawl( + page=page, + status=response.status, + is_different=False, + error_message=response.error_message, + timestamp=datetime.utcnow(), + ) page_crawl.save() elif response.status.startswith("3"): # redirect status - logging.debug('Got redirected: %s: %s %s', - gus.lib.logging.strip_control_chars(url), - response.status, - response.url) + logging.debug( + "Got redirected: %s: %s %s", + gus.lib.logging.strip_control_chars(url), + response.status, + response.url, + ) if len(redirect_chain) > constants.MAXIMUM_REDIRECT_CHAIN_LENGTH: - logging.info('Aborting, maximum redirect chain length reached: %s', gus.lib.logging.strip_control_chars(url)) + logging.info( + "Aborting, maximum redirect chain length reached: %s", + gus.lib.logging.strip_control_chars(url), + ) return - redirect_resource = GeminiResource(response.url, gr.normalized_url, gr.normalized_host) + redirect_resource = GeminiResource( + response.url, gr.normalized_url, gr.normalized_host + ) if redirect_resource.fetchable_url == gr.fetchable_url: - logging.info('Aborting, redirecting to self: %s', gus.lib.logging.strip_control_chars(url)) + logging.info( + "Aborting, redirecting to self: %s", + gus.lib.logging.strip_control_chars(url), + ) return page = index_redirect(gr) - page_crawl = Crawl(page=page, status=response.status, is_different=False, timestamp=datetime.utcnow()) + page_crawl = Crawl( + page=page, + 
status=response.status, + is_different=False, + timestamp=datetime.utcnow(), + ) page_crawl.save() index_links(gr, [redirect_resource]) - crawl_page(redirect_resource, current_depth, should_check_if_expired=True, redirect_chain=redirect_chain + [gr.fetchable_url]) + crawl_page( + redirect_resource, + current_depth, + should_check_if_expired=True, + redirect_chain=redirect_chain + [gr.fetchable_url], + ) elif response.status.startswith("1"): # input status - logging.debug('Input requested at: %s: %s %s', gus.lib.logging.strip_control_chars(url), response.status, response.prompt) + logging.debug( + "Input requested at: %s: %s %s", + gus.lib.logging.strip_control_chars(url), + response.status, + response.prompt, + ) page = index_prompt(gr, response) - page_crawl = Crawl(page=page, status=response.status, is_different=False, timestamp=datetime.utcnow()) + page_crawl = Crawl( + page=page, + status=response.status, + is_different=False, + timestamp=datetime.utcnow(), + ) page_crawl.save() elif response.status.startswith("2"): # success status - logging.debug('Successful request: %s: %s %s', gus.lib.logging.strip_control_chars(url), response.status, response.content_type) + logging.debug( + "Successful request: %s: %s %s", + gus.lib.logging.strip_control_chars(url), + response.status, + response.content_type, + ) if response.content_type.startswith("text/"): page, is_different = index_content(gr, response) page_crawl = Crawl( page=page, status=response.status, is_different=is_different, - timestamp=datetime.utcnow() + timestamp=datetime.utcnow(), ) page_crawl.save() if response.content_type != "text/gemini": - logging.debug('Content is not gemini text: %s: %s', - gus.lib.logging.strip_control_chars(url), response.content_type) + logging.debug( + "Content is not gemini text: %s: %s", + gus.lib.logging.strip_control_chars(url), + response.content_type, + ) else: - logging.debug('Got gemini text, extracting and crawling links: %s', - gus.lib.logging.strip_control_chars(url)) + logging.debug( + "Got gemini text, extracting and crawling links: %s", + gus.lib.logging.strip_control_chars(url), + ) contained_resources = gr.extract_contained_resources(response.content) index_links(gr, contained_resources) for resource in contained_resources: - crawl_page(resource, current_depth+1, should_check_if_expired=True) + crawl_page( + resource, current_depth + 1, should_check_if_expired=True + ) else: page = index_binary(gr, response) - page_crawl = Crawl(page=page, status=response.status, is_different=False, timestamp=datetime.utcnow()) + page_crawl = Crawl( + page=page, + status=response.status, + is_different=False, + timestamp=datetime.utcnow(), + ) page_crawl.save() else: - logging.warn('Got unhandled status: %s: %s', - gus.lib.logging.strip_control_chars(url), - response.status) + logging.warn( + "Got unhandled status: %s: %s", + gus.lib.logging.strip_control_chars(url), + response.status, + ) def pickle_robot_file_map(robot_file_map, index_dir): @@ -487,13 +621,14 @@ def pickle_robot_file_map(robot_file_map, index_dir): def unpickle_robot_file_map(index_dir): if not os.path.isfile(index_dir + "/robot_file_map.p"): - logging.debug('Robot file cache missing') + logging.debug("Robot file cache missing") return {} return pickle.load(open(index_dir + "/robot_file_map.p", "rb")) def load_expired_urls(): - expired_pages = Page.raw("""SELECT url + expired_pages = Page.raw( + """SELECT url FROM ( SELECT p.url, p.normalized_url, p.change_frequency, MAX(c.timestamp) as timestamp FROM page as p @@ -502,7 +637,8 @@ FROM 
( GROUP BY p.url ) WHERE datetime(timestamp, REPLACE('fnord hours', 'fnord', change_frequency)) < datetime('now') -GROUP BY normalized_url;""") +GROUP BY normalized_url;""" + ) return [page.url for page in expired_pages.execute()] @@ -528,7 +664,10 @@ def load_feed_urls(filename): def items_from_feed_string(feed_str): feed_obj = feedparser.parse(feed_str) feed = feed_obj.feed - return [(entry.updated_parsed, entry.link, entry.title, feed.title) for entry in feed_obj.entries] + return [ + (entry.updated_parsed, entry.link, entry.title, feed.title) + for entry in feed_obj.entries + ] def resolve_feed_content_urls(feed_file=constants.FEED_FILE): @@ -550,26 +689,29 @@ def resolve_feed_content_urls(feed_file=constants.FEED_FILE): now = time.time() interval = int(now - last) if interval < 5: - logging.warn('Declining to hit %s again after only %d seconds', - gus.lib.logging.strip_control_chars(feed_resource.normalized_host), - interval) + logging.warn( + "Declining to hit %s again after only %d seconds", + gus.lib.logging.strip_control_chars(feed_resource.normalized_host), + interval, + ) feed_urls.insert(0, feed_url) skips += 1 if skips == len(feed_urls): # We've hammered every server in the queue! Sleep a bit... - logging.warn('Sleeping to give all servers a rest!') + logging.warn("Sleeping to give all servers a rest!") time.sleep(5) continue skips = 0 # Good to go - logging.info('Fetching feed: %s', - gus.lib.logging.strip_control_chars(feed_url)) + logging.info("Fetching feed: %s", gus.lib.logging.strip_control_chars(feed_url)) try: resp = feed_resource.fetch() except: - logging.info('Error fetching feed, skipping: %s', - gus.lib.logging.strip_control_chars(feed_url)) + logging.info( + "Error fetching feed, skipping: %s", + gus.lib.logging.strip_control_chars(feed_url), + ) continue if resp and resp.status == "20": last_accessed[feed_resource.normalized_host] = time.time() @@ -595,8 +737,10 @@ def recrawl_feeds(): crawl_page(resource, 0) pickle_robot_file_map(robot_file_map, index_dir) - logging.debug('Recrawled feeds: %s', gus.lib.logging.strip_control_chars(content_urls)) - logging.info('Finished!') + logging.debug( + "Recrawled feeds: %s", gus.lib.logging.strip_control_chars(content_urls) + ) + logging.info("Finished!") def run_crawl(should_run_destructive=False, seed_urls=[]): @@ -609,7 +753,9 @@ def run_crawl(should_run_destructive=False, seed_urls=[]): db = init_db(f"{index_dir}/{constants.DB_FILENAME}") global robot_file_map - robot_file_map = {} if should_run_destructive else unpickle_robot_file_map(INDEX_DIR_CURRENT) + robot_file_map = ( + {} if should_run_destructive else unpickle_robot_file_map(INDEX_DIR_CURRENT) + ) global domain_hit_timings domain_hit_timings = {} global max_crawl_depth @@ -623,7 +769,7 @@ def run_crawl(should_run_destructive=False, seed_urls=[]): crawl_page(resource, 0, should_check_if_expired=True) pickle_robot_file_map(robot_file_map, index_dir) - logging.info('Finished!') + logging.info("Finished!") def main(): @@ -637,7 +783,7 @@ def main(): def parse_args(): - parser = argparse.ArgumentParser(description='Crawl Geminispace.') + parser = argparse.ArgumentParser(description="Crawl Geminispace.") parser.add_argument( "--destructive", "-d", diff --git a/gus/lib/db_model.py b/gus/lib/db_model.py @@ -12,6 +12,7 @@ from peewee import ( from gus.lib.gemini import GeminiResource + def init_db(filename=":memory:"): """ Bind an SQLite database to the Peewee ORM models. 
@@ -20,13 +21,15 @@ def init_db(filename=":memory:"): db = SqliteDatabase(filename) db.bind(models) db.create_tables(models) - db.execute_sql("""CREATE VIEW IF NOT EXISTS indexable_crawl AS + db.execute_sql( + """CREATE VIEW IF NOT EXISTS indexable_crawl AS SELECT c.* FROM ( SELECT crawl.*, row_number() OVER (PARTITION BY page_id ORDER BY timestamp DESC) AS rank FROM crawl) AS c WHERE c.rank < 3 -AND c.status == 20;""") +AND c.status == 20;""" + ) return db @@ -46,33 +49,36 @@ class Page(Model): lang = TextField(null=True) content = TextField(null=True) prompt = TextField(null=True) - size = IntegerField(null=True) # in bytes - change_frequency = IntegerField(null=True) # in hours + size = IntegerField(null=True) # in bytes + change_frequency = IntegerField(null=True) # in hours indexed_at = DateTimeField(null=True) + class Link(Model): """ Hyperlinks between pages in Geminispace """ - from_page = ForeignKeyField(Page, backref="outbound_links", on_delete='CASCADE') - to_page = ForeignKeyField(Page, backref="backlinks", on_delete='CASCADE') + from_page = ForeignKeyField(Page, backref="outbound_links", on_delete="CASCADE") + to_page = ForeignKeyField(Page, backref="backlinks", on_delete="CASCADE") is_cross_host_like = BooleanField() def get_is_cross_host_like(from_resource, to_resource): return from_resource.normalized_host_like != to_resource.normalized_host_like + class Crawl(Model): """ Attempts to crawl a page. """ - page = ForeignKeyField(Page, backref="crawls", on_delete='CASCADE') + page = ForeignKeyField(Page, backref="crawls", on_delete="CASCADE") status = IntegerField() error_message = TextField(null=True) is_different = BooleanField() timestamp = DateTimeField() + class Search(Model): """ A log of performed searches @@ -81,19 +87,22 @@ class Search(Model): query = TextField() timestamp = DateTimeField() + class Thread(Model): """ Thread definitions. """ + updated_at = DateTimeField() + class ThreadPage(Model): """ Mapping table of threads to their member pages. 
""" thread = ForeignKeyField(Thread, backref="pages", on_delete="CASCADE") - page = ForeignKeyField(Page, backref="threads", on_delete='CASCADE') + page = ForeignKeyField(Page, backref="threads", on_delete="CASCADE") address = TextField() friendly_author = TextField() friendly_title = TextField() diff --git a/gus/lib/domain.py b/gus/lib/domain.py @@ -1,5 +1,6 @@ import re + def is_domain(possible_domain): domain_pattern = "^((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+(aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|ac|or|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|an|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|bl|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|boots|bosch|bostik|boston|bot|boutique|box|bq|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|chloe|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|doosan|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|eh|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|flsmidth|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|g
allup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|htc|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|iinet|ikano|il|im|imamat|imdb|immo|immobilien|in|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|iwc|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jlc|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mcd|mcdonalds|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|meo|merckmsd|metlife|mf|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|montblanc|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtpc|mtr|mu|museum|mutual|mutuelle|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|orientexpress|origins|osaka|otsuka|ott|ovh|pa|page|pamperedchef|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|sas|sa
ve|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr|srl|srt|ss|st|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telecity|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tp|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|um|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vista|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|测试|कॉम|परीक्षा|セール|佛山|ಭಾರತ|慈善|集团|在线|한국|ଭାରତ|大众汽车|点看|คอม|ভাৰত|ভারত|八卦|‏موقع‎|বাংলা|公益|公司|香格里拉|网站|移动|我爱你|москва|испытание|қаз|католик|онлайн|сайт|联通|срб|бг|бел|‏קום‎|时尚|微博|테스트|淡马锡|ファッション|орг|नेट|ストア|삼성|சிங்கப்பூர்|商标|商店|商城|дети|мкд|‏טעסט‎|ею|ポイント|新闻|工行|家電|‏كوم‎|中文网|中信|中国|中國|娱乐|谷歌|భారత్|ලංකා|電訊盈科|购物|測試|クラウド|ભારત|通販|भारतम्|भारत|भारोत|‏آزمایشی‎|பரிட்சை|网店|संगठन|餐厅|网络|ком|укр|香港|诺基亚|食品|δοκιμή|飞利浦|‏إختبار‎|台湾|台灣|手表|手机|мон|‏الجزائر‎|‏عمان‎|‏ارامكو‎|‏ایران‎|‏العليان‎|‏اتصالات‎|‏امارات‎|‏بازار‎|‏موريتانيا‎|‏پاکستان‎|‏الاردن‎|‏موبايلي‎|‏بارت‎|‏بھارت‎|‏المغرب‎|‏ابوظبي‎|‏السعودية‎|‏ڀارت‎|‏كاثوليك‎|‏سودان‎|‏همراه‎|‏عراق‎|‏مليسيا‎|澳門|닷컴|政府|‏شبكة‎|‏بيتك‎|‏عرب‎|გე|机构|组织机构|健康|ไทย|‏سورية‎|招聘|рус|рф|珠宝|‏تونس‎|大拿|みんな|グーグル|ελ|世界|書籍|ഭാരതം|ਭਾਰਤ|网址|닷넷|コム|天主教|游戏|vermögensberater|vermögensberatung|企业|信息|嘉里大酒店|嘉里|‏مصر‎|‏قطر‎|广东|இலங்கை|இந்தியா|հայ|新加坡|‏فلسطين‎|テスト|政务|xperia|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)$" domain_match = re.match(domain_pattern, possible_domain, re.I) diff --git a/gus/lib/gemini.py b/gus/lib/gemini.py @@ -1,5 +1,12 @@ import re -from urllib.parse import unquote, urljoin, urlsplit, urlunsplit, uses_relative, uses_netloc +from urllib.parse import ( + unquote, + urljoin, + urlsplit, + urlunsplit, + uses_relative, + uses_netloc, +) from urllib.robotparser import RobotFileParser import gusmobile @@ -12,22 +19,47 @@ from gus.lib.domain import is_domain uses_relative.append("gemini") uses_netloc.append("gemini") -LOG_ROOT_LIKE_PATTERN = re.compile(".*/(gemlog|glog|journal|twinlog|posts|post|tangents|phlog|starlog|pikkulog|blog|log)/?$", flags=re.IGNORECASE) -LOG_POST_LIKE_PATTERN = 
re.compile(".*/((gemlog|glog|journal|twinlog|posts|post|tangents|phlog|starlog|pikkulog|blog|log)/.+$|.*/\d{4}[-_]\d{2}[-_]\d{2}.*$|.*/(19|20)\d{6}.*$)", flags=re.IGNORECASE) -LOG_POST_LIKE_EXCLUSION_PATTERN = re.compile(".*/(games|archive|archives|rss|handlers|diagnostics)/.*|.*atom.xml$|.*gemlog.gmi$|.*index.gmi$|.*index.gemini$", flags=re.IGNORECASE) -LOG_POST_GEMLOGBLUE_LIKE_PATTERN = re.compile("^/users/[a-z][-a-z0-9]*/\d+\.gmi?", flags=re.IGNORECASE) -LOG_POST_BOSTON_LIKE_PATTERN = re.compile("^/boston/\d{4}/\d{2}/\d+\.\d+", flags=re.IGNORECASE) - -ROOT_LIKE_ONLY_PATTERN = re.compile("^/(~[a-z][-a-z0-9]*|users/[a-z][-a-z0-9]*|users)/?$", flags=re.IGNORECASE) -ROOT_LIKE_PATTERN = re.compile("^/(~[a-z][-a-z0-9]*|users/[a-z][-a-z0-9]*|users)/?", flags=re.IGNORECASE) +LOG_ROOT_LIKE_PATTERN = re.compile( + ".*/(gemlog|glog|journal|twinlog|posts|post|tangents|phlog|starlog|pikkulog|blog|log)/?$", + flags=re.IGNORECASE, +) +LOG_POST_LIKE_PATTERN = re.compile( + ".*/((gemlog|glog|journal|twinlog|posts|post|tangents|phlog|starlog|pikkulog|blog|log)/.+$|.*/\d{4}[-_]\d{2}[-_]\d{2}.*$|.*/(19|20)\d{6}.*$)", + flags=re.IGNORECASE, +) +LOG_POST_LIKE_EXCLUSION_PATTERN = re.compile( + ".*/(games|archive|archives|rss|handlers|diagnostics)/.*|.*atom.xml$|.*gemlog.gmi$|.*index.gmi$|.*index.gemini$", + flags=re.IGNORECASE, +) +LOG_POST_GEMLOGBLUE_LIKE_PATTERN = re.compile( + "^/users/[a-z][-a-z0-9]*/\d+\.gmi?", flags=re.IGNORECASE +) +LOG_POST_BOSTON_LIKE_PATTERN = re.compile( + "^/boston/\d{4}/\d{2}/\d+\.\d+", flags=re.IGNORECASE +) + +ROOT_LIKE_ONLY_PATTERN = re.compile( + "^/(~[a-z][-a-z0-9]*|users/[a-z][-a-z0-9]*|users)/?$", flags=re.IGNORECASE +) +ROOT_LIKE_PATTERN = re.compile( + "^/(~[a-z][-a-z0-9]*|users/[a-z][-a-z0-9]*|users)/?", flags=re.IGNORECASE +) PIKKULOG_LIKE_PATTERN = re.compile(".*/pikkulog/.*", flags=re.IGNORECASE) -AUTHOR_URL_PATTERN = re.compile("^/~([a-z][-a-z0-9]*)/|^/users/~?([a-z][-a-z0-9]*)", flags=re.IGNORECASE) -AUTHOR_CONTENT_PATTERN = re.compile(".*(by|author): ([\w\s\d]+)", flags=re.IGNORECASE | re.MULTILINE) +AUTHOR_URL_PATTERN = re.compile( + "^/~([a-z][-a-z0-9]*)/|^/users/~?([a-z][-a-z0-9]*)", flags=re.IGNORECASE +) +AUTHOR_CONTENT_PATTERN = re.compile( + ".*(by|author): ([\w\s\d]+)", flags=re.IGNORECASE | re.MULTILINE +) TITLE_CONTENT_PATTERN = re.compile("^#\s(.*)$", flags=re.IGNORECASE | re.MULTILINE) -TITLE_URL_PATTERN = re.compile(".*/(\d{8}[-_]|\d{4}[-_]\d{2}[-_]\d{2}[-_])?([a-z0-9-_]+)(\.[a-z0-9]+)$", flags=re.IGNORECASE) +TITLE_URL_PATTERN = re.compile( + ".*/(\d{8}[-_]|\d{4}[-_]\d{2}[-_]\d{2}[-_])?([a-z0-9-_]+)(\.[a-z0-9]+)$", + flags=re.IGNORECASE, +) + class GeminiRobotFileParser(RobotFileParser): def set_url(self, url): @@ -36,7 +68,6 @@ class GeminiRobotFileParser(RobotFileParser): u, _ = GeminiResource.urlsplit_featureful(url) self.host, self.path = u[1:3] - def read(self): """Reads the robots.txt URL and feeds it to the parser.""" gr = GeminiResource(self.url) @@ -50,7 +81,7 @@ class GeminiRobotFileParser(RobotFileParser): self.parse(response.content.splitlines()) -class GeminiResource(): +class GeminiResource: def __init__(self, url, fully_qualified_parent_url=None, parent_hostname=None): self.raw_url = url self.urlsplit, self.is_relative = GeminiResource.urlsplit_featureful( @@ -80,7 +111,7 @@ class GeminiResource(): # things behind the scenes. 
is_relative = False - u = urlsplit(url, 'gemini') + u = urlsplit(url, "gemini") if u.scheme != "gemini": return None, None if u.hostname is None: @@ -89,9 +120,9 @@ class GeminiResource(): if parent_hostname is None: return None, None joined = urljoin("gemini://{}".format(parent_hostname), url) - u = urlsplit(joined, 'gemini') + u = urlsplit(joined, "gemini") is_relative = True - else: # url does not start with / + else: # url does not start with / # could be: blah.com/test # could be: test url_split = url.split("/") @@ -99,33 +130,36 @@ class GeminiResource(): # prepend with "gemini://" so built-in urlsplit will extract # the host properly, and continue on url = "gemini://{}".format(url) - u = urlsplit(url, 'gemini') + u = urlsplit(url, "gemini") else: # process relative link if fully_qualified_parent_url is None: return None, None joined = urljoin(fully_qualified_parent_url, url) - u = urlsplit(joined, 'gemini') + u = urlsplit(joined, "gemini") is_relative = True return u, is_relative - def _get_normalized_url(self): if not self.is_valid: return None if self._normalized_url is None: - self._normalized_url, self._normalized_host = self._get_normalized_url_and_host() + ( + self._normalized_url, + self._normalized_host, + ) = self._get_normalized_url_and_host() return self._normalized_url - def _get_normalized_host(self): if not self.is_valid: return None if self._normalized_host is None: - self._normalized_url, self._normalized_host = self._get_normalized_url_and_host() + ( + self._normalized_url, + self._normalized_host, + ) = self._get_normalized_url_and_host() return self._normalized_host - def _get_normalized_host_like(self): if not self.is_valid: return None @@ -137,7 +171,6 @@ class GeminiResource(): self._normalized_host_like = normalized_host_like return self._normalized_host_like - def _get_fetchable_url(self): if not self.is_valid: return None @@ -162,27 +195,32 @@ class GeminiResource(): self._fetchable_url = url return self._fetchable_url - def _get_indexable_url(self): if not self.is_valid: return None if self._indexable_url is None: indexable_url = unquote(self.fetchable_url) if self.urlsplit.port == 1965: - indexable_url = self.normalized_url.replace(self.urlsplit.hostname.lower() + ":1965", self.urlsplit.hostname.lower(), 1) + indexable_url = self.normalized_url.replace( + self.urlsplit.hostname.lower() + ":1965", + self.urlsplit.hostname.lower(), + 1, + ) self._indexable_url = indexable_url return self._indexable_url - def _get_is_root_like(self): if self._is_root_like is None: is_root_like = False - if self.urlsplit.path == "" or self.urlsplit.path == "/" or ROOT_LIKE_ONLY_PATTERN.match(self.urlsplit.path): + if ( + self.urlsplit.path == "" + or self.urlsplit.path == "/" + or ROOT_LIKE_ONLY_PATTERN.match(self.urlsplit.path) + ): is_root_like = True self._is_root_like = is_root_like return self._is_root_like - def _get_is_pikkulog_like(self): if self._is_pikkulog_like is None: is_pikkulog_like = False @@ -192,30 +230,39 @@ class GeminiResource(): self._is_pikkulog_like = is_pikkulog_like return self._is_pikkulog_like - def _get_is_log_root_like(self): if self._is_log_root_like is None: is_log_root_like = False - if self.urlsplit.path == "" or self.urlsplit.path == "/" or LOG_ROOT_LIKE_PATTERN.match(self.urlsplit.path): + if ( + self.urlsplit.path == "" + or self.urlsplit.path == "/" + or LOG_ROOT_LIKE_PATTERN.match(self.urlsplit.path) + ): is_log_root_like = True self._is_log_root_like = is_log_root_like return self._is_log_root_like - def _get_is_log_post_like(self): if 
self._is_log_post_like is None: is_log_post_like = False post_like_match = LOG_POST_LIKE_PATTERN.match(self.urlsplit.path) - post_like_exclusion_match = LOG_POST_LIKE_EXCLUSION_PATTERN.match(self.urlsplit.path) - post_gemlogblue_match = LOG_POST_GEMLOGBLUE_LIKE_PATTERN.match(self.urlsplit.path) + post_like_exclusion_match = LOG_POST_LIKE_EXCLUSION_PATTERN.match( + self.urlsplit.path + ) + post_gemlogblue_match = LOG_POST_GEMLOGBLUE_LIKE_PATTERN.match( + self.urlsplit.path + ) post_boston_match = LOG_POST_BOSTON_LIKE_PATTERN.match(self.urlsplit.path) - if (post_like_match and not post_like_exclusion_match) or (self.normalized_host == "gemlog.blue" and post_gemlogblue_match) or (self.normalized_host == "gemini.conman.org" and post_boston_match): + if ( + (post_like_match and not post_like_exclusion_match) + or (self.normalized_host == "gemlog.blue" and post_gemlogblue_match) + or (self.normalized_host == "gemini.conman.org" and post_boston_match) + ): is_log_post_like = True self._is_log_post_like = is_log_post_like return self._is_log_post_like - def get_friendly_author(self, content): if not self.is_valid: return None @@ -238,7 +285,6 @@ class GeminiResource(): friendly_author = self.normalized_host return friendly_author - def get_friendly_title(self, content): if not self.is_valid: return None @@ -253,13 +299,18 @@ class GeminiResource(): # if no content match, try looking in URL title_url_match = TITLE_URL_PATTERN.match(self.urlsplit.path) if title_url_match: - friendly_title = title_url_match[2].replace("-", " ").replace("_", " ").strip().title() + friendly_title = ( + title_url_match[2] + .replace("-", " ") + .replace("_", " ") + .strip() + .title() + ) if friendly_title is None: # if still no match, use URL path friendly_title = self.urlsplit.path.lstrip("/") return friendly_title - def get_default_change_frequency(self, category): if not self.is_valid: return None @@ -287,7 +338,6 @@ class GeminiResource(): self._default_change_frequency = change_frequency return self._default_change_frequency - def increment_change_frequency(self, existing_change_frequency, category): if category == "content": if self.is_root_like or self.is_log_root_like: @@ -309,7 +359,6 @@ class GeminiResource(): else: raise Exception.NameError("Unrecognized resource category") - # constructed from fetchable_url # does not matter if quoted or unquoted so I choose arbitrarily to # standardize on unquoting it. @@ -333,15 +382,17 @@ class GeminiResource(): # and a server redirecting to the same URL _with_ a trailing slash. 
return gusmobile.fetch(self.fetchable_url) - def _get_normalized_url_and_host(self): url_normalized = unquote(self.fetchable_url.lower().rstrip("/")) if self.urlsplit.port == 1965: - url_normalized = url_normalized.replace(self.urlsplit.hostname.lower() + ":1965", self.urlsplit.hostname.lower(), 1) + url_normalized = url_normalized.replace( + self.urlsplit.hostname.lower() + ":1965", + self.urlsplit.hostname.lower(), + 1, + ) host_normalized = self.urlsplit.hostname.lower() return url_normalized, host_normalized - def extract_contained_resources(self, content): # this finds all gemini URLs within the content of a given GeminiResource and # returns them as a list of new GeminiResources @@ -349,9 +400,13 @@ class GeminiResource(): return self.contained_resources link_pattern = "^=>\s*(\S+)" - preformat_pattern = r'^```.*?^```' - content_without_preformat = re.sub(preformat_pattern, '', content, flags=re.DOTALL | re.MULTILINE) - probable_urls = re.findall(link_pattern, content_without_preformat, re.MULTILINE) + preformat_pattern = r"^```.*?^```" + content_without_preformat = re.sub( + preformat_pattern, "", content, flags=re.DOTALL | re.MULTILINE + ) + probable_urls = re.findall( + link_pattern, content_without_preformat, re.MULTILINE + ) resources = [] for url in probable_urls: resource = GeminiResource( diff --git a/gus/lib/logging.py b/gus/lib/logging.py @@ -7,11 +7,11 @@ def add_arguments(parser): """Add arguments to the given argument argparse parser.""" parser.add_argument( - '--logging-config', - '-c', - dest='logging_ini_fname', + "--logging-config", + "-c", + dest="logging_ini_fname", default=False, - help='Location of logging configuration file' + help="Location of logging configuration file", ) @@ -22,11 +22,10 @@ def handle_arguments(args): if os.path.isfile(args.logging_ini_fname): logging.config.fileConfig(args.logging_ini_fname) else: - sys.exit('Can not find logging ini file: %s' % - args.logging_ini_fname) + sys.exit("Can not find logging ini file: %s" % args.logging_ini_fname) - elif os.path.isfile('logging.ini'): - logging.config.fileConfig('logging.ini') + elif os.path.isfile("logging.ini"): + logging.config.fileConfig("logging.ini") def strip_control_chars(s): diff --git a/gus/lib/misc.py b/gus/lib/misc.py @@ -8,15 +8,24 @@ License: MIT """ SYMBOLS = { - 'customary' : ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'), - 'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', - 'zetta', 'iotta'), - 'iec' : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'), - 'iec_ext' : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi', - 'zebi', 'yobi'), + "customary": ("B", "K", "M", "G", "T", "P", "E", "Z", "Y"), + "customary_ext": ( + "byte", + "kilo", + "mega", + "giga", + "tera", + "peta", + "exa", + "zetta", + "iotta", + ), + "iec": ("Bi", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"), + "iec_ext": ("byte", "kibi", "mebi", "gibi", "tebi", "pebi", "exbi", "zebi", "yobi"), } -def bytes2human(n, format='%(value).1f %(symbol)s', symbols='customary'): + +def bytes2human(n, format="%(value).1f %(symbol)s", symbols="customary"): """ Convert n bytes into a human readable string based on format. 
symbols can be either "customary", "customary_ext", "iec" or "iec_ext", @@ -59,7 +68,7 @@ def bytes2human(n, format='%(value).1f %(symbol)s', symbols='customary'): symbols = SYMBOLS[symbols] prefix = {} for i, s in enumerate(symbols[1:]): - prefix[s] = 1 << (i+1)*10 + prefix[s] = 1 << (i + 1) * 10 for symbol in reversed(symbols[1:]): if n >= prefix[symbol]: value = float(n) / prefix[symbol] diff --git a/gus/lib/whoosh_extensions.py b/gus/lib/whoosh_extensions.py @@ -16,7 +16,12 @@ def UrlAnalyzer(): """ - return RegexTokenizer(expression=":1965|^gemini://|[/\.\?]", gaps=True) | IntraWordFilter() | LowercaseFilter() | StemFilter() + return ( + RegexTokenizer(expression=":1965|^gemini://|[/\.\?]", gaps=True) + | IntraWordFilter() + | LowercaseFilter() + | StemFilter() + ) class GeminiFormatter(highlight.Formatter): @@ -35,7 +40,6 @@ class GeminiFormatter(highlight.Formatter): # string return "%s" % tokentext - def format_fragment(self, fragment, replace=False): """Returns a formatted version of the given text, using the "token" objects in the given :class:`Fragment`. @@ -57,21 +61,22 @@ class GeminiFormatter(highlight.Formatter): if t.startchar < index: continue if t.startchar > index: - output.append(self._text(text[index:t.startchar])) + output.append(self._text(text[index : t.startchar])) output.append(self.format_token(text, t, replace)) index = t.endchar - output.append(self._text(text[index:fragment.endchar])) + output.append(self._text(text[index : fragment.endchar])) output.append("...") out_string = "".join(output) - out_string = out_string.replace("\n", " ").replace('\r', ' ') - out_string = ' '.join(out_string.split()) + out_string = out_string.replace("\n", " ").replace("\r", " ") + out_string = " ".join(out_string.split()) return out_string special_char_pattern = re.compile("[^\w\s,\.;-\?\!']") link_pattern = re.compile("://|=>") + class GeminiScorer(highlight.FragmentScorer): def __call__(self, f): # Add up the boosts for the matched terms in this passage @@ -87,10 +92,12 @@ class GeminiScorer(highlight.FragmentScorer): # ascii art, as well as source code (which, I suppose will make snippets # lower quality for actual searches for source code, but that is a very # small minority of searches in the current state of things). - num_special_chars = len(special_char_pattern.findall(f.text[f.startchar:f.endchar])) + num_special_chars = len( + special_char_pattern.findall(f.text[f.startchar : f.endchar]) + ) score -= 4 * num_special_chars + math.pow(num_special_chars, 1.5) - num_links = len(link_pattern.findall(f.text[f.startchar:f.endchar])) + num_links = len(link_pattern.findall(f.text[f.startchar : f.endchar])) score -= 30 * num_links return max(0, score) diff --git a/serve/constants.py b/serve/constants.py @@ -44,9 +44,5 @@ QUOTE_BANK = [ "quote": "The truth will set you free. But not until it is finished with you.", "author": "David Foster Wallace", }, - { - "quote": "Jazz isn't dead. It just smells funny.", - "author": "Frank Zappa", - }, - + {"quote": "Jazz isn't dead. It just smells funny.", "author": "Frank Zappa",}, ] diff --git a/serve/main.py b/serve/main.py @@ -4,25 +4,13 @@ import jetforce from . 
import app, gus + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument( - "--host", - help="Server address to bind to", - default="127.0.0.1" - ) - parser.add_argument( - "--port", - help="Server port to bind to", - type=int, - default=1965 - ) - parser.add_argument( - "--hostname", - help="Server hostname", - default="localhost" - ) + parser.add_argument("--host", help="Server address to bind to", default="127.0.0.1") + parser.add_argument("--port", help="Server port to bind to", type=int, default=1965) + parser.add_argument("--hostname", help="Server hostname", default="localhost") parser.add_argument( "--tls-certfile", dest="certfile", diff --git a/serve/models.py b/serve/models.py @@ -9,11 +9,15 @@ from whoosh.index import open_dir from . import constants from gus.lib.db_model import init_db, Crawl, Link, Page, Search, Thread from gus.lib.gemini import GeminiResource -from gus.lib.index_statistics import compute_index_statistics, load_all_statistics_from_file +from gus.lib.index_statistics import ( + compute_index_statistics, + load_all_statistics_from_file, +) from gus.lib.misc import bytes2human from gus.lib.whoosh_extensions import GeminiFormatter, GeminiScorer -class GUS(): + +class GUS: def __init__(self): self.ix = open_dir(constants.INDEX_DIR) self.searcher = self.ix.searcher() @@ -27,12 +31,15 @@ class GUS(): self.db = init_db(f"{constants.INDEX_DIR}/{constants.DB_FILENAME}") self.statistics = compute_index_statistics(self.db) - self.statistics_historical_overall = load_all_statistics_from_file(constants.STATISTICS_FILE) - + self.statistics_historical_overall = load_all_statistics_from_file( + constants.STATISTICS_FILE + ) def init_query_parser(ix): or_group = qparser.OrGroup.factory(0.99) - query_parser = qparser.MultifieldParser(["content", "url", "prompt"], ix.schema, group=or_group) + query_parser = qparser.MultifieldParser( + ["content", "url", "prompt"], ix.schema, group=or_group + ) query_parser.add_plugin(qparser.RegexPlugin()) query_parser.add_plugin(qparser.GtLtPlugin()) query_parser.remove_plugin_class(qparser.WildcardPlugin) @@ -40,36 +47,44 @@ class GUS(): query_parser.remove_plugin_class(qparser.RangePlugin) return query_parser - def search_index(self, query, requested_page): Search.create(query=query, timestamp=datetime.utcnow()) query = self.query_parser.parse(query) results = self.searcher.search_page(query, requested_page, pagelen=10) return ( len(results), - [{ - "score" : result.score, - "indexed_at" : result["indexed_at"], - "url" : result["url"], - "fetchable_url" : result["fetchable_url"], - "content_type" : result["content_type"], - "charset" : result["charset"] if "charset" in result else "none", - "size" : result["size"] if "size" in result else 0, - "prompt" : result["prompt"] if "prompt" in result else "", - "highlights" : self.gemini_highlighter.highlight_hit(result, "content", top=1) if "content" in result and result["content_type"] in ["text/plain", "text/gemini", "text/markdown"] else "", - "link_text" : GUS._get_link_text(result), - "backlink_count": result["backlink_count"], - } for result in results], + [ + { + "score": result.score, + "indexed_at": result["indexed_at"], + "url": result["url"], + "fetchable_url": result["fetchable_url"], + "content_type": result["content_type"], + "charset": result["charset"] if "charset" in result else "none", + "size": result["size"] if "size" in result else 0, + "prompt": result["prompt"] if "prompt" in result else "", + "highlights": self.gemini_highlighter.highlight_hit( + result, 
"content", top=1 + ) + if "content" in result + and result["content_type"] + in ["text/plain", "text/gemini", "text/markdown"] + else "", + "link_text": GUS._get_link_text(result), + "backlink_count": result["backlink_count"], + } + for result in results + ], ) - def get_backlinks(self, url): resource = GeminiResource(url) if not resource.is_valid: return [], [] u = resource.indexable_url.rstrip("/") - backlinks_query = Page.raw("""SELECT p_from.url, l.is_cross_host_like + backlinks_query = Page.raw( + """SELECT p_from.url, l.is_cross_host_like FROM page AS p_from JOIN indexable_crawl AS ic ON ic.page_id == p_from.id @@ -80,18 +95,22 @@ ON p_to.id == l.to_page_id WHERE p_to.url IN (?, ?) AND p_from.normalized_url != ? GROUP BY p_from.normalized_url -ORDER BY l.is_cross_host_like, p_from.url ASC""", u, f"{u}/", resource.normalized_url) +ORDER BY l.is_cross_host_like, p_from.url ASC""", + u, + f"{u}/", + resource.normalized_url, + ) backlinks = backlinks_query.execute() internal_backlink_urls = [b.url for b in backlinks if not b.is_cross_host_like] external_backlink_urls = [b.url for b in backlinks if b.is_cross_host_like] return internal_backlink_urls, external_backlink_urls - def get_threads(self, sort="recency"): sort = sort.lower() if sort == "recency": - threads_query = Thread.raw("""SELECT t.* + threads_query = Thread.raw( + """SELECT t.* , tp.address , tp.friendly_author , tp.friendly_title @@ -111,9 +130,11 @@ JOIN crawl AS c ON c.page_id == p.id WHERE c.status == 20 GROUP BY tp.id -ORDER BY t.updated_at DESC, t.id ASC, tp.address ASC""") +ORDER BY t.updated_at DESC, t.id ASC, tp.address ASC""" + ) elif sort == "length": - threads_query = Thread.raw("""SELECT t.* + threads_query = Thread.raw( + """SELECT t.* , tp.address , tp.friendly_author , tp.friendly_title @@ -136,7 +157,8 @@ JOIN crawl AS c ON c.page_id == p.id WHERE c.status == 20 GROUP BY tp.id -ORDER BY t.thread_length DESC, t.updated_at DESC, t.id ASC, tp.address ASC""") +ORDER BY t.thread_length DESC, t.updated_at DESC, t.id ASC, tp.address ASC""" + ) else: threads_query = "" threads = [] @@ -144,44 +166,52 @@ ORDER BY t.thread_length DESC, t.updated_at DESC, t.id ASC, tp.address ASC""") last_id = None for thread_member in threads_query.iterator(): if thread_member.updated_at.date() != last_date: - threads.append({ - "threads": [], - "date": thread_member.updated_at, - }) + threads.append( + {"threads": [], "date": thread_member.updated_at,} + ) last_date = thread_member.updated_at.date() if thread_member.id != last_id: - threads[-1]["threads"].append({ - "members": [], - "updated_at": thread_member.updated_at, - }) + threads[-1]["threads"].append( + {"members": [], "updated_at": thread_member.updated_at,} + ) last_id = thread_member.id - threads[-1]["threads"][-1]["members"].append({ - "url": thread_member.url, - "fetchable_url": thread_member.fetchable_url, - "address": thread_member.address, - "friendly_author": thread_member.friendly_author, - "friendly_title": thread_member.friendly_title, - "first_seen": datetime.strptime(thread_member.first_seen, "%Y-%m-%d %H:%M:%S.%f"), - }) + threads[-1]["threads"][-1]["members"].append( + { + "url": thread_member.url, + "fetchable_url": thread_member.fetchable_url, + "address": thread_member.address, + "friendly_author": thread_member.friendly_author, + "friendly_title": thread_member.friendly_title, + "first_seen": datetime.strptime( + thread_member.first_seen, "%Y-%m-%d %H:%M:%S.%f" + ), + } + ) # return sorted(threads, key=lambda x: (x["updated_at"], ), reverse=True) return 
diff --git a/serve/main.py b/serve/main.py
@@ -4,25 +4,13 @@ import jetforce
 
 from . import app, gus
 
+
 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--host",
-        help="Server address to bind to",
-        default="127.0.0.1"
-    )
-    parser.add_argument(
-        "--port",
-        help="Server port to bind to",
-        type=int,
-        default=1965
-    )
-    parser.add_argument(
-        "--hostname",
-        help="Server hostname",
-        default="localhost"
-    )
+    parser.add_argument("--host", help="Server address to bind to", default="127.0.0.1")
+    parser.add_argument("--port", help="Server port to bind to", type=int, default=1965)
+    parser.add_argument("--hostname", help="Server hostname", default="localhost")
     parser.add_argument(
         "--tls-certfile",
         dest="certfile",
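serve/main.py keeps the same CLI; only the three short parser.add_argument calls are collapsed onto single lines. Assuming the module is launched the usual jetforce way (the exact entry point is not shown in this diff), the flags would read along these lines:

    python -m serve --host 0.0.0.0 --hostname gus.example --port 1965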
diff --git a/serve/models.py b/serve/models.py
@@ -9,11 +9,15 @@ from whoosh.index import open_dir
 from . import constants
 from gus.lib.db_model import init_db, Crawl, Link, Page, Search, Thread
 from gus.lib.gemini import GeminiResource
-from gus.lib.index_statistics import compute_index_statistics, load_all_statistics_from_file
+from gus.lib.index_statistics import (
+    compute_index_statistics,
+    load_all_statistics_from_file,
+)
 from gus.lib.misc import bytes2human
 from gus.lib.whoosh_extensions import GeminiFormatter, GeminiScorer
 
-class GUS():
+
+class GUS:
     def __init__(self):
         self.ix = open_dir(constants.INDEX_DIR)
         self.searcher = self.ix.searcher()
@@ -27,12 +31,15 @@ class GUS():
 
         self.db = init_db(f"{constants.INDEX_DIR}/{constants.DB_FILENAME}")
         self.statistics = compute_index_statistics(self.db)
-        self.statistics_historical_overall = load_all_statistics_from_file(constants.STATISTICS_FILE)
-
+        self.statistics_historical_overall = load_all_statistics_from_file(
+            constants.STATISTICS_FILE
+        )
 
     def init_query_parser(ix):
         or_group = qparser.OrGroup.factory(0.99)
-        query_parser = qparser.MultifieldParser(["content", "url", "prompt"], ix.schema, group=or_group)
+        query_parser = qparser.MultifieldParser(
+            ["content", "url", "prompt"], ix.schema, group=or_group
+        )
         query_parser.add_plugin(qparser.RegexPlugin())
         query_parser.add_plugin(qparser.GtLtPlugin())
         query_parser.remove_plugin_class(qparser.WildcardPlugin)
@@ -40,36 +47,44 @@ class GUS():
         query_parser.remove_plugin_class(qparser.RangePlugin)
         return query_parser
 
-
     def search_index(self, query, requested_page):
         Search.create(query=query, timestamp=datetime.utcnow())
         query = self.query_parser.parse(query)
         results = self.searcher.search_page(query, requested_page, pagelen=10)
         return (
             len(results),
-            [{
-                "score"         : result.score,
-                "indexed_at"    : result["indexed_at"],
-                "url"           : result["url"],
-                "fetchable_url" : result["fetchable_url"],
-                "content_type"  : result["content_type"],
-                "charset"       : result["charset"] if "charset" in result else "none",
-                "size"          : result["size"] if "size" in result else 0,
-                "prompt"        : result["prompt"] if "prompt" in result else "",
-                "highlights"    : self.gemini_highlighter.highlight_hit(result, "content", top=1) if "content" in result and result["content_type"] in ["text/plain", "text/gemini", "text/markdown"] else "",
-                "link_text"     : GUS._get_link_text(result),
-                "backlink_count": result["backlink_count"],
-            } for result in results],
+            [
+                {
+                    "score": result.score,
+                    "indexed_at": result["indexed_at"],
+                    "url": result["url"],
+                    "fetchable_url": result["fetchable_url"],
+                    "content_type": result["content_type"],
+                    "charset": result["charset"] if "charset" in result else "none",
+                    "size": result["size"] if "size" in result else 0,
+                    "prompt": result["prompt"] if "prompt" in result else "",
+                    "highlights": self.gemini_highlighter.highlight_hit(
+                        result, "content", top=1
+                    )
+                    if "content" in result
+                    and result["content_type"]
+                    in ["text/plain", "text/gemini", "text/markdown"]
+                    else "",
+                    "link_text": GUS._get_link_text(result),
+                    "backlink_count": result["backlink_count"],
+                }
+                for result in results
+            ],
         )
 
-
     def get_backlinks(self, url):
         resource = GeminiResource(url)
         if not resource.is_valid:
             return [], []
         u = resource.indexable_url.rstrip("/")
-        backlinks_query = Page.raw("""SELECT p_from.url, l.is_cross_host_like
+        backlinks_query = Page.raw(
+            """SELECT p_from.url, l.is_cross_host_like
 FROM page AS p_from
 JOIN indexable_crawl AS ic
 ON ic.page_id == p_from.id
@@ -80,18 +95,22 @@ ON p_to.id == l.to_page_id
 WHERE p_to.url IN (?, ?)
 AND p_from.normalized_url != ?
 GROUP BY p_from.normalized_url
-ORDER BY l.is_cross_host_like, p_from.url ASC""", u, f"{u}/", resource.normalized_url)
+ORDER BY l.is_cross_host_like, p_from.url ASC""",
+            u,
+            f"{u}/",
+            resource.normalized_url,
+        )
         backlinks = backlinks_query.execute()
 
         internal_backlink_urls = [b.url for b in backlinks if not b.is_cross_host_like]
         external_backlink_urls = [b.url for b in backlinks if b.is_cross_host_like]
         return internal_backlink_urls, external_backlink_urls
 
-
     def get_threads(self, sort="recency"):
         sort = sort.lower()
         if sort == "recency":
-            threads_query = Thread.raw("""SELECT t.*
+            threads_query = Thread.raw(
+                """SELECT t.*
 , tp.address
 , tp.friendly_author
 , tp.friendly_title
@@ -111,9 +130,11 @@ JOIN crawl AS c
 ON c.page_id == p.id
 WHERE c.status == 20
 GROUP BY tp.id
-ORDER BY t.updated_at DESC, t.id ASC, tp.address ASC""")
+ORDER BY t.updated_at DESC, t.id ASC, tp.address ASC"""
+            )
         elif sort == "length":
-            threads_query = Thread.raw("""SELECT t.*
+            threads_query = Thread.raw(
+                """SELECT t.*
 , tp.address
 , tp.friendly_author
 , tp.friendly_title
@@ -136,7 +157,8 @@ JOIN crawl AS c
 ON c.page_id == p.id
 WHERE c.status == 20
 GROUP BY tp.id
-ORDER BY t.thread_length DESC, t.updated_at DESC, t.id ASC, tp.address ASC""")
+ORDER BY t.thread_length DESC, t.updated_at DESC, t.id ASC, tp.address ASC"""
+            )
         else:
             threads_query = ""
         threads = []
@@ -144,44 +166,52 @@ ORDER BY t.thread_length DESC, t.updated_at DESC, t.id ASC, tp.address ASC""")
         last_id = None
         for thread_member in threads_query.iterator():
             if thread_member.updated_at.date() != last_date:
-                threads.append({
-                    "threads": [],
-                    "date": thread_member.updated_at,
-                })
+                threads.append(
+                    {"threads": [], "date": thread_member.updated_at,}
+                )
                 last_date = thread_member.updated_at.date()
             if thread_member.id != last_id:
-                threads[-1]["threads"].append({
-                    "members": [],
-                    "updated_at": thread_member.updated_at,
-                })
+                threads[-1]["threads"].append(
+                    {"members": [], "updated_at": thread_member.updated_at,}
+                )
                 last_id = thread_member.id
-            threads[-1]["threads"][-1]["members"].append({
-                "url": thread_member.url,
-                "fetchable_url": thread_member.fetchable_url,
-                "address": thread_member.address,
-                "friendly_author": thread_member.friendly_author,
-                "friendly_title": thread_member.friendly_title,
-                "first_seen": datetime.strptime(thread_member.first_seen, "%Y-%m-%d %H:%M:%S.%f"),
-            })
+            threads[-1]["threads"][-1]["members"].append(
+                {
+                    "url": thread_member.url,
+                    "fetchable_url": thread_member.fetchable_url,
+                    "address": thread_member.address,
+                    "friendly_author": thread_member.friendly_author,
+                    "friendly_title": thread_member.friendly_title,
+                    "first_seen": datetime.strptime(
+                        thread_member.first_seen, "%Y-%m-%d %H:%M:%S.%f"
+                    ),
+                }
+            )
         # return sorted(threads, key=lambda x: (x["updated_at"], ), reverse=True)
         return threads
 
-
     def _get_link_text(result):
         if result["content_type"] == "input":
             prompt_suffix = ": {}".format(result["prompt"])
-            link_text = "{} ({}{})".format(result["url"][9:], result["content_type"], prompt_suffix)
+            link_text = "{} ({}{})".format(
+                result["url"][9:], result["content_type"], prompt_suffix
+            )
         else:
-            lang_str = ", {}".format(result["lang"]) if "lang" in result and result["lang"] != "none" else ""
+            lang_str = (
+                ", {}".format(result["lang"])
+                if "lang" in result and result["lang"] != "none"
+                else ""
+            )
             link_text = "{} ({}, {})".format(
-                result["url"][9:], result["content_type"],
-                bytes2human(result["size"], format="%(value).0f%(symbol)s")
+                result["url"][9:],
+                result["content_type"],
+                bytes2human(result["size"], format="%(value).0f%(symbol)s"),
             )
         return link_text
 
-
     def get_feeds(self):
-        feeds_query = Page.raw("""SELECT DISTINCT p.*
+        feeds_query = Page.raw(
+            """SELECT DISTINCT p.*
 FROM page AS p
 JOIN indexable_crawl AS c
 ON c.page_id == p.id
@@ -190,12 +220,13 @@ OR p.url LIKE '%feed.xml'
 OR p.url LIKE '%.rss'
 OR p.url LIKE '%.atom'
 OR p.content_type IN ('application/atom+xml', 'application/rss+xml')
-""")
+"""
+        )
         return feeds_query.execute()
 
-
     def get_newest_hosts(self):
-        newest_hosts_query = Page.raw("""SELECT p.domain, MIN(c.timestamp) AS first_seen
+        newest_hosts_query = Page.raw(
+            """SELECT p.domain, MIN(c.timestamp) AS first_seen
 FROM page as p
 JOIN indexable_crawl AS ic
 ON ic.page_id == p.id
@@ -204,12 +235,13 @@ ON c.page_id == p.id
 GROUP BY p.domain
 ORDER BY first_seen DESC
 LIMIT 10
-""")
+"""
+        )
         return newest_hosts_query.execute()
 
-
    def get_newest_pages(self):
-        newest_pages_query = Page.raw("""SELECT p.url, p.fetchable_url, MIN(c.timestamp) AS first_seen
+        newest_pages_query = Page.raw(
+            """SELECT p.url, p.fetchable_url, MIN(c.timestamp) AS first_seen
 FROM page as p
 JOIN indexable_crawl AS ic
 ON ic.page_id == p.id
@@ -218,19 +250,19 @@ ON c.page_id == p.id
 GROUP BY p.url
 ORDER BY first_seen DESC
 LIMIT 50
-""")
+"""
+        )
         return newest_pages_query.execute()
 
-
     def get_search_suggestions(self, query):
         suggestions = []
         corrector = self.searcher.corrector("content")
         for query_part in query.split(" "):
            query_part_suggestions = corrector.suggest(query_part, limit=3)
-            suggestions.extend({
-                "raw": suggestion,
-                "quoted": quote(suggestion)
-            } for suggestion in query_part_suggestions)
+            suggestions.extend(
+                {"raw": suggestion, "quoted": quote(suggestion)}
+                for suggestion in query_part_suggestions
+            )
         return suggestions
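The serve/models.py reformat is the largest chunk here, but every method keeps its contract; search_index, for instance, still returns a result count plus a list of per-hit dicts. A minimal caller sketch, assuming an index already exists under constants.INDEX_DIR (the field names come from the schema above; the query itself is invented):

    from serve.models import GUS

    gus = GUS()
    num_results, results = gus.search_index("gemini search", 1)
    for r in results:
        # each dict carries url, content_type, size, highlights, etc.
        print(r["url"], r["backlink_count"])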
diff --git a/serve/views.py b/serve/views.py
@@ -8,7 +8,12 @@ import jinja2
 from jetforce import Request, Response, Status, JetforceApplication
 
 from . import constants
-from .models import compute_verbose, compute_requested_results_page, GUS, process_seed_request
+from .models import (
+    compute_verbose,
+    compute_requested_results_page,
+    GUS,
+    process_seed_request,
+)
 
 TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates")
 
@@ -19,6 +24,7 @@ template_env = jinja2.Environment(
     lstrip_blocks=True,
 )
 
+
 def datetimeformat(value, format="%Y-%m-%d"):
     return value.strftime(format)
 
@@ -29,8 +35,10 @@ def threadaddressformat(value):
         return " " * (depth - 1) + "↳"
     return ""
 
-template_env.filters['datetimeformat'] = datetimeformat
-template_env.filters['threadaddressformat'] = threadaddressformat
+
+template_env.filters["datetimeformat"] = datetimeformat
+template_env.filters["threadaddressformat"] = threadaddressformat
+
 
 def render_template(name: str, *args, **kwargs) -> str:
     """
@@ -38,9 +46,11 @@ def render_template(name: str, *args, **kwargs) -> str:
     """
     return template_env.get_template(name).render(*args, **kwargs)
 
+
 app = JetforceApplication()
 gus = GUS()
 
+
 @app.route("/favicon.txt", strict_trailing_slash=False)
 def favicon(request):
     return Response(Status.SUCCESS, "text/plain", "🔭")
@@ -58,104 +68,128 @@ def add_seed(request):
 
 @app.route("/statistics", strict_trailing_slash=False)
 def statistics(request):
-    body = render_template("statistics.gmi",
-                           statistics=gus.statistics,
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "statistics.gmi",
+        statistics=gus.statistics,
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/statistics/historical/overall", strict_trailing_slash=False)
 def statistics(request):
-    body = render_template("statistics_historical_overall.gmi",
-                           statistics_historical_overall=gus.statistics_historical_overall,
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "statistics_historical_overall.gmi",
+        statistics_historical_overall=gus.statistics_historical_overall,
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/known-hosts", strict_trailing_slash=False)
 def known_hosts(request):
-    body = render_template("known_hosts.gmi",
-                           # TODO: remove this `sorted` after the next index generation
-                           known_hosts=sorted(gus.statistics["domains"]),
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "known_hosts.gmi",
+        # TODO: remove this `sorted` after the next index generation
+        known_hosts=sorted(gus.statistics["domains"]),
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/newest-hosts", strict_trailing_slash=False)
 def newest_hosts(request):
-    body = render_template("newest_hosts.gmi",
-                           newest_hosts=gus.get_newest_hosts(),
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "newest_hosts.gmi",
+        newest_hosts=gus.get_newest_hosts(),
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/newest-pages", strict_trailing_slash=False)
 def newest_pages(request):
-    body = render_template("newest_pages.gmi",
-                           newest_pages=gus.get_newest_pages(),
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "newest_pages.gmi",
+        newest_pages=gus.get_newest_pages(),
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/known-feeds", strict_trailing_slash=False)
 def known_feeds(request):
-    body = render_template("known_feeds.gmi",
-                           known_feeds=gus.get_feeds(),
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "known_feeds.gmi",
+        known_feeds=gus.get_feeds(),
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("", strict_trailing_slash=False)
 def index(request):
-    body = render_template("index.gmi",
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "index.gmi",
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/about", strict_trailing_slash=False)
 def index(request):
-    body = render_template("about.gmi",
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "about.gmi",
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/documentation/searching", strict_trailing_slash=False)
 def documentation_searching(request):
-    body = render_template("documentation/searching.gmi",
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "documentation/searching.gmi",
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/documentation/indexing", strict_trailing_slash=False)
 def documentation_indexing(request):
-    body = render_template("documentation/indexing.gmi",
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "documentation/indexing.gmi",
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/documentation/backlinks", strict_trailing_slash=False)
 def documentation_backlinks(request):
-    body = render_template("documentation/backlinks.gmi",
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "documentation/backlinks.gmi",
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
 
 
 @app.route("/news", strict_trailing_slash=False)
 def index(request):
-    body = render_template("news.gmi",
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "news.gmi",
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
@@ -171,23 +205,27 @@ def search(request):
             current_page = min(requested_page, num_pages)
             if num_results == 0:
                 current_page = 0
-            body = render_template("search.gmi",
-                                   query=request.query,
-                                   quoted_query=quote(request.query),
-                                   verbose=verbose,
-                                   num_results=num_results,
-                                   results=results,
-                                   current_page=current_page,
-                                   num_pages=num_pages,
-                                   index_modification_time=gus.statistics["index_modification_time"],
-                                   quote=random.choice(constants.QUOTE_BANK))
+            body = render_template(
+                "search.gmi",
+                query=request.query,
+                quoted_query=quote(request.query),
+                verbose=verbose,
+                num_results=num_results,
+                results=results,
+                current_page=current_page,
+                num_pages=num_pages,
+                index_modification_time=gus.statistics["index_modification_time"],
+                quote=random.choice(constants.QUOTE_BANK),
+            )
         else:
             search_suggestions = gus.get_search_suggestions(request.query)
-            body = render_template("search_suggestions.gmi",
-                                   query=request.query,
-                                   search_suggestions=search_suggestions,
-                                   index_modification_time=gus.statistics["index_modification_time"],
-                                   quote=random.choice(constants.QUOTE_BANK))
+            body = render_template(
+                "search_suggestions.gmi",
+                query=request.query,
+                search_suggestions=search_suggestions,
+                index_modification_time=gus.statistics["index_modification_time"],
+                quote=random.choice(constants.QUOTE_BANK),
+            )
         return Response(Status.SUCCESS, "text/gemini", body)
     else:
         return Response(Status.INPUT, "Search query")
@@ -213,12 +251,14 @@ def backlinks(request):
     if request.query:
         url = unquote(request.query)
         internal_backlinks, external_backlinks = gus.get_backlinks(url)
-        body = render_template("backlinks.gmi",
-                               url=url,
-                               internal_backlinks=internal_backlinks,
-                               external_backlinks=external_backlinks,
-                               index_modification_time=gus.statistics["index_modification_time"],
-                               quote=random.choice(constants.QUOTE_BANK))
+        body = render_template(
+            "backlinks.gmi",
+            url=url,
+            internal_backlinks=internal_backlinks,
+            external_backlinks=external_backlinks,
+            index_modification_time=gus.statistics["index_modification_time"],
+            quote=random.choice(constants.QUOTE_BANK),
+        )
         return Response(Status.SUCCESS, "text/gemini", body)
     else:
        return Response(Status.INPUT, "Gemini URL")
@@ -228,9 +268,11 @@ def threads(request):
     sort = request.query or "recency"
     threads = gus.get_threads(sort)
-    body = render_template("threads.gmi",
-                           threads=threads,
-                           sort=sort,
-                           index_modification_time=gus.statistics["index_modification_time"],
-                           quote=random.choice(constants.QUOTE_BANK))
+    body = render_template(
+        "threads.gmi",
+        threads=threads,
+        sort=sort,
+        index_modification_time=gus.statistics["index_modification_time"],
+        quote=random.choice(constants.QUOTE_BANK),
+    )
     return Response(Status.SUCCESS, "text/gemini", body)
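All of the serve/views.py handlers now share one Black-shaped pattern: build the body with render_template, then wrap it in a text/gemini Response. A hypothetical new route in the same style (the path and template name are made up):

    @app.route("/example", strict_trailing_slash=False)
    def example(request):
        body = render_template(
            "example.gmi",
            index_modification_time=gus.statistics["index_modification_time"],
            quote=random.choice(constants.QUOTE_BANK),
        )
        return Response(Status.SUCCESS, "text/gemini", body)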
body = render_template("news.gmi", - index_modification_time=gus.statistics["index_modification_time"], - quote=random.choice(constants.QUOTE_BANK)) + body = render_template( + "news.gmi", + index_modification_time=gus.statistics["index_modification_time"], + quote=random.choice(constants.QUOTE_BANK), + ) return Response(Status.SUCCESS, "text/gemini", body) @@ -171,23 +205,27 @@ def search(request): current_page = min(requested_page, num_pages) if num_results == 0: current_page = 0 - body = render_template("search.gmi", - query=request.query, - quoted_query=quote(request.query), - verbose=verbose, - num_results=num_results, - results=results, - current_page=current_page, - num_pages=num_pages, - index_modification_time=gus.statistics["index_modification_time"], - quote=random.choice(constants.QUOTE_BANK)) + body = render_template( + "search.gmi", + query=request.query, + quoted_query=quote(request.query), + verbose=verbose, + num_results=num_results, + results=results, + current_page=current_page, + num_pages=num_pages, + index_modification_time=gus.statistics["index_modification_time"], + quote=random.choice(constants.QUOTE_BANK), + ) else: search_suggestions = gus.get_search_suggestions(request.query) - body = render_template("search_suggestions.gmi", - query=request.query, - search_suggestions=search_suggestions, - index_modification_time=gus.statistics["index_modification_time"], - quote=random.choice(constants.QUOTE_BANK)) + body = render_template( + "search_suggestions.gmi", + query=request.query, + search_suggestions=search_suggestions, + index_modification_time=gus.statistics["index_modification_time"], + quote=random.choice(constants.QUOTE_BANK), + ) return Response(Status.SUCCESS, "text/gemini", body) else: return Response(Status.INPUT, "Search query") @@ -213,12 +251,14 @@ def backlinks(request): if request.query: url = unquote(request.query) internal_backlinks, external_backlinks = gus.get_backlinks(url) - body = render_template("backlinks.gmi", - url=url, - internal_backlinks=internal_backlinks, - external_backlinks=external_backlinks, - index_modification_time=gus.statistics["index_modification_time"], - quote=random.choice(constants.QUOTE_BANK)) + body = render_template( + "backlinks.gmi", + url=url, + internal_backlinks=internal_backlinks, + external_backlinks=external_backlinks, + index_modification_time=gus.statistics["index_modification_time"], + quote=random.choice(constants.QUOTE_BANK), + ) return Response(Status.SUCCESS, "text/gemini", body) else: return Response(Status.INPUT, "Gemini URL") @@ -228,9 +268,11 @@ def backlinks(request): def threads(request): sort = request.query or "recency" threads = gus.get_threads(sort) - body = render_template("threads.gmi", - threads=threads, - sort=sort, - index_modification_time=gus.statistics["index_modification_time"], - quote=random.choice(constants.QUOTE_BANK)) + body = render_template( + "threads.gmi", + threads=threads, + sort=sort, + index_modification_time=gus.statistics["index_modification_time"], + quote=random.choice(constants.QUOTE_BANK), + ) return Response(Status.SUCCESS, "text/gemini", body) diff --git a/tests/gus/lib/test_gemini.py b/tests/gus/lib/test_gemini.py @@ -3,27 +3,31 @@ from gus.lib.gemini import GeminiResource class TestGeminiResource(unittest.TestCase): - def test_extract_contained_resources(self): - url = 'gemini://host' + url = "gemini://host" # no content - resources = GeminiResource(url).extract_contained_resources('') + resources = GeminiResource(url).extract_contained_resources("") 
         self.assertEqual(resources, [])
 
         # not a link
-        resources = GeminiResource(url).extract_contained_resources(' => link')
+        resources = GeminiResource(url).extract_contained_resources(" => link")
         self.assertEqual(resources, [])
 
-        resources = GeminiResource(url).extract_contained_resources('```\n=> preformatted\n```')
+        resources = GeminiResource(url).extract_contained_resources(
+            "```\n=> preformatted\n```"
+        )
         self.assertEqual(resources, [])
 
         # some links
-        resources = GeminiResource(url).extract_contained_resources('=> link\ntext\n=> other')
+        resources = GeminiResource(url).extract_contained_resources(
+            "=> link\ntext\n=> other"
+        )
         self.assertEqual(len(resources), 2)
-        self.assertEqual(resources[0].raw_url, 'link')
-        self.assertEqual(resources[1].raw_url, 'other')
+        self.assertEqual(resources[0].raw_url, "link")
+        self.assertEqual(resources[1].raw_url, "other")
 
-        resources = GeminiResource(url).extract_contained_resources("""
+        resources = GeminiResource(url).extract_contained_resources(
+            """
 # title
 text
 => link
@@ -32,7 +36,8 @@ text
 => no link
 ```
 => other
-        """)
+        """
+        )
         self.assertEqual(len(resources), 2)
-        self.assertEqual(resources[0].raw_url, 'link')
-        self.assertEqual(resources[1].raw_url, 'other')
+        self.assertEqual(resources[0].raw_url, "link")
+        self.assertEqual(resources[1].raw_url, "other")
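Taken together, these tests pin down the link-extraction contract: only lines beginning with "=>" count as links, and anything inside a ``` preformatted block is skipped. The same API exercised interactively, under those assumptions (the URL is invented):

    from gus.lib.gemini import GeminiResource

    resource = GeminiResource("gemini://host")
    # the indented arrow and the preformatted one are ignored
    links = resource.extract_contained_resources(" => a\n=> b\n```\n=> c\n```")
    print([r.raw_url for r in links])  # ['b']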