fix wrong embedding of excludes - geminispace.info

commit 3f7c0f84f9d039d4559225c7e2e97585c7fd8bc0
parent 8b004af54d160a78fdb6d261d4e2478483d2c082
Author: René Wagner <rwa@clttr.info>
Date:   Thu, 27 May 2021 15:24:13 +0200

fix wrong embedding of excludes

Diffstat:
M gus/build_index.py  | 6 +++++-
M gus/crawl.py  | 6 +++---

2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/gus/build_index.py b/gus/build_index.py
@@ -120,11 +120,15 @@ GROUP BY p.normalized_url""", constants.MAXIMUM_TEXT_PAGE_SIZE
     for page in pages.iterator():
         index_page(index, page, indexed_urls)
 
+    try:
+        index.close()
+    except Exception as e:
+        logging.error('Closing of inde failed: %s', e);
+ 
     index_statistics = compute_index_statistics(db)
     log_index_statistics(index_statistics)
     persist_statistics(index_statistics, None, should_run_destructive, "statistics.csv")
 
-    index.close()
     logging.info("Finished!")
 
 
diff --git a/gus/crawl.py b/gus/crawl.py
@@ -228,15 +228,15 @@ def index_content(resource, response):
 
 def should_skip(resource):
     should_skip = False
-    for excluded_prefix in excludes.EXCLUDED_URL_PREFIXES:
+    for excluded_prefix in EXCLUDED_URL_PREFIXES:
         if resource.normalized_url.startswith(excluded_prefix):
             should_skip = True
             break
-    for excluded_path in excludes.EXCLUDED_URL_PATHS:
+    for excluded_path in EXCLUDED_URL_PATHS:
         if resource.urlsplit.path.lower().endswith(excluded_path):
             should_skip = True
             break
-    m = excludes.EXCLUDED_URL_PATTERN.match(resource.normalized_url)
+    m = EXCLUDED_URL_PATTERN.match(resource.normalized_url)
     if m:
         should_skip = True
     return should_skip

	geminispace.info gemini search engine
	git clone https://git.clttr.info/geminispace.info.git
	Log (Feed) | Files | Refs (Tags) | README | LICENSE