geminispace.info

gemini search engine
git clone https://git.clttr.info/geminispace.info.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

commit 39010248c195bce521a2aaf59cc3a72a7523125a
parent c341bb82ae237de0fa3aff8b5bec1b3d5efa791c
Author: Natalie Pendragon <natpen@natpen.net>
Date:   Thu, 23 Jul 2020 14:40:17 -0400

[build_index] [serve] Distinguish cross-capsule backlinks

Diffstat:
M gus/build_index.py | 7 ++++---
M gus/lib/db_model.py | 7 +++++++
M serve/models.py | 12 +++++++-----
M serve/templates/about.gmi | 2 ++
M serve/templates/backlinks.gmi | 47 +++++++++++++++++++++++++++++++++++++++++------
M serve/templates/news.gmi | 3 +++
M serve/templates/search.gmi | 4 ++--
M serve/views.py | 5 +++--
8 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/gus/build_index.py b/gus/build_index.py @@ -117,7 +117,7 @@ def index_page(page, indexed_urls): return print(page.url) u = page.url.rstrip("/") - backlinks = Page.raw("""SELECT p_from.url + external_backlinks = Page.raw("""SELECT p_from.url FROM page AS p_from JOIN indexable_crawl AS ic ON ic.page_id == p_from.id @@ -126,10 +126,11 @@ ON l.from_page_id == p_from.id JOIN page as p_to ON p_to.id == l.to_page_id WHERE p_to.url IN (?, ?) +AND l.is_cross_host_like == 1 GROUP BY p_from.normalized_url""", u, f"{u}/") - backlink_urls = [b.url for b in backlinks.execute()] - backlink_count = len(list(set(backlink_urls))) + backlink_urls = [b.url for b in external_backlinks.execute()] + backlink_count = len(backlink_urls) document = { "url_id": page.url, diff --git a/gus/lib/db_model.py b/gus/lib/db_model.py @@ -18,6 +18,13 @@ def init_db(filename=":memory:"): db = SqliteDatabase(filename) db.bind(models) db.create_tables(models) + db.execute_sql("""CREATE VIEW IF NOT EXISTS indexable_crawl AS +SELECT c.* FROM ( + SELECT crawl.*, row_number() + OVER (PARTITION BY page_id ORDER BY timestamp DESC) AS rank + FROM crawl) AS c +WHERE c.rank < 3 +AND c.status == 20;""") return db diff --git a/serve/models.py b/serve/models.py @@ -64,10 +64,10 @@ class GUS(): def get_backlinks(self, url): resource = GeminiResource(url) if not resource.is_valid: - return [] + return [], [] u = resource.indexable_url.rstrip("/") - backlinks = Page.raw("""SELECT p_from.url + backlinks = Page.raw("""SELECT p_from.url, l.is_cross_host_like FROM page AS p_from JOIN indexable_crawl AS ic ON ic.page_id == p_from.id @@ -76,10 +76,12 @@ ON l.from_page_id == p_from.id JOIN page as p_to ON p_to.id == l.to_page_id WHERE p_to.url IN (?, ?) 
-GROUP BY p_from.normalized_url""", u, f"{u}/") +GROUP BY p_from.normalized_url +ORDER BY l.is_cross_host_like, p_from.url ASC""", u, f"{u}/") - backlink_urls = [b.url for b in backlinks.execute()] - return list(set(backlink_urls)) + internal_backlink_urls = [b.url for b in backlinks.execute() if not b.is_cross_host_like] + external_backlink_urls = [b.url for b in backlinks.execute() if b.is_cross_host_like] + return internal_backlink_urls, external_backlink_urls def _get_link_text(result): diff --git a/serve/templates/about.gmi b/serve/templates/about.gmi @@ -62,4 +62,6 @@ The URL structure for retrieving a certain URL's backlinks page is predictable, => gemini://gus.guru/backlinks?gus.guru +Note the distinction between "internal" and "cross-capsule" backlinks. Internal backlinks are backlinks from within your own capsule to the given page. Cross-capsule backlinks are backlinks from other users' capsules. + {% include 'fragments/footer.gmi' %} diff --git a/serve/templates/backlinks.gmi b/serve/templates/backlinks.gmi @@ -1,16 +1,51 @@ {% include 'fragments/header.gmi' %} -{% if backlinks | length > 1 %} -## {{ backlinks|length }} backlinks for {{ url }} +## Backlinks for {{ url }} -{% elif backlinks | length > 0 %} -## {{ backlinks|length }} backlink for {{ url }} +{% if external_backlinks | length > 1 %} +### {{ external_backlinks|length }} cross-capsule backlinks + +{% elif external_backlinks | length > 0 %} +### {{ external_backlinks|length }} cross-capsule backlink + +{% else %} +### No cross-capsule backlinks + +Instead, here's a duck: +``` + ,~~. + ( 6 )-_, + (\___ )=='-' + \ . 
) ) + \ `-' / + ~`~'`~'`~'`~`~ +``` +{% endif %} +{% for backlink in external_backlinks %} +=> {{ backlink }} {{ backlink[9:] }} +{% endfor %} + +{% if internal_backlinks | length > 1 %} +### {{ internal_backlinks|length }} internal backlinks + +{% elif internal_backlinks | length > 0 %} +### {{ internal_backlinks|length }} internal backlink {% else %} -## No backlinks for {{ url }} +### No internal backlinks + +Instead, here's a duck: +``` + ,~~. + ( 6 )-_, + (\___ )=='-' + \ . ) ) + \ `-' / + ~`~'`~'`~'`~`~ +``` {% endif %} -{% for backlink in backlinks %} +{% for backlink in internal_backlinks %} => {{ backlink }} {{ backlink[9:] }} {% endfor %} diff --git a/serve/templates/news.gmi b/serve/templates/news.gmi @@ -3,6 +3,9 @@ ## News +### 2020-07-23 +Added distinction between internal and external backlinks. See gus.guru/about for full documentation. + ### 2020-07-21 Updated certificate for gus.guru. I finally got around to reading some of the helpful list and gemlog discussion concerning EC certificates, and have made the switch for GUS. Comparing to the previous full chain certificate from Let's Encrypt, the new one is about 7x smaller! 
diff --git a/serve/templates/search.gmi b/serve/templates/search.gmi @@ -14,9 +14,9 @@ {% for result in results %} => {{ result["fetchable_url"] }} {{ result["link_text"] }} {% if result["backlink_count"] > 1 %} -=> /backlinks?{{ result["url"][9:] | urlencode }} {{ result["backlink_count"] }} backlinks +=> /backlinks?{{ result["url"][9:] | urlencode }} {{ result["backlink_count"] }} cross-capsule backlinks {% elif result["backlink_count"] > 0 %} -=> /backlinks?{{ result["url"][9:] | urlencode }} {{ result["backlink_count"] }} backlink +=> /backlinks?{{ result["url"][9:] | urlencode }} {{ result["backlink_count"] }} cross-capsule backlink {% endif %} {% if verbose %} * Score : {{ "{:.2f}".format(result["score"]) }} diff --git a/serve/views.py b/serve/views.py @@ -123,10 +123,11 @@ def search(request): def backlinks(request): if request.query: url = unquote(request.query) - backlinks = gus.get_backlinks(url) + internal_backlinks, external_backlinks = gus.get_backlinks(url) body = render_template("backlinks.gmi", url=url, - backlinks=backlinks, + internal_backlinks=internal_backlinks, + external_backlinks=external_backlinks, index_modification_time=gus.statistics["index_modification_time"]) return Response(Status.SUCCESS, "text/gemini", body) else: