integrate handling gemini:// uris using gcat - orrg - online rss & atom feed reader for gemini

commit 8c21cb40de18a1fc4f8cc0ee540a1c021d08d50a
parent 01370e679836f2a4a9a78b9e08ff08b5d9469329
Author: René Wagner <rwagner@rw-net.de>
Date:   Sun, 29 Nov 2020 10:03:54 +0100

integrate handling gemini:// uris using gcat

improve error handling and restructure orrg.pl

Diffstat:
A .gitignore  | 1 +
M README.md  | 5 +++--
A gcat  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M orrg.pl  | 69 ++++++++++++++++++++++++++++++++++++++++++++++-----------------------

4 files changed, 122 insertions(+), 25 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+data/*
diff --git a/README.md b/README.md
@@ -14,13 +14,13 @@ Preferably over the [mailing list](https://lists.sr.ht/~rwa/gmni-perl-cgi).
 
 ## features
 
-- load an atom/rss feed from https (http is not supported!) given by user input
+- load an atom/rss feed from gemini or https (http is deliberately not supported!) given by user input
 - render feed (channel info & entries) as a gemini site
 	- include links to originating site and every article
 	- strip html tags from item description
 - lists of popular and recently visited feeds
 
-Fetching feeds from gemini is currently not supported -> https://todo.sr.ht/~rwa/gmni-perl/4
+Gemini support is currently implemented using [gcat](https://github.com/aaronjanse/gcat) until popular Perl libraries have caught up. :)
 
 ## non-features
 
@@ -43,3 +43,4 @@ Given this restrictions is not suitable for highly traffic feeds which are updat
 	- DateTime
 	- DateTime::Format::ISO8601
 	- HTML::Strip
+- Python 3 for `gcat`
diff --git a/gcat b/gcat
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+import cgi
+import os
+import socket
+import ssl
+import sys
+import urllib.parse
+
+def absolutise_url(base, relative):
+    """Resolve a link target against the URL it was received from.
+
+    base     -- absolute URL of the request that produced the link
+    relative -- link target; may be relative or already absolute
+
+    Returns the absolute URL. A target that carries its own scheme is
+    returned unchanged.
+    """
+    # Decide by the parsed scheme rather than a substring search: a relative
+    # target may contain "://" inside its query string.
+    if urllib.parse.urlparse(relative).scheme:
+        return relative
+    # urljoin() only resolves against schemes it knows, so a gemini base is
+    # presented as http for the join. Only the leading scheme is swapped, in
+    # both directions, so any "http://" embedded in a path or query survives.
+    is_gemini = base[:len("gemini://")].lower() == "gemini://"
+    if is_gemini:
+        base = "http://" + base[len("gemini://"):]
+    joined = urllib.parse.urljoin(base, relative)
+    if is_gemini and joined.startswith("http://"):
+        joined = "gemini://" + joined[len("http://"):]
+    return joined
+
+# Command-line entry point: fetch one gemini:// URL and write the response
+# header line(s) followed by the response body to stdout.
+if len(sys.argv) != 2:
+    print("Usage:")
+    print("gcat gemini://gemini.circumlunar.space")
+    sys.exit(1)
+
+url = sys.argv[1]
+parsed_url = urllib.parse.urlparse(url)
+if parsed_url.scheme == "":
+    url = "gemini://"+url
+    parsed_url = urllib.parse.urlparse(url)
+
+if parsed_url.scheme != "gemini":
+    print("Sorry, Gemini links only.")
+    sys.exit(1)
+
+# Upper bound on followed redirects so that a redirect loop terminates.
+MAX_REDIRECTS = 5
+redirects = 0
+# Do the Gemini transaction
+while True:
+    # Derive the port on every round trip: a redirect may name another one.
+    useport = parsed_url.port if parsed_url.port is not None else 1965
+    s = socket.create_connection((parsed_url.hostname, useport))
+    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+    # Certificates are deliberately not verified; check_hostname has to be
+    # switched off before verify_mode may be set to CERT_NONE.
+    context.check_hostname = False
+    context.verify_mode = ssl.CERT_NONE
+    # SNI carries the bare host name; netloc would also contain ":port".
+    s = context.wrap_socket(s, server_hostname = parsed_url.hostname)
+    s.sendall((url + '\r\n').encode("UTF-8"))
+    # Get header and check for redirects
+    fp = s.makefile("rb")
+    header = fp.readline()
+    print(header.decode("UTF-8"), end="")
+    header = header.decode("UTF-8").strip()
+    # Split off the status only: META may itself contain whitespace
+    # (e.g. "text/gemini; charset=utf-8") or be missing altogether.
+    parts = header.split(maxsplit=1)
+    status = parts[0] if parts else ""
+    mime = parts[1] if len(parts) > 1 else ""
+    # Handle input requests
+    if status.startswith("1"):
+        # Prompt
+        query = input("INPUT" + mime + "> ")
+        url += "?" + urllib.parse.quote(query) # Bit lazy...
+    # Follow redirects
+    elif status.startswith("3"):
+        redirects += 1
+        if redirects > MAX_REDIRECTS:
+            print("Too many redirects.", file=sys.stderr)
+            sys.exit(1)
+        url = absolutise_url(url, mime)
+        parsed_url = urllib.parse.urlparse(url)
+        if parsed_url.scheme != "gemini":
+            # Redirect to another protocol: nothing more to fetch here.
+            break
+    # Otherwise, we're done.
+    else:
+        break
+    # Another round trip follows; release this connection first.
+    fp.close()
+    s.close()
+# Fail if transaction was not successful
+if status.startswith("2"):
+    # NOTE(review): the Gemini specification defines a default for an empty
+    # META on success; confirm this value against the spec version targeted.
+    mime = mime or "text/gemini; charset=utf-8"
+    if mime.startswith("text/"):
+        # Decode according to declared charset
+        mime, mime_opts = cgi.parse_header(mime)
+        body = fp.read()
+        body = body.decode(mime_opts.get("charset","UTF-8"))
+        print(body, end="")
+    else:
+        # Emit non-text bodies as raw bytes; print() would write the
+        # b'...' repr. Flush first so the header stays ahead of the body.
+        sys.stdout.flush()
+        sys.stdout.buffer.write(fp.read())
diff --git a/orrg.pl b/orrg.pl
@@ -26,7 +26,7 @@ if (!defined($ENV{'SERVER_PROTOCOL'}) || $ENV{'SERVER_PROTOCOL'} ne 'GEMINI')
 
 my $query = lc(uri_unescape($ENV{'QUERY_STRING'}));
 
-if ($query eq '' || $query !~ /^https\:\/\//) {
+if ($query eq '' || $query !~ /^(https|gemini)\:\/\//) {
 	write_response('INPUT', 'Paste the URI of the rss feed you want to read:', undef);
 }
 
@@ -39,40 +39,63 @@ sub create_response
 	my ( $qs ) = @_;
 	my @body = ();
 
-	my $feed = XML::FeedPP->new($qs, utf8_flag => 1);
-
+	my $feed = feed_get($qs);
 	if ( !defined($feed) ) {
 		push @body, ('# orrg error', '', 'The requested feed could not be loaded. :(', '', '=> '. $qs .' open feed in browser');
 		return @body; 
 	}
-
-	recent_add($qs, $feed->title);
-	popular_add($qs, $feed->title);
+	
 	push @body, '# '. $feed->title;
 	push @body, 'fetched '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime());
 	$feed->description eq '' or push @body, ('', $feed->description);
 	$feed->image eq '' or push @body, '=> '. $feed->image .' feed image';
 	$feed->link eq '' or push @body, ('=> '.$feed->link.' open website', '');
+	
 	push @body, ('## recent feed items', '');
+	foreach my $it ($feed->get_item()) { push @body, @{item($it)};	}
+
+	push @body, ('', '', '=> index.pl [home]');
+	return @body;
+}
+
+sub item
+{
+	my ($it) = @_;
 
 	my $hs = HTML::Strip->new(emit_spaces => 0, auto_reset => 1);
-	foreach my $it ($feed->get_item()) {
-		push @body, ($it->description ne '' || $it->pubDate ne '')? '### '. $it->title : $it->title;
-		if ($it->pubDate ne '') {
-			my $dt = DateTime::Format::ISO8601->parse_datetime($it->pubDate);
-			push @body, 'published '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($dt->epoch));
-			push @body, '';
-		}
-		if ($it->description ne '') {
-			my $desc = $it->description;
-			chomp $desc;
-			$desc =~ s/\<li\>/* /ig;
-			$desc =~ s/\<br \/\>/\r\n/ig;
- 			push @body, $hs->parse($desc);
-		}
-		$it->link eq '' or push @body, ('=> '.$it->link.' open entry in browser', '');
+	my @item = ();
+	push @item, ($it->description ne '' || $it->pubDate ne '')? '### '. $it->title : $it->title;
+	if ($it->pubDate ne '') {
+		my $dt = DateTime::Format::ISO8601->parse_datetime($it->pubDate);
+		push @item, 'published '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($dt->epoch));
+		push @item, '';
 	}
+	if ($it->description ne '') {
+		my $desc = $it->description;
+		chomp $desc;
+		$desc =~ s/\<li\>/* /ig;
+		$desc =~ s/\<br \/\>/\r\n/ig;
+ 		push @item, $hs->parse($desc);
+	}
+	$it->link eq '' or push @item, ('=> '.$it->link.' open entry in browser', '');
 
-	push @body, ('', '', '=> index.pl [home]');
-	return @body;
+	return \@item;
+}
+
+# Load and parse the feed addressed by $query.
+#
+# https:// URIs are fetched by XML::FeedPP itself, gemini:// URIs are fetched
+# through the bundled ./gcat helper and parsed from its output.
+# On success the feed is registered via recent_add/popular_add and returned;
+# on any failure (unsupported scheme, fetch error, parse error) undef is
+# returned so the caller can render its error page.
+sub feed_get
+{
+	my ( $query ) = @_;
+
+	my $feed;
+	if ( $query =~ /^https\:\/\// ) {
+		# XML::FeedPP croaks on fetch/parse errors - map that to undef
+		$feed = eval { XML::FeedPP->new($query, utf8_flag => 1) };
+	}
+	elsif ( $query =~ /^gemini\:\/\// ) {
+		# $query originates from the request, so it must never reach a shell:
+		# the list form of the pipe open passes it as a single argument
+		open(my $gcat, '-|', './gcat', $query) or return;
+		my $content = do { local $/; <$gcat> };
+		close($gcat);
+		defined($content) or return;
+
+		# gcat prints one status line per round trip - skip redirect lines,
+		# then insist on a leading "20" line and cut it off the document
+		$content =~ s/\A(?:3[0-9]\W[^\r\n]*\r?\n)+//;
+		$content =~ s/\A20\W[^\r\n]*\r?\n// or return;
+		$feed = eval { XML::FeedPP->new($content, -type => 'string') };
+	}
+	defined($feed) or return;
+
+	recent_add($query, $feed->title);
+	popular_add($query, $feed->title);
+	return $feed;
+}

	orrg online rss & atom feed reader for gemini
	git clone https://git.clttr.info/orrg.git
	Log (Feed) \| Files \| Refs (Tags) \| README \| LICENSE

A	.gitignore	\|	1	+
M	README.md	\|	5	+++--
A	gcat	\|	72	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	orrg.pl	\|	69	++++++++++++++++++++++++++++++++++++++++++++++-----------------------