commit 8c21cb40de18a1fc4f8cc0ee540a1c021d08d50a
parent 01370e679836f2a4a9a78b9e08ff08b5d9469329
Author: René Wagner <rwagner@rw-net.de>
Date: Sun, 29 Nov 2020 10:03:54 +0100
integrate handling gemini:// uris using gcat
improve error handling and restructure orrg.pl
Diffstat:
A | .gitignore | | | 1 | + |
M | README.md | | | 5 | +++-- |
A | gcat | | | 72 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | orrg.pl | | | 69 | ++++++++++++++++++++++++++++++++++++++++++++++----------------------- |
4 files changed, 122 insertions(+), 25 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+data/*
diff --git a/README.md b/README.md
@@ -14,13 +14,13 @@ Preferably over the [mailing list](https://lists.sr.ht/~rwa/gmni-perl-cgi).
## features
-- load an atom/rss feed from https (http is not supported!) given by user input
+- load an atom/rss feed from gemini or https (http is deliberately not supported!) given by user input
- render feed (channel info & entrys) as a gemini site
- include links to originating site and every article
- strip html tags from item description
- lists of popular and recently visited feeds
-Fetching feeds from gemini is currently not supported -> https://todo.sr.ht/~rwa/gmni-perl/4
+Gemini support is currently implemented using [gcat](https://github.com/aaronjanse/gcat) until popular Perl libraries have caught up. :)
## non-features
@@ -43,3 +43,4 @@ Given this restrictions is not suitable for highly traffic feeds which are updat
- DateTime
- DateTime::Format::ISO8601
- HTML::Strip
+- Python 3 for `gcat`
diff --git a/gcat b/gcat
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+
+import cgi
+import os
+import socket
+import ssl
+import sys
+import urllib.parse
+
+def absolutise_url(base, relative):
+ # Absolutise relative links
+ if "://" not in relative:
+ # Python's URL tools somehow only work with known schemes?
+ base = base.replace("gemini://","http://")
+ relative = urllib.parse.urljoin(base, relative)
+ relative = relative.replace("http://", "gemini://")
+ return relative
+
+if len(sys.argv) != 2:
+ print("Usage:")
+ print("gcat gemini://gemini.circumlunar.space")
+ sys.exit(1)
+
+url = sys.argv[1]
+parsed_url = urllib.parse.urlparse(url)
+if parsed_url.scheme == "":
+ url = "gemini://"+url
+ parsed_url = urllib.parse.urlparse(url)
+
+if parsed_url.scheme != "gemini":
+ print("Sorry, Gemini links only.")
+ sys.exit(1)
+if parsed_url.port is not None:
+ useport = parsed_url.port
+else:
+ useport = 1965
+# Do the Gemini transaction
+while True:
+ s = socket.create_connection((parsed_url.hostname, useport))
+ context = ssl.SSLContext()
+ context.check_hostname = False
+ context.verify_mode = ssl.CERT_NONE
+ s = context.wrap_socket(s, server_hostname = parsed_url.netloc)
+ s.sendall((url + '\r\n').encode("UTF-8"))
+ # Get header and check for redirects
+ fp = s.makefile("rb")
+ header = fp.readline()
+ print(header.decode("UTF-8"), end="")
+ header = header.decode("UTF-8").strip()
+ status, mime = header.split()[:2]
+ # Handle input requests
+ if status.startswith("1"):
+ # Prompt
+ query = input("INPUT" + mime + "> ")
+ url += "?" + urllib.parse.quote(query) # Bit lazy...
+ # Follow redirects
+ elif status.startswith("3"):
+ url = absolutise_url(url, mime)
+ parsed_url = urllib.parse.urlparse(url)
+ # Otherwise, we're done.
+ else:
+ break
+# Fail if transaction was not successful
+if status.startswith("2"):
+ if mime.startswith("text/"):
+ # Decode according to declared charset
+ mime, mime_opts = cgi.parse_header(mime)
+ body = fp.read()
+ body = body.decode(mime_opts.get("charset","UTF-8"))
+ print(body, end="")
+ else:
+ print(fp.read(), end="")
diff --git a/orrg.pl b/orrg.pl
@@ -26,7 +26,7 @@ if (!defined($ENV{'SERVER_PROTOCOL'}) || $ENV{'SERVER_PROTOCOL'} ne 'GEMINI')
my $query = lc(uri_unescape($ENV{'QUERY_STRING'}));
-if ($query eq '' || $query !~ /^https\:\/\//) {
+if ($query eq '' || $query !~ /^(https|gemini)\:\/\//) {
write_response('INPUT', 'Paste the URI of the rss feed you want to read:', undef);
}
@@ -39,40 +39,63 @@ sub create_response
my ( $qs ) = @_;
my @body = ();
- my $feed = XML::FeedPP->new($qs, utf8_flag => 1);
-
+ my $feed = feed_get($qs);
if ( !defined($feed) ) {
push @body, ('# orrg error', '', 'The requested feed could not be loaded. :(', '', '=> '. $qs .' open feed in browser');
return @body;
}
-
- recent_add($qs, $feed->title);
- popular_add($qs, $feed->title);
+
push @body, '# '. $feed->title;
push @body, 'fetched '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime());
$feed->description eq '' or push @body, ('', $feed->description);
$feed->image eq '' or push @body, '=> '. $feed->image .' feed image';
$feed->link eq '' or push @body, ('=> '.$feed->link.' open website', '');
+
push @body, ('## recent feed items', '');
+ foreach my $it ($feed->get_item()) { push @body, @{item($it)}; }
+
+ push @body, ('', '', '=> index.pl [home]');
+ return @body;
+}
+
+sub item # Render one feed entry ($it) as gemtext lines; returns an array reference.
+{
+ my ($it) = @_;
my $hs = HTML::Strip->new(emit_spaces => 0, auto_reset => 1); # used below to reduce the HTML description to plain text
- foreach my $it ($feed->get_item()) {
- push @body, ($it->description ne '' || $it->pubDate ne '')? '### '. $it->title : $it->title;
- if ($it->pubDate ne '') {
- my $dt = DateTime::Format::ISO8601->parse_datetime($it->pubDate);
- push @body, 'published '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($dt->epoch));
- push @body, '';
- }
- if ($it->description ne '') {
- my $desc = $it->description;
- chomp $desc;
- $desc =~ s/\<li\>/* /ig;
- $desc =~ s/\<br \/\>/\r\n/ig;
- push @body, $hs->parse($desc);
- }
- $it->link eq '' or push @body, ('=> '.$it->link.' open entry in browser', '');
+ my @item = (); # output lines for this entry
+ push @item, ($it->description ne '' || $it->pubDate ne '')? '### '. $it->title : $it->title; # title is a '###' heading only when a date or description follows
+ if ($it->pubDate ne '') {
+ my $dt = DateTime::Format::ISO8601->parse_datetime($it->pubDate); # NOTE(review): presumably throws on a date that is not ISO 8601 - confirm
+ push @item, 'published '. strftime('%Y-%m-%dT%H:%M:%SZ', gmtime($dt->epoch)); # re-emitted as a UTC timestamp
+ push @item, ''; # blank line after the date
}
+ if ($it->description ne '') {
+ my $desc = $it->description;
+ chomp $desc;
+ $desc =~ s/\<li\>/* /ig; # list items become '* ' bullets
+ $desc =~ s/\<br \/\>/\r\n/ig; # only the exact spelling '<br />' is turned into a CRLF line break
+ push @item, $hs->parse($desc); # remaining markup is handed to HTML::Strip
+ }
+ $it->link eq '' or push @item, ('=> '.$it->link.' open entry in browser', ''); # link line plus blank line, skipped when the entry has no link
- push @body, ('', '', '=> index.pl [home]');
- return @body;
+ return \@item; # array reference; create_response flattens it into @body
+}
+
+# feed_get($query)
+#
+# Load and parse the feed at $query, which must be an https:// or gemini://
+# URI. On success the feed is registered via recent_add() and popular_add()
+# and the XML::FeedPP object is returned. On any failure (unsupported
+# scheme, fetch error, non-20 gemini status, unparsable feed) nothing is
+# returned, so the caller's defined() check can render its error page.
+sub feed_get
+{
+    my ( $query ) = @_;
+
+    my $feed;
+    if ( $query =~ /^https\:\/\// ) {
+        # XML::FeedPP->new croaks on unreachable or malformed feeds; trap it
+        # so the failure reaches the caller as "no feed" instead of killing
+        # the CGI.
+        $feed = eval { XML::FeedPP->new($query, utf8_flag => 1) };
+    }
+    elsif ( $query =~ /^gemini\:\/\// ) {
+        # $query is user input. The list form of a pipe open runs gcat
+        # directly, without a shell, so metacharacters in the URI cannot
+        # inject commands (backticks would hand the string to /bin/sh).
+        open(my $gcat, '-|', './gcat', $query) or return;
+        my $content = do { local $/; <$gcat> };
+        close($gcat);
+        defined($content) or return;
+
+        # gcat prints one "<status> <meta>" header line per hop (redirects
+        # first, final response last). Strip them all from the front and
+        # keep the last status; only an anchored "20" counts as success.
+        my $status = '';
+        while ( $content =~ s/\A([0-9]{2})(?:[ \t][^\r\n]*)?\r?\n// ) {
+            $status = $1;
+        }
+        $status eq '20' or return;
+        $feed = eval { XML::FeedPP->new($content, -type => 'string') };
+    }
+    defined($feed) or return;
+
+    recent_add($query, $feed->title);
+    popular_add($query, $feed->title);
+    return $feed;
+}