commit 48d0feed6d097c54662a7f231c7bc4704837f023
parent 33495e8dd86139cafade2888227e37b1572d18ea
Author: Drew DeVault <sir@cmpwn.com>
Date: Sun, 20 Sep 2020 17:47:14 -0400
Initial pass on text/gemini parser
Diffstat:
4 files changed, 208 insertions(+), 1 deletion(-)
diff --git a/configure b/configure
@@ -7,6 +7,7 @@ gmni() {
src/client.c \
src/escape.c \
src/gmni.c \
+ src/parser.c \
src/url.c
}
diff --git a/include/gmni.h b/include/gmni.h
@@ -103,4 +103,64 @@ char *gemini_input_url(const char *url, const char *input);
// of the given Gemini status code.
enum gemini_status_class gemini_response_class(enum gemini_status status);
+// Kinds of token produced by the text/gemini parser. The value selects which
+// member of the union in struct gemini_token is valid.
+enum gemini_tok {
+ GEMINI_TEXT, // Any line not matched by one of the prefixes below
+ GEMINI_LINK, // Line beginning with "=>"
+ GEMINI_PREFORMATTED, // Line beginning with "```"
+ GEMINI_HEADING, // Line beginning with '#'
+ GEMINI_LIST_ITEM, // Line beginning with '*'
+ GEMINI_QUOTE, // Line beginning with '>'
+};
+
+// A single token produced by gemini_parser_next from one line of input.
+// The strings it points to are heap-allocated copies which are released by
+// gemini_token_finish.
+struct gemini_token {
+ enum gemini_tok token;
+
+ // The token field determines which of the union members is valid.
+ union {
+ char *text;
+
+ struct {
+ char *text; // May be NULL (no label after the URL)
+ char *url;
+ } link;
+
+ struct {
+ char *text;
+ char *alt_text; // May be NULL
+ } preformatted;
+
+ struct {
+ char *title; // Everything after the '#' markers, untrimmed
+ int level; // 1, 2, or 3
+ } heading;
+
+ char *list_item; // Everything after the leading '*', untrimmed
+ char *quote_text; // Everything after the leading '>', untrimmed
+ };
+};
+
+// State for a text/gemini parser: the input BIO plus a buffer of bytes read
+// from it which have not yet been returned as tokens.
+struct gemini_parser {
+ BIO *f; // Input; gemini_parser_init takes a reference on it
+ char *buf; // Pending input read from f
+ size_t bufsz; // Capacity of buf in bytes; doubled when buf fills up
+ size_t bufln; // Number of bytes of pending input held in buf
+};
+
+// Initializes a text/gemini parser which reads from the specified BIO.
+//
+// The parser takes its own reference on f and allocates an internal buffer;
+// both are released by gemini_parser_finish.
+void gemini_parser_init(struct gemini_parser *p, BIO *f);
+
+// Finishes this text/gemini parser and frees up its resources (its buffer and
+// its reference on the BIO). Passing NULL is a no-op; p itself is not freed.
+void gemini_parser_finish(struct gemini_parser *p);
+
+// Reads the next token from a text/gemini file.
+//
+// Returns 0 on success, 1 on EOF, and -1 on failure.
+//
+// On EOF the token is still filled in with whatever input remained (possibly
+// an empty GEMINI_TEXT token). On failure the token is left zeroed.
+//
+// Caller must call gemini_token_finish before exiting or re-using the token
+// parameter.
+int gemini_parser_next(struct gemini_parser *p, struct gemini_token *token);
+
+// Must be called after gemini_parser_next to free up resources for the next
+// token. Passing NULL is a no-op.
+void gemini_token_finish(struct gemini_token *token);
+
#endif
diff --git a/src/gmni.c b/src/gmni.c
@@ -233,9 +233,11 @@ main(int argc, char *argv[])
printf("%d %s\n", resp.status, resp.meta);
/* fallthrough */
case OMIT_HEADERS:
- if (resp.status / 10 != 2) {
+ if (gemini_response_class(resp.status) !=
+ GEMINI_STATUS_CLASS_SUCCESS) {
break;
}
+
char last;
char buf[BUFSIZ];
for (int n = 1; n > 0;) {
diff --git a/src/parser.c b/src/parser.c
@@ -0,0 +1,144 @@
+#include <assert.h>
+#include <ctype.h>
+#include <openssl/bio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gmni.h"
+
+// Initializes a text/gemini parser which reads from the specified BIO.
+//
+// Allocates an empty buffer with room for BUFSIZ bytes plus a NUL terminator
+// and takes an extra reference on f, which gemini_parser_finish releases.
+void
+gemini_parser_init(struct gemini_parser *p, BIO *f)
+{
+	char *buf = malloc(BUFSIZ + 1);
+	// Allocation failure is treated as fatal, the same way
+	// gemini_parser_next handles a failed realloc of this buffer.
+	assert(buf);
+	buf[0] = 0;
+
+	p->f = f;
+	p->buf = buf;
+	p->bufsz = BUFSIZ;
+	p->bufln = 0;
+	BIO_up_ref(p->f);
+}
+
+// Releases what the parser holds: its reference on the input BIO and its
+// buffer. Passing NULL is a no-op. The struct itself is not freed.
+void
+gemini_parser_finish(struct gemini_parser *p)
+{
+	if (p != NULL) {
+		BIO_free(p->f);
+		free(p->buf);
+	}
+}
+
+// Reads the next line from the parser's BIO and classifies it into tok.
+//
+// Returns 0 when a complete line was tokenized, 1 when the BIO reported end
+// of input (tok then holds whatever unterminated text was left, possibly
+// empty), and -1 if reading failed (tok is left zeroed). Strings stored in
+// tok are heap copies; release them with gemini_token_finish.
+int
+gemini_parser_next(struct gemini_parser *p, struct gemini_token *tok)
+{
+	memset(tok, 0, sizeof(*tok));
+
+	// Read until the buffer holds at least one complete line.
+	int eof = 0;
+	while (!strchr(p->buf, '\n')) {
+		if (p->bufln == p->bufsz) {
+			p->bufsz *= 2;
+			// One extra byte so the terminator written after each
+			// read still fits when the buffer is completely full.
+			char *buf = realloc(p->buf, p->bufsz + 1);
+			assert(buf);
+			p->buf = buf;
+		}
+
+		int n = BIO_read(p->f, &p->buf[p->bufln], p->bufsz - p->bufln);
+		if (n < 0) {
+			// Any negative result is an error; it must never be
+			// added to bufln.
+			return -1;
+		} else if (n == 0) {
+			eof = 1;
+			break;
+		}
+		p->bufln += n;
+		p->buf[p->bufln] = 0;
+	}
+
+	// TODO: Collapse multi-line text for the user-agent to wrap
+	char *end = strchr(p->buf, '\n');
+	if (end != NULL) {
+		*end = 0;
+	}
+
+	// TODO: Provide whitespace trimming helper function
+	if (strncmp(p->buf, "=>", 2) == 0) {
+		tok->token = GEMINI_LINK;
+		// <ctype.h> functions require values in unsigned char range.
+		size_t i = 2;
+		while (isspace((unsigned char)p->buf[i])) ++i;
+		const char *url = &p->buf[i];
+
+		for (; p->buf[i]; ++i) {
+			if (isspace((unsigned char)p->buf[i])) {
+				// Terminate the URL, then skip ahead to the label.
+				p->buf[i++] = 0;
+				while (isspace((unsigned char)p->buf[i])) ++i;
+				if (p->buf[i]) {
+					tok->link.text = strdup(&p->buf[i]);
+				}
+				break;
+			}
+		}
+
+		tok->link.url = strdup(url);
+	} else if (strncmp(p->buf, "```", 3) == 0) {
+		tok->token = GEMINI_PREFORMATTED; // TODO
+		tok->preformatted.text = strdup("<text>");
+		tok->preformatted.alt_text = strdup("<alt-text>");
+	} else if (p->buf[0] == '#') {
+		tok->token = GEMINI_HEADING;
+		// Count up to three leading '#' characters.
+		int level = 1;
+		while (p->buf[level] == '#' && level < 3) {
+			++level;
+		}
+		tok->heading.level = level;
+		tok->heading.title = strdup(&p->buf[level]);
+	} else if (p->buf[0] == '*') {
+		tok->token = GEMINI_LIST_ITEM;
+		tok->list_item = strdup(&p->buf[1]);
+	} else if (p->buf[0] == '>') {
+		tok->token = GEMINI_QUOTE;
+		tok->quote_text = strdup(&p->buf[1]);
+	} else {
+		tok->token = GEMINI_TEXT;
+		tok->text = strdup(p->buf);
+	}
+
+	if (end && end + 1 < p->buf + p->bufln) {
+		// Shift the unconsumed tail to the front and re-terminate it so
+		// stale bytes from the consumed line are never seen as input.
+		size_t len = end - p->buf + 1;
+		p->bufln -= len;
+		memmove(p->buf, end + 1, p->bufln);
+		p->buf[p->bufln] = 0;
+	} else {
+		p->buf[0] = 0;
+		p->bufln = 0;
+	}
+
+	return eof;
+}
+
+// Frees the strings owned by a token. Which union members are released
+// depends on tok->token. Passing NULL is a no-op; tok itself is not freed.
+void
+gemini_token_finish(struct gemini_token *tok)
+{
+	if (tok == NULL) {
+		return;
+	}
+
+	const enum gemini_tok kind = tok->token;
+	if (kind == GEMINI_TEXT) {
+		free(tok->text);
+	} else if (kind == GEMINI_LINK) {
+		free(tok->link.text);
+		free(tok->link.url);
+	} else if (kind == GEMINI_PREFORMATTED) {
+		free(tok->preformatted.text);
+		free(tok->preformatted.alt_text);
+	} else if (kind == GEMINI_HEADING) {
+		free(tok->heading.title);
+	} else if (kind == GEMINI_LIST_ITEM) {
+		free(tok->list_item);
+	} else if (kind == GEMINI_QUOTE) {
+		free(tok->quote_text);
+	}
+}