commit a9f9a229d5be860a5fdab051fbda7ece66d2dd64
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 28 May 2022 12:09:41 +0200
initial import
Diffstat:
A | LICENSE | | | 15 | +++++++++++++++ |
A | Makefile | | | 17 | +++++++++++++++++ |
A | README | | | 116 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | hmac_sha1.c | | | 63 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | hmac_sha1.h | | | 4 | ++++ |
A | pubsub_cgi.c | | | 463 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | pubsub_gethub.c | | | 149 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | pubsub_setup | | | 133 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | sha1.c | | | 145 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | sha1.h | | | 13 | +++++++++++++ |
A | strlcat.c | | | 54 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | xml.c | | | 415 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | xml.h | | | 43 | +++++++++++++++++++++++++++++++++++++++++++ |
13 files changed, 1630 insertions(+), 0 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2022 Hiltjo Posthuma <hiltjo@codemadness.org>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,17 @@
+.POSIX:
+
+# Installation locations. No target in this Makefile references them.
+PREFIX = /usr/local
+CGIDIR = /var/www/cgi-bin
+
+# Compile all objects and link both programs.
+# Depends on "clean", so every build starts from scratch.
+# pubsub_cgi is linked statically and stripped; pubsub_gethub is not.
+build: clean
+	${CC} -c sha1.c ${CFLAGS} ${CPPFLAGS}
+	${CC} -c hmac_sha1.c ${CFLAGS} ${CPPFLAGS}
+	${CC} -c strlcat.c xml.c ${CFLAGS} ${CPPFLAGS}
+	${CC} -c pubsub_cgi.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE
+	${CC} -c pubsub_gethub.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE
+	# link
+	${CC} -o pubsub_cgi hmac_sha1.o sha1.o pubsub_cgi.o ${LDFLAGS} -static -s
+	${CC} -o pubsub_gethub strlcat.o xml.o pubsub_gethub.o ${LDFLAGS}
+
+# Remove the object files and both programs.
+clean:
+	rm -f *.o pubsub_cgi pubsub_gethub
diff --git a/README b/README
@@ -0,0 +1,116 @@
+pubsubhubbubblub
+----------------
+
+Generic pubsubhubbub client implementation.
+Helper scripts to use it with sfeed.
+
+
+What is it
+----------
+
+pubsubhubbub is a publisher/subscriber technology used to push updates in a webhook-like way.
+This allows to push content updates, instead of polling for news in an interval.
+
+
+Features
+--------
+
+- Not many dependencies.
+- Uses pledge and unveil on OpenBSD.
+- Signatures (hub.secret) support, Pubsubhub 0.4 core SHA1 only.
+
+
+Dependencies
+------------
+
+- C compiler
+
+
+Files
+-----
+
+pubsub_cgi.c - Small stupid PubSubHubBub implementation as a CGI program.
+pubsub_gethub - Helper program extract a hub and feed URL from a RSS or Atom feed data.
+pubsub_setup - Helper script that sets up the directory structure for
+ processing the feed for the CGI program. It has an
+ -s option to subscribe and an -u option to unsubscribe at a hub also.
+
+
+How to install
+--------------
+
+For the CGI program:
+
+OpenBSD httpd and slowcgi, httpd.conf:
+
+ location "/pubsub/**" {
+ request strip 1
+ root "/cgi-bin/pubsub"
+ fastcgi socket "/run/slowcgi.sock"
+ }
+
+Compile cgi.c statically and copy it to /var/www/cgi-bin/pubsub
+
+- Create a directory with write-access for the pubsub CGI program
+ /var/www/pubsub-data/feedname. The setup_feed.sh script can be used to create
+ the directories.
+- Make sure to set the proper permissions for the CGI program (slowcgi) and
+ HTTPd.
+- The base name of the CGI script can be changed in the setup_feed.sh script.
+
+
+How does it work
+----------------
+
+The CGI program https://codemadness.org/pubsub/slashdot/secrettoken
+
+
+Directory structure:
+
+/pubsub-data/config/feedname/ - Directory with metadata about the feed.
+/pubsub-data/config/feedname/hub - The hub URL, for example http://pubsubhubbub.appspot.com/ .
+/pubsub-data/config/feedname/topic - hub.topic, the feed URL.
+/pubsub-data/config/feedname/secret - hub.secret for calculating the message digest,
+ see Section 8 of Pubsubhubbub core 0.4.
+/pubsub-data/config/feedname/token - File containing a line with a secret token. This makes sure an entrypoint
+ is not easy guessable (by different hubs etc).
+/pubsub-data/feeds/feedname/ - Directory containing processed messages.
+/pubsub-data/tmp/feedname/ - Temporary directory to process messages.
+ Moves to the feeds/feedname directory on success.
+/pubsub-data/log - Log file, TAB-separated.
+
+
+Example
+-------
+
+Get the hub and feed URL:
+
+ curl -s http://rss.slashdot.org/Slashdot/slashdot | pubsub_gethub
+
+ http://rss.slashdot.org/Slashdot/slashdot self
+ http://pubsubhubbub.appspot.com/ hub
+
+Setup the feed for the CGI program:
+ cd /var/www/pubsub-data
+ pubsub_setup -s 'slashdot' 'http://pubsubhubbub.appspot.com/' 'http://rss.slashdot.org/Slashdot/slashdot'
+
+
+Monitor script example
+----------------------
+
+This monitors the log file using tail(1) and uses sfeed and sfeed_plain to write the line to stdout.
+This can then be piped to the suckless ii(1) program for IRC notifications for example.
+It uses sfeed for parsing RSS and Atom content and formats it to a plain-text list.
+
+ #!/bin/sh
+ cd /var/www/pubsub-data
+ tail -f log | \
+ LC_ALL=C awk '{ print $2 "\t" $3; fflush(); }' | \
+ while IFS=" " read -r feed file; do sfeed < "feeds/${feed}/${file}"; done | \
+ sfeed_plain
+
+
+References
+----------
+
+Pubsubhubbub core 0.4: https://pubsubhubbub.github.io/PubSubHubbub/pubsubhubbub-core-0.4.html
diff --git a/hmac_sha1.c b/hmac_sha1.c
@@ -0,0 +1,63 @@
+/* Adapted from RFC2104 hmac_md5, some code-style changes and data streaming support. */
+
+#include <string.h>
+#include <stdio.h>
+
+#include "hmac_sha1.h"
+
+/*
+ * Begin a streaming HMAC-SHA1 computation:
+ *
+ *	SHA1(K XOR opad, SHA1(K XOR ipad, text))
+ *
+ * ctx       - SHA1 context; initialized here and fed with the inner pad.
+ * key       - secret key bytes.
+ * key_len   - length of key; a key longer than 64 bytes is replaced by
+ *             its 20-byte SHA1 digest first.
+ * k_opad    - caller-provided buffer that receives the key XORed with
+ *             0x5c; pass it unchanged to hmac_sha1_final().
+ * k_opadlen - size of k_opad. The whole buffer is zeroed and its first
+ *             64 bytes are then written, so it has to be at least 64.
+ *
+ * After this call the text is hashed with SHA1_Update(ctx, ...).
+ */
+void
+hmac_sha1_init(SHA_CTX *ctx, const unsigned char *key, size_t key_len,
+	unsigned char *k_opad, size_t k_opadlen)
+{
+	unsigned char k_ipad[65]; /* inner padding - key XORd with ipad */
+	unsigned char keydigest[20]; /* SHA1(key), for keys over 64 bytes */
+	SHA_CTX keyctx;
+	size_t pos;
+
+	/* a key that does not fit in one 64-byte block is hashed first */
+	if (key_len > 64) {
+		SHA1_Init(&keyctx);
+		SHA1_Update(&keyctx, key, key_len);
+		SHA1_Final(keydigest, &keyctx);
+
+		key = keydigest;
+		key_len = sizeof(keydigest);
+	}
+
+	/* zero-pad the key into both pad buffers */
+	memset(k_opad, 0, k_opadlen);
+	memcpy(k_opad, key, key_len);
+	memset(k_ipad, 0, sizeof(k_ipad));
+	memcpy(k_ipad, key, key_len);
+
+	/* ipad is the byte 0x36 repeated 64 times, opad the byte 0x5c */
+	for (pos = 0; pos < 64; pos++)
+		k_ipad[pos] ^= 0x36;
+	for (pos = 0; pos < 64; pos++)
+		k_opad[pos] ^= 0x5c;
+
+	/* 1st pass: start the inner hash with the inner pad */
+	SHA1_Init(ctx);
+	SHA1_Update(ctx, k_ipad, 64);
+}
+
+/*
+ * Finish an HMAC-SHA1 computation started with hmac_sha1_init().
+ *
+ * ctx    - context holding the inner hash; it is reused for the outer hash.
+ * k_opad - outer pad filled in by hmac_sha1_init(); 64 bytes are read.
+ * digest - receives the 20-byte result. It also holds the inner digest
+ *          in between the two passes.
+ */
+void
+hmac_sha1_final(SHA_CTX *ctx, const unsigned char *k_opad, unsigned char *digest)
+{
+	/* 1st pass: inner digest, SHA1(K XOR ipad, text) */
+	SHA1_Final(digest, ctx);
+
+	/* 2nd pass: outer digest, SHA1(K XOR opad, inner digest) */
+	SHA1_Init(ctx);
+	SHA1_Update(ctx, k_opad, 64);
+	SHA1_Update(ctx, digest, 20);
+	SHA1_Final(digest, ctx);
+}
diff --git a/hmac_sha1.h b/hmac_sha1.h
@@ -0,0 +1,4 @@
+#ifndef HMAC_SHA1_H
+#define HMAC_SHA1_H
+
+#include <stddef.h> /* size_t */
+
+#include "sha1.h"
+
+/*
+ * Start a streaming HMAC-SHA1 computation with the given key. k_opad is
+ * a caller-provided buffer of k_opadlen bytes (at least 64) that has to
+ * be passed on to hmac_sha1_final(). The message is fed in between with
+ * SHA1_Update().
+ */
+void hmac_sha1_init(SHA_CTX *ctx, const unsigned char *key, size_t key_len,
+	unsigned char *k_opad, size_t k_opadlen);
+
+/* Finish the computation and store the 20-byte result in digest. */
+void hmac_sha1_final(SHA_CTX *ctx, const unsigned char *k_opad,
+	unsigned char *digest);
+
+#endif
diff --git a/pubsub_cgi.c b/pubsub_cgi.c
@@ -0,0 +1,463 @@
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#ifdef __OpenBSD__
+#include <unistd.h>
+#else
+#define pledge(p1,p2) 0
+#define unveil(p1,p2) 0
+#endif
+
+#include "hmac_sha1.h"
+
+/* prefix every request path has to start with */
+static const char *relpath = "/pubsub/";
+
+/* root of all data that is read and written */
+#define DATADIR "/pubsub-data"
+
+/* directory with one sub-directory of metadata per feed */
+static const char *configdir = DATADIR "/config";
+/* directory with one sub-directory of stored messages per feed */
+static const char *datadir = DATADIR "/feeds";
+/* directory with one sub-directory per feed for files in progress */
+static const char *tmpdir = DATADIR "/tmp";
+/* TAB-separated log file, one line per stored message */
+static const char *logfile = DATADIR "/log";
+/* timestamp used for the temporary file name and the log line */
+static time_t now;
+
+/*
+ * Read the first line of the file at path, without its newline.
+ *
+ * Returns a pointer to a static buffer that is overwritten by the next
+ * call, or NULL when the file cannot be opened or nothing can be read.
+ * At most 255 bytes of the line are kept.
+ */
+char *
+readfile(const char *path)
+{
+	static char line[256];
+	FILE *fp;
+	char *got;
+
+	if (!(fp = fopen(path, "rb")))
+		return NULL;
+
+	got = fgets(line, sizeof(line), fp);
+	fclose(fp);
+	if (!got)
+		return NULL;
+
+	/* cut at the first newline, if there is one */
+	line[strcspn(line, "\n")] = '\0';
+
+	return line;
+}
+
+/*
+ * Value (0-15) of the hexadecimal digit c, accepting both cases.
+ * Returns 0 for any other character, so callers have to validate the
+ * input themselves when they need to tell "0" and garbage apart.
+ */
+int
+hexdigit(int c)
+{
+	if ('0' <= c && c <= '9')
+		return c - '0';
+	if ('a' <= c && c <= 'f')
+		return 10 + (c - 'a');
+	if ('A' <= c && c <= 'F')
+		return 10 + (c - 'A');
+
+	return 0;
+}
+
+/*
+ * Percent-decode s into buf until the end of the string or until the
+ * character "end" is found, whichever comes first.
+ *
+ * "%XX" becomes the byte with hexadecimal value XX, '+' becomes a space,
+ * everything else is copied as is. buf is always NUL-terminated on
+ * success.
+ *
+ * Returns the length of the decoded string, or -1 when bufsiz is 0, the
+ * output does not fit or a '%' is not followed by two hex digits. Note
+ * that an escape is only accepted while at least three more bytes fit in
+ * buf, although it stores a single byte.
+ */
+int
+decodeparamuntilend(char *buf, size_t bufsiz, const char *s, int end)
+{
+	const char *in;
+	size_t len = 0;
+
+	if (bufsiz == 0)
+		return -1;
+
+	for (in = s; *in != '\0' && *in != end; in++) {
+		if (*in == '%') {
+			if (len + 3 >= bufsiz)
+				return -1;
+			if (!isxdigit((unsigned char)in[1]) ||
+			    !isxdigit((unsigned char)in[2]))
+				return -1;
+			buf[len++] = hexdigit(in[1]) * 16 + hexdigit(in[2]);
+			in += 2; /* skip the two digits just consumed */
+			continue;
+		}
+
+		if (len + 1 >= bufsiz)
+			return -1;
+		buf[len++] = (*in == '+') ? ' ' : *in;
+	}
+	buf[len] = '\0';
+
+	return len;
+}
+
+/*
+ * Percent-decode one query string value: decoding stops at the next '&'
+ * or at the end of s. Returns what decodeparamuntilend() returns.
+ */
+int
+decodeparam(char *buf, size_t bufsiz, const char *s)
+{
+	const int separator = '&';
+
+	return decodeparamuntilend(buf, bufsiz, s, separator);
+}
+
+/*
+ * Find the parameter named s in a query string.
+ *
+ * A match has to be followed by '=' and has to start at the beginning of
+ * query or right after a '&' or '?'. When the parameter occurs more than
+ * once the last occurrence wins.
+ *
+ * Returns a pointer into query to the still encoded value (the character
+ * after '='), or NULL when the parameter is not present.
+ */
+char *
+getparam(const char *query, const char *s)
+{
+	const char *hit, *value = NULL;
+	size_t keylen = strlen(s);
+
+	for (hit = strstr(query, s); hit; hit = strstr(hit + keylen, s)) {
+		if (hit[keylen] != '=')
+			continue;
+		if (hit == query || hit[-1] == '&' || hit[-1] == '?')
+			value = hit + keylen + 1;
+	}
+
+	return (char *)value;
+}
+
+/*
+ * Status line text ("<code> <reason>") for the HTTP status codes this
+ * program uses. Returns NULL for any other code.
+ */
+const char *
+httpstatusmsg(int code)
+{
+	static const struct {
+		int code;
+		const char *msg;
+	} statuses[] = {
+		{ 200, "200 OK" },
+		{ 202, "202 Accepted" },
+		{ 400, "400 Bad Request" },
+		{ 403, "403 Forbidden" },
+		{ 404, "404 Not Found" },
+		{ 500, "500 Internal Server Error" },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(statuses) / sizeof(statuses[0]); i++) {
+		if (statuses[i].code == code)
+			return statuses[i].msg;
+	}
+
+	return NULL;
+}
+
+/*
+ * Print the CGI "Status" header for code. Nothing is printed for a code
+ * httpstatusmsg() does not know.
+ */
+void
+httpstatus(int code)
+{
+	const char *msg = httpstatusmsg(code);
+
+	if (!msg)
+		return;
+	printf("Status: %s\r\n", msg);
+}
+
+/*
+ * Send a complete plain-text response for status code and exit.
+ *
+ * The body is the status text, followed by ": " and s when s is not
+ * NULL. This function does not return; the exit status is 0.
+ */
+void
+httperror(int code, const char *s)
+{
+	const char *msg = httpstatusmsg(code);
+
+	httpstatus(code);
+	/* last header and the empty line that ends the headers */
+	fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
+
+	if (s)
+		printf("%s: %s\r\n", msg, s);
+	else
+		printf("%s\r\n", msg);
+
+	exit(0);
+}
+
+/*
+ * Shorthands for httperror(): each sends the response for one fixed
+ * status code with s as the detail message and does not return.
+ */
+
+/* 400 Bad Request */
+void
+badrequest(const char *s)
+{
+	httperror(400, s);
+}
+
+/* 403 Forbidden */
+void
+forbidden(const char *s)
+{
+	httperror(403, s);
+}
+
+/* 404 Not Found */
+void
+notfound(const char *s)
+{
+	httperror(404, s);
+}
+
+/* 500 Internal Server Error */
+void
+servererror(const char *s)
+{
+	httperror(500, s);
+}
+
+/*
+ * Append one line for a stored message to the log file.
+ *
+ * file format:
+ *	timestamp TAB feedname TAB data-filename TAB signature
+ *
+ * The timestamp is the global "now". When the log file cannot be opened
+ * a server error response is sent and the program exits.
+ */
+void
+logrequest(const char *feedname, const char *filename, const char *signature)
+{
+	FILE *fp = fopen(logfile, "a");
+
+	if (!fp)
+		servererror("cannot write data");
+
+	fprintf(fp, "%lld\t%s\t%s\t%s\n", (long long)now, feedname, filename,
+	    signature);
+	fclose(fp);
+}
+
+/*
+ * File extension (without the dot) to use for content of type s.
+ * For now only XML is supported, for RSS and Atom, so the content type
+ * is not looked at.
+ */
+char *
+contenttypetoext(const char *s)
+{
+	(void)s;
+
+	return "xml";
+}
+
+/*
+ * Return non-zero when the two SHA1 digests are equal. Every byte is
+ * always inspected, so the running time does not reveal at which
+ * position a forged signature starts to differ.
+ */
+static int
+digestequal(const unsigned char *a, const unsigned char *b)
+{
+	unsigned char diff = 0;
+	size_t i;
+
+	for (i = 0; i < SHA_DIGEST_LENGTH; i++)
+		diff |= a[i] ^ b[i];
+
+	return diff == 0;
+}
+
+/*
+ * CGI entry point.
+ *
+ * The request path has the form <relpath><feedname>[/<token>]. The feed
+ * has to have a config directory and, when a token file exists, the
+ * token in the path has to match it.
+ *
+ * POST: the request body is written to a temporary file and moved into
+ * the directory of the feed. Content-Length, when set, has to match the
+ * number of bytes read. When the feed has a secret the X-Hub-Signature
+ * header has to carry the HMAC-SHA1 of the body. On success the name of
+ * the stored file is printed and a line is appended to the log.
+ *
+ * Other methods: hub.mode=subscribe or unsubscribe echoes hub.challenge
+ * back with status 202; without hub.mode a plain status page is sent.
+ *
+ * Errors are reported to the client with httperror(), which exits.
+ */
+int
+main(void)
+{
+	FILE *fpdata;
+	/* challenge starts out empty: it is tested below even when the
+	 * hub.challenge parameter is absent */
+	char challenge[256] = "", mode[32] = "", signature[128] = "";
+	char requesturi[4096], requesturidecoded[4096];
+	char feedname[256], token[256] = "";
+	char filename[PATH_MAX], tmpfilename[PATH_MAX];
+	char configpath[PATH_MAX], feedpath[PATH_MAX], secretpath[PATH_MAX];
+	char tokenpath[PATH_MAX];
+	char *contentlength = "", *contenttype = "", *method = "GET", *query = "";
+	char *p, *fileext, *tmp, *end;
+	char buf[4096];
+	size_t n, total;
+	long long ll;
+	int i, j, fd, r;
+	/* HMAC */
+	SHA_CTX ctx;
+	unsigned char key_opad[65]; /* outer padding - key XORd with opad */
+	char *key; /* secret of the feed, NULL when there is none */
+	size_t key_len;
+	unsigned char digest[SHA_DIGEST_LENGTH];
+	unsigned char inputdigest[SHA_DIGEST_LENGTH];
+
+	if (unveil(DATADIR, "rwc") == -1)
+		err(1, "unveil");
+	if (pledge("stdio rpath wpath cpath fattr", NULL) == -1)
+		err(1, "pledge");
+
+	if ((tmp = getenv("CONTENT_TYPE")))
+		contenttype = tmp;
+	if ((tmp = getenv("CONTENT_LENGTH")))
+		contentlength = tmp;
+	if ((tmp = getenv("REQUEST_METHOD")))
+		method = tmp;
+	if ((tmp = getenv("QUERY_STRING")))
+		query = tmp;
+
+	/* "8. Authenticated Content Distribution" */
+	if ((p = getenv("HTTP_X_HUB_SIGNATURE"))) {
+		r = snprintf(signature, sizeof(signature), "%s", p);
+		if (r < 0 || (size_t)r >= sizeof(signature))
+			badrequest("invalid signature (truncated)");
+
+		/* accept sha1=digest or sha=digest */
+		if ((tmp = strstr(signature, "sha1=")))
+			tmp += sizeof("sha1=") - 1;
+		else if ((tmp = strstr(signature, "sha=")))
+			tmp += sizeof("sha=") - 1;
+		if (tmp) {
+			/* count the hex digits; p stops at the first other character */
+			for (p = tmp, i = 0; *p; p++, i++) {
+				if (!isxdigit((unsigned char)*p))
+					break;
+			}
+		}
+		/* the digest has to be exactly 40 hex digits up to the end */
+		if (tmp && !*p && i == (SHA_DIGEST_LENGTH * 2)) {
+			for (i = 0, j = 0, p = tmp; i < SHA_DIGEST_LENGTH; i++, j += 2) {
+				inputdigest[i] = (hexdigit(p[j]) << 4) |
+				    hexdigit(p[j + 1]);
+			}
+		} else {
+			badrequest("invalid hash format");
+		}
+	}
+
+	if (!(p = getenv("REQUEST_URI")))
+		p = "";
+	snprintf(requesturi, sizeof(requesturi), "%s", p);
+	if ((p = strchr(requesturi, '?')))
+		*p = '\0'; /* remove query string */
+
+	if (decodeparamuntilend(requesturidecoded, sizeof(requesturidecoded), requesturi, '\0') == -1)
+		badrequest("request URI");
+
+	p = requesturidecoded;
+	if (strncmp(p, relpath, strlen(relpath)))
+		forbidden("invalid relative path");
+	p += strlen(relpath);
+
+	/* first part of path of request URI is the feedname, last part is the (optional) token */
+	if ((tmp = strchr(p, '/'))) {
+		*tmp = '\0'; /* temporary NUL terminate */
+
+		r = snprintf(feedname, sizeof(feedname), "%s", p);
+		if (r < 0 || (size_t)r >= sizeof(feedname))
+			servererror("path truncated");
+
+		r = snprintf(token, sizeof(token), "%s", tmp + 1);
+		if (r < 0 || (size_t)r >= sizeof(token))
+			servererror("path truncated");
+
+		*tmp = '/'; /* restore NUL byte to '/' */
+	} else {
+		r = snprintf(feedname, sizeof(feedname), "%s", p);
+		if (r < 0 || (size_t)r >= sizeof(feedname))
+			servererror("path truncated");
+	}
+	/* the feed name becomes a path component: "." would address the
+	 * config and feeds directories themselves, ".." their parent */
+	if (!strcmp(feedname, ".") || strstr(feedname, ".."))
+		badrequest("invalid feed name");
+
+	/* check if configdir of feedname exists, else skip request and return 404 */
+	r = snprintf(configpath, sizeof(configpath), "%s/%s", configdir, feedname);
+	if (r < 0 || (size_t)r >= sizeof(configpath))
+		servererror("path truncated");
+	if (access(configpath, X_OK) == -1)
+		notfound("feed entrypoint does not exist");
+
+	/* the token is only checked when the feed has a token file */
+	r = snprintf(tokenpath, sizeof(tokenpath), "%s/%s/token", configdir, feedname);
+	if (r < 0 || (size_t)r >= sizeof(tokenpath))
+		servererror("path truncated");
+	if ((tmp = readfile(tokenpath))) {
+		if (strcmp(tmp, token))
+			forbidden("missing or incorrect token in path");
+	}
+
+	if (!strcasecmp(method, "POST")) {
+		if (!feedname[0])
+			badrequest("feed name part of path is missing");
+
+		/* read secret, initialize for HMAC and data signature verification */
+		r = snprintf(secretpath, sizeof(secretpath), "%s/%s/secret", configdir, feedname);
+		if (r < 0 || (size_t)r >= sizeof(secretpath))
+			servererror("path truncated");
+		key = readfile(secretpath);
+		if (key && !signature[0])
+			forbidden("requires signature header X-Hub-Signature");
+
+		if (key) {
+			key_len = strlen(key);
+			hmac_sha1_init(&ctx, (const unsigned char *)key, key_len,
+			    key_opad, sizeof(key_opad));
+		}
+
+		/* temporary file with random characters */
+		if ((now = time(NULL)) == (time_t)-1)
+			servererror("cannot get current time");
+		r = snprintf(tmpfilename, sizeof(tmpfilename), "%s/%s/%lld.XXXXXX", tmpdir, feedname, (long long)now);
+		if (r < 0 || (size_t)r >= sizeof(tmpfilename))
+			servererror("path truncated");
+
+		if ((fd = mkstemp(tmpfilename)) == -1)
+			servererror("cannot create tmpfilename");
+		if (!(fpdata = fdopen(fd, "wb")))
+			servererror(tmpfilename);
+
+		/* copy the request body to the file and hash it on the way */
+		total = 0;
+		while ((n = fread(buf, 1, sizeof(buf), stdin)) > 0) {
+			if (fwrite(buf, 1, n, fpdata) != n)
+				break; /* reported by the ferror(fpdata) check below */
+			if (key)
+				SHA1_Update(&ctx, buf, n); /* hash data for signature */
+			total += n;
+		}
+		if (ferror(stdin)) {
+			fclose(fpdata);
+			unlink(tmpfilename);
+			servererror("cannot process POST message: read error");
+		}
+		if (fflush(fpdata) || ferror(fpdata)) {
+			fclose(fpdata);
+			unlink(tmpfilename);
+			servererror("cannot process POST message: write error");
+		}
+		fclose(fpdata);
+		chmod(tmpfilename, 0644);
+
+		/* if Content-Length is set then check if it matches */
+		if (contentlength[0]) {
+			errno = 0;
+			ll = strtoll(contentlength, &end, 10);
+			/* has to be a plain non-negative number, nothing else */
+			if (errno || end == contentlength || *end != '\0' ||
+			    ll < 0 || (unsigned long long)ll != total) {
+				unlink(tmpfilename);
+				badrequest("Content-Length does not match");
+			}
+		}
+
+		if (key) {
+			/* finalize signature digest */
+			hmac_sha1_final(&ctx, key_opad, digest);
+
+			/* compare digest */
+			if (!digestequal(inputdigest, digest)) {
+				unlink(tmpfilename);
+				forbidden("invalid digest for data");
+			}
+		}
+
+		/* use part of basename of the random temp file as the filename */
+		if (!(tmp = strrchr(tmpfilename, '/')))
+			servererror("invalid path"); /* cannot happen */
+		r = snprintf(feedpath, sizeof(feedpath), "%s/%s", datadir, feedname);
+		if (r < 0 || (size_t)r >= sizeof(feedpath))
+			servererror("path truncated");
+		fileext = contenttypetoext(contenttype);
+		r = snprintf(filename, sizeof(filename), "%s/%s%s%s", feedpath, tmp + 1,
+		    fileext[0] ? "." : "", fileext);
+		if (r < 0 || (size_t)r >= sizeof(filename))
+			servererror("path truncated");
+
+		if ((r = rename(tmpfilename, filename)) != 0) {
+			unlink(filename);
+			unlink(tmpfilename);
+			servererror("cannot process POST message: failed to rename file");
+		}
+		chmod(filename, 0644);
+
+		httpstatus(200);
+		fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
+		fputs("\r\n", stdout);
+
+		/* output stored file: feedname, basename of the file */
+		if ((tmp = strrchr(filename, '/')))
+			tmp++;
+		else
+			tmp = "";
+		printf("%s/%s\n", feedname, tmp);
+
+		/* write to a log file, this could be a pipe or used with tail -f to monitor */
+		logrequest(feedname, tmp, signature);
+
+		return 0;
+	}
+
+	if ((p = getparam(query, "hub.mode"))) {
+		if (decodeparam(mode, sizeof(mode), p) == -1)
+			badrequest("hub.mode");
+	}
+
+	if (!strcmp(mode, "subscribe") || !strcmp(mode, "unsubscribe")) {
+		if ((p = getparam(query, "hub.challenge"))) {
+			if (decodeparam(challenge, sizeof(challenge), p) == -1)
+				badrequest("hub.challenge");
+		}
+		if (!challenge[0])
+			badrequest("hub.challenge is required, but is missing");
+
+		/* confirm the (un)subscription by echoing the challenge */
+		httpstatus(202);
+		fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
+		fputs("\r\n", stdout);
+		printf("%s\r\n", challenge);
+		return 0;
+	} else if (mode[0]) {
+		badrequest("hub.mode: only subscribe or unsubscribe is supported");
+	}
+
+	httpstatus(200);
+	fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
+	fputs("\r\n", stdout);
+	printf("pubsubhubbubblub running perfectly and flapping graciously in the wind.\r\n");
+
+	return 0;
+}
diff --git a/pubsub_gethub.c b/pubsub_gethub.c
@@ -0,0 +1,149 @@
+#include <err.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+
+#undef strlcat
+size_t strlcat(char *, const char *, size_t);
+
+#include "xml.h"
+
+/* true for the ASCII control characters (below space) and DEL */
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+/* ASCII-only lowercase conversion; other values are returned unchanged.
+ * The argument is evaluated more than once. */
+#define TOLOWER(c) ((((unsigned)(c)) - 'A' < 26) ? ((c) | 32) : (c))
+
+/* string and size: expands to two arguments, a string literal and its
+ * length without the terminating NUL */
+#define STRP(s) (s),(sizeof(s)-1)
+
+static XMLParser parser;
+/* parser state: inside a link tag, and which of its attributes is read */
+static int islinktag, ishrefattr, isrelattr;
+/* values of the href and rel attributes of the current link tag */
+static char linkhref[4096], linkrel[256];
+
+/*
+ * strcasestr() included for portability.
+ *
+ * Find the first occurrence of n in h, ignoring the case of ASCII
+ * letters. Returns a pointer into h, h itself when n is empty, or NULL
+ * when n does not occur.
+ */
+char *
+strcasestr(const char *h, const char *n)
+{
+	const char *start;
+	size_t k;
+
+	if (n[0] == '\0')
+		return (char *)h;
+
+	for (start = h; *start != '\0'; start++) {
+		/* length of the common prefix of n and the text at start */
+		k = 0;
+		while (n[k] != '\0' && TOLOWER((unsigned char)n[k]) ==
+		    TOLOWER((unsigned char)start[k]))
+			k++;
+		if (n[k] == '\0')
+			return (char *)start;
+	}
+
+	return NULL;
+}
+
+/* Write s to stdout, leaving out control characters. */
+static void
+printvalue(const char *s)
+{
+	const char *c;
+
+	for (c = s; *c != '\0'; c++) {
+		if (ISCNTRL((unsigned char)*c))
+			continue;
+		putchar(*c);
+	}
+}
+
+/*
+ * Parser callback for the start of a tag.
+ *
+ * Marks the tag as a link tag when its name is "link", or when the text
+ * from the first ":link" in the name onwards is exactly ":link" (both
+ * case-insensitive), and clears the collected attribute values.
+ */
+static void
+xmltagstart(XMLParser *p, const char *t, size_t tl)
+{
+	const char *suffix;
+
+	islinktag = 0;
+
+	suffix = strcasestr(t, ":link");
+	if (!(suffix && !strcasecmp(suffix, ":link")) && strcasecmp(t, "link"))
+		return; /* some other tag */
+
+	islinktag = 1;
+	linkhref[0] = '\0';
+	linkrel[0] = '\0';
+}
+
+/*
+ * Parser callback for when a start tag and its attributes are parsed.
+ *
+ * For a link tag whose rel value starts with "hub" or "self"
+ * (case-insensitive) one line is printed: href TAB rel.
+ */
+static void
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+	int ishub, isself;
+
+	if (!islinktag)
+		return;
+
+	ishub = !strncasecmp(linkrel, STRP("hub"));
+	isself = !strncasecmp(linkrel, STRP("self"));
+	if (!ishub && !isself)
+		return;
+
+	printvalue(linkhref);
+	putchar('\t');
+	printvalue(linkrel);
+	putchar('\n');
+}
+
+/*
+ * Parser callback for the start of an attribute.
+ *
+ * Inside a link tag this records whether the attribute is href or rel
+ * (case-insensitive) and clears the value collected for it so far.
+ */
+static void
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al)
+{
+	ishrefattr = 0;
+	isrelattr = 0;
+
+	if (!islinktag)
+		return;
+
+	if (strcasecmp(a, "href") == 0) {
+		ishrefattr = 1;
+		linkhref[0] = '\0';
+	} else if (strcasecmp(a, "rel") == 0) {
+		isrelattr = 1;
+		linkrel[0] = '\0';
+	}
+}
+
+/*
+ * Parser callback for a piece of attribute value.
+ *
+ * Appends v to the href or rel value of the current link tag. A value
+ * that does not fit in its buffer is truncated by strlcat().
+ */
+static void
+xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
+	const char *v, size_t vl)
+{
+	if (!islinktag)
+		return;
+
+	if (ishrefattr)
+		strlcat(linkhref, v, sizeof(linkhref));
+	else if (isrelattr)
+		strlcat(linkrel, v, sizeof(linkrel));
+}
+
+/*
+ * Parser callback for an entity inside an attribute value.
+ *
+ * Only href and rel attributes are handled. The entity is translated
+ * with xml_entitytostr(); when that yields nothing the entity text is
+ * passed on as ordinary data. Either way the result goes to xmlattr().
+ */
+static void
+xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al,
+	const char *v, size_t vl)
+{
+	char decoded[16];
+	int n;
+
+	if (!ishrefattr && !isrelattr)
+		return;
+
+	n = xml_entitytostr(v, decoded, sizeof(decoded));
+	if (n > 0) {
+		v = decoded;
+		vl = (size_t)n;
+	}
+	xmlattr(p, t, tl, a, al, v, vl);
+}
+
+/*
+ * Read RSS or Atom data from stdin and print the href and rel values of
+ * its hub and self links, one "href TAB rel" line per link.
+ *
+ * Returns 0 on success and 1 when reading stdin or writing stdout
+ * failed, so a caller does not mistake truncated output for a complete
+ * result.
+ */
+int
+main(void)
+{
+	int status = 0;
+
+#ifdef __OpenBSD__
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+#endif
+
+	parser.xmlattr = xmlattr;
+	parser.xmlattrentity = xmlattrentity;
+	parser.xmlattrstart = xmlattrstart;
+	parser.xmltagstart = xmltagstart;
+	parser.xmltagstartparsed = xmltagstartparsed;
+
+	/* NOTE: getnext is defined in xml.h for inline optimization */
+	xml_parse(&parser);
+
+	if (ferror(stdin)) {
+		fputs("read error: <stdin>\n", stderr);
+		status = 1;
+	}
+	if (fflush(stdout) || ferror(stdout)) {
+		fputs("write error: <stdout>\n", stderr);
+		status = 1;
+	}
+
+	return status;
+}
diff --git a/pubsub_setup b/pubsub_setup
@@ -0,0 +1,133 @@
+#!/bin/sh
+
+while getopts "c:su" f; do
+ case "${f}" in
+ s) dosubscribe=1;;
+ u) dounsubscribe=1;;
+ esac
+done
+shift $(expr ${OPTIND} - 1)
+
+base="https://codemadness.org/pubsub/"
+
+# Linux
+shacmd="$(command -v sha256sum)"
+# BSD
+test "${shacmd}" = "" && shacmd=$(command -v sha256)
+if test "${shacmd}" = ""; then
+ echo "No sha256 or sha256sum tool found" >&2
+ exit 1
+fi
+
+# sha()
+# hash stdin with the tool that was found and print only the first
+# space-separated field of its output, the digest.
+sha() {
+	${shacmd} | cut -d ' ' -f 1
+}
+
+# log(s)
+# write the message s to stderr.
+log() {
+	echo "$1" 1>&2
+}
+
+# subscribe(feedname, hub, topic, callback, mode, secret)
+# send a subscription request to the hub. mode is "subscribe" (default)
+# or "unsubscribe". returns 0 when curl succeeded, else 1.
+# NOTE: sh has no local variables, this sets the globals of the same name.
+subscribe() {
+	feedname="$1"
+	hub="$2"
+	topic="$3"
+	callback="$4"
+	mode="${5:-subscribe}"
+	secret="$6"
+	verify="async" # or "sync"
+	lease_seconds=""
+
+	# the values are URLs and free text: let curl URL-encode them, a
+	# topic with a "&" or "+" in it would otherwise corrupt the form data.
+	# "${hub%/}" prevents a double slash when the hub URL ends with "/".
+	# DEBUG: -v is verbose, use -s instead for silent operation.
+	if curl -v -f -H 'User-Agent:' -m 15 \
+		-L --max-redirs 3 \
+		--data-urlencode "hub.callback=${callback}" \
+		--data-urlencode "hub.lease_seconds=${lease_seconds}" \
+		--data-urlencode "hub.mode=${mode}" \
+		--data-urlencode "hub.secret=${secret}" \
+		--data-urlencode "hub.topic=${topic}" \
+		--data-urlencode "hub.verify=${verify}" \
+		"${hub%/}/subscribe"; then
+		log "${mode} OK"
+		return 0
+	else
+		log "${mode} FAIL"
+		return 1
+	fi
+}
+
+feedname="$1"
+hub="$2"
+topic="$3"
+if test "$1" = "" || test "$2" = "" || test "$3" = ""; then
+	echo "usage: $0 [-s] [-u] <feedname> <hub> <topic>" >&2
+	exit 1
+fi
+
+# directory structure for the feed, relative to the current directory.
+mkdir -p "config/${feedname}"
+mkdir -p "feeds/${feedname}"
+mkdir -p "tmp/${feedname}"
+
+# general log
+touch "log"
+
+# refuse to subscribe when state of an earlier subscription exists. this
+# is checked before anything is generated or sent to the hub.
+if test "${dosubscribe}" = "1"; then
+	f="config/${feedname}/hub"
+	if test -f "${f}"; then
+		echo "already registered? file exists: ${f}, skipping subscribing" >&2
+		exit 1
+	fi
+fi
+
+# generate random token if it does not exist. an existing token is kept:
+# it is part of the callback URL the hub already knows.
+f="config/${feedname}/token"
+if ! test -f "${f}"; then
+	token="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
+	printf '%s\n' "${token}" > "${f}"
+fi
+
+# generate random secret if it does not exist. an existing secret is
+# kept: the hub signs its messages with it.
+f="config/${feedname}/secret"
+if ! test -f "${f}"; then
+	secret="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
+	printf '%s\n' "${secret}" > "${f}"
+fi
+
+# read config.
+token=$(cat "config/${feedname}/token" 2>/dev/null)
+secret=$(cat "config/${feedname}/secret" 2>/dev/null)
+
+callback="${base}${feedname}/${token}"
+
+if test "${dosubscribe}" = "1"; then
+	# register at hub. save state when successfully registered.
+	if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "subscribe" "${secret}"; then
+		printf '%s\n' "${callback}" > "config/${feedname}/callback"
+		printf '%s\n' "${hub}" > "config/${feedname}/hub"
+		printf '%s\n' "${topic}" > "config/${feedname}/topic"
+	fi
+fi
+
+if test "${dounsubscribe}" = "1"; then
+	# unregister at hub. remove state when successfully unregistered.
+	if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "unsubscribe" "${secret}"; then
+		rm -f "config/${feedname}/callback"
+		rm -f "config/${feedname}/hub"
+		rm -f "config/${feedname}/topic"
+	fi
+fi
diff --git a/sha1.c b/sha1.c
@@ -0,0 +1,145 @@
+/* Public domain SHA1 implementation based on RFC3174 and libtomcrypt
+ Modified to make function prototypes compatible with OpenSSL / LibreSSL. */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "sha1.h"
+
+/* rotate the 32-bit value n left by k bits; k has to be between 1 and 31 */
+static uint32_t
+rol(uint32_t n, int k)
+{
+	return (n << k) | (n >> (32 - k));
+}
+
+/* the functions of b, c and d used by the four groups of 20 rounds;
+ * F1 and F3 are the same function */
+#define F0(b,c,d) (d ^ (b & (c ^ d)))
+#define F1(b,c,d) (b ^ c ^ d)
+#define F2(b,c,d) ((b & c) | (d & (b | c)))
+#define F3(b,c,d) (b ^ c ^ d)
+/* one round: adds to e and rotates b. These expand to two statements
+ * and read the message schedule W[] of the caller. */
+#define G0(a,b,c,d,e,i) e += rol(a,5)+F0(b,c,d)+W[i]+0x5A827999; b = rol(b,30)
+#define G1(a,b,c,d,e,i) e += rol(a,5)+F1(b,c,d)+W[i]+0x6ED9EBA1; b = rol(b,30)
+#define G2(a,b,c,d,e,i) e += rol(a,5)+F2(b,c,d)+W[i]+0x8F1BBCDC; b = rol(b,30)
+#define G3(a,b,c,d,e,i) e += rol(a,5)+F3(b,c,d)+W[i]+0xCA62C1D6; b = rol(b,30)
+
+/*
+ * Mix one 64-byte block from buf into the hash state s->h.
+ *
+ * Each group of 20 rounds is written as 4 times 5 rounds: instead of
+ * moving the values between a, b, c, d and e after a round, the
+ * arguments of the next round are rotated by one position.
+ */
+static void
+processblock(SHA_CTX *s, const unsigned char *buf)
+{
+	uint32_t W[80], a, b, c, d, e;
+	int i;
+
+	/* words 0-15: the block itself, read as big-endian 32-bit values */
+	for (i = 0; i < 16; i++) {
+		W[i] = ((uint32_t)buf[4*i] << 24) | ((uint32_t)buf[4*i+1] << 16) |
+		    ((uint32_t)buf[4*i+2] << 8) | (uint32_t)buf[4*i+3];
+	}
+	/* words 16-79: derived from earlier words */
+	while (i < 80) {
+		W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
+		i++;
+	}
+
+	a = s->h[0];
+	b = s->h[1];
+	c = s->h[2];
+	d = s->h[3];
+	e = s->h[4];
+
+	i = 0;
+	while (i < 20) {
+		G0(a,b,c,d,e,i++);
+		G0(e,a,b,c,d,i++);
+		G0(d,e,a,b,c,i++);
+		G0(c,d,e,a,b,i++);
+		G0(b,c,d,e,a,i++);
+	}
+	while (i < 40) {
+		G1(a,b,c,d,e,i++);
+		G1(e,a,b,c,d,i++);
+		G1(d,e,a,b,c,i++);
+		G1(c,d,e,a,b,i++);
+		G1(b,c,d,e,a,i++);
+	}
+	while (i < 60) {
+		G2(a,b,c,d,e,i++);
+		G2(e,a,b,c,d,i++);
+		G2(d,e,a,b,c,i++);
+		G2(c,d,e,a,b,i++);
+		G2(b,c,d,e,a,i++);
+	}
+	while (i < 80) {
+		G3(a,b,c,d,e,i++);
+		G3(e,a,b,c,d,i++);
+		G3(d,e,a,b,c,i++);
+		G3(c,d,e,a,b,i++);
+		G3(b,c,d,e,a,i++);
+	}
+
+	/* add the result of this block to the running state */
+	s->h[0] += a;
+	s->h[1] += b;
+	s->h[2] += c;
+	s->h[3] += d;
+	s->h[4] += e;
+}
+
+/*
+ * Append the final padding to the buffered data and process it: a 0x80
+ * byte, zero bytes, and the message length in bits as a 64-bit
+ * big-endian number in the last 8 bytes of a block. Takes an extra block
+ * when the length does not fit after the 0x80 byte.
+ *
+ * c->len is converted from bytes to bits here.
+ */
+static void
+pad(SHA_CTX *c)
+{
+	unsigned used = c->len % 64;
+	int shift;
+
+	c->buf[used++] = 0x80;
+	if (used > 56) {
+		/* no room left for the length: finish this block first */
+		memset(c->buf + used, 0, 64 - used);
+		processblock(c, c->buf);
+		used = 0;
+	}
+	memset(c->buf + used, 0, 56 - used);
+
+	c->len *= 8;
+	for (used = 56, shift = 56; shift >= 0; shift -= 8)
+		c->buf[used++] = c->len >> shift;
+	processblock(c, c->buf);
+}
+
+/*
+ * Prepare c for a new message: no bytes processed yet and the hash
+ * state set to its initial values. Always returns 1.
+ */
+int
+SHA1_Init(SHA_CTX *c)
+{
+	static const uint32_t initial[5] = {
+		0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
+	};
+	int i;
+
+	c->len = 0;
+	for (i = 0; i < 5; i++)
+		c->h[i] = initial[i];
+
+	return 1;
+}
+
+/*
+ * Add len bytes at m to the message being hashed.
+ *
+ * Complete 64-byte blocks are processed right away, the remainder is
+ * kept in c->buf for the next call. Always returns 1.
+ */
+int
+SHA1_Update(SHA_CTX *c, const void *m, size_t len)
+{
+	const uint8_t *in = m;
+	unsigned used = c->len % 64; /* bytes already waiting in c->buf */
+	size_t room;
+
+	c->len += len;
+
+	if (used != 0) {
+		room = 64 - used;
+		if (len < room) {
+			/* still no complete block */
+			memcpy(c->buf + used, in, len);
+			return 1;
+		}
+		/* complete the buffered block and process it */
+		memcpy(c->buf + used, in, room);
+		processblock(c, c->buf);
+		in += room;
+		len -= room;
+	}
+
+	/* whole blocks are processed straight from the input */
+	while (len >= 64) {
+		processblock(c, in);
+		in += 64;
+		len -= 64;
+	}
+	/* keep what is left for later */
+	memcpy(c->buf, in, len);
+
+	return 1;
+}
+
+/*
+ * Finish the message and store the 20-byte digest in md, as the five
+ * state words in big-endian byte order. Always returns 1.
+ */
+int
+SHA1_Final(unsigned char *md, SHA_CTX *c)
+{
+	int word, byte;
+
+	pad(c);
+	for (word = 0; word < 5; word++) {
+		for (byte = 0; byte < 4; byte++)
+			md[4 * word + byte] = c->h[word] >> (24 - 8 * byte);
+	}
+
+	return 1;
+}
diff --git a/sha1.h b/sha1.h
@@ -0,0 +1,13 @@
+#ifndef SHA1_H
+#define SHA1_H
+
+#include <stddef.h> /* size_t */
+#include <stdint.h>
+
+/* state of one SHA1 computation */
+typedef struct sha1 {
+	uint64_t len; /* processed message length */
+	uint32_t h[5]; /* hash state */
+	uint8_t buf[64]; /* message block buffer */
+} SHA_CTX;
+
+/* size of a SHA1 digest in bytes */
+#define SHA_DIGEST_LENGTH 20
+
+/* Start a new message. */
+int SHA1_Init(SHA_CTX *);
+/* Add data to the message. */
+int SHA1_Update(SHA_CTX *, const void *, size_t);
+/* Finish the message and store its SHA_DIGEST_LENGTH byte digest. */
+int SHA1_Final(unsigned char *, SHA_CTX *);
+
+#endif
diff --git a/strlcat.c b/strlcat.c
@@ -0,0 +1,54 @@
+/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ */
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string.h>
+
+/*
+ * Appends src to string dst of size dsize (unlike strncat, dsize is the
+ * full size of dst, not space left). At most dsize-1 characters
+ * will be copied. Always NUL terminates (unless dsize <= strlen(dst)).
+ * Returns strlen(src) + MIN(dsize, strlen(initial dst)).
+ * If retval >= dsize, truncation occurred.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t dsize)
+{
+	size_t dlen, slen, room, ncopy;
+
+	/* Length of dst, without looking past its first dsize bytes. */
+	for (dlen = 0; dlen < dsize && dst[dlen] != '\0'; dlen++)
+		;
+	slen = strlen(src);
+
+	/* No NUL within dsize bytes: nothing can be appended. */
+	if (dlen == dsize)
+		return(dlen + slen);
+
+	/* Copy what fits and leave one byte for the NUL. */
+	room = dsize - dlen - 1;
+	ncopy = slen < room ? slen : room;
+	memcpy(dst + dlen, src, ncopy);
+	dst[dlen + ncopy] = '\0';
+
+	return(dlen + slen); /* count does not include NUL */
+}
diff --git a/xml.c b/xml.c
@@ -0,0 +1,415 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xml.h"
+
+#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26) /* ASCII letter: fold case with |32, then one unsigned range check; (c) is parenthesised so expression arguments are cast as a whole */
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5)) /* ' ' or one of '\t'..'\r' (9-13); evaluates c twice, so pass no side effects */
+
+static void
+xml_parseattrs(XMLParser *x) /* parse the attributes of the current tag, reading up to and including its '>' (or EOF) */
+{
+ size_t namelen = 0, valuelen; /* bytes used in x->name / x->data */
+ int c, endsep, endname = 0, valuestart = 0; /* endname: name is complete; valuestart: '=' was seen */
+
+ while ((c = GETNEXT()) != EOF) {
+ if (ISSPACE(c)) {
+ if (namelen)
+ endname = 1; /* whitespace after a name ends that name */
+ continue;
+ } else if (c == '?')
+ ; /* ignore */
+ else if (c == '=') {
+ x->name[namelen] = '\0';
+ valuestart = 1;
+ endname = 1;
+ } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
+ /* attribute without value: a new name starts after a completed one, or '>' or '/' is reached while a name is pending */
+ x->name[namelen] = '\0';
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ endname = 0;
+ x->name[0] = c; /* c starts the next name; for '>' or '/' the checks at the end of the loop take over */
+ namelen = 1;
+ } else if (namelen && valuestart) {
+ /* attribute with value */
+ if (x->xmlattrstart)
+ x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
+
+ valuelen = 0;
+ if (c == '\'' || c == '"') {
+ endsep = c; /* quoted value: ends at the matching quote */
+ } else {
+ endsep = ' '; /* unquoted value: ' ' stands for "ends at any ISSPACE() character or '>'" */
+ goto startvalue; /* c is already the first character of the value */
+ }
+
+ while ((c = GETNEXT()) != EOF) {
+startvalue:
+ if (c == '&') { /* entities */
+ x->data[valuelen] = '\0';
+ /* call data function with data before entity if there is data */
+ if (valuelen && x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c; /* the entity text is collected in x->data, '&' included */
+ valuelen = 1;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
+ break; /* value ended inside the entity: the collected text is reported as plain value data below */
+ if (valuelen < sizeof(x->data) - 1)
+ x->data[valuelen++] = c;
+ else {
+ /* entity too long for buffer, handle as normal data */
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ break;
+ }
+ if (c == ';') { /* complete entity: hand it over, ';' included */
+ x->data[valuelen] = '\0';
+ if (x->xmlattrentity)
+ x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ valuelen = 0;
+ break;
+ }
+ }
+ } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
+ if (valuelen < sizeof(x->data) - 1) {
+ x->data[valuelen++] = c;
+ } else { /* buffer full: report this chunk and start a new one with c */
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ x->data[0] = c;
+ valuelen = 1;
+ }
+ }
+ if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) { /* end of value: report what is left, then close the attribute */
+ x->data[valuelen] = '\0';
+ if (x->xmlattr)
+ x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ if (x->xmlattrend)
+ x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
+ break;
+ }
+ }
+ namelen = endname = valuestart = 0; /* ready for the next attribute */
+ } else if (namelen < sizeof(x->name) - 1) { /* name character; characters that no longer fit in x->name are dropped */
+ x->name[namelen++] = c;
+ }
+ if (c == '>') {
+ break; /* end of the tag */
+ } else if (c == '/') {
+ x->isshorttag = 1; /* '/' marks the tag as a short tag and discards any pending name */
+ x->name[0] = '\0';
+ namelen = 0;
+ }
+ }
+}
+
+/* Skip a comment body: consume input through the '>' that follows "--" (or to EOF). */
+static void
+xml_parsecomment(XMLParser *x)
+{
+	int ch, dashes = 0; /* dashes: length of the current run of '-', capped at 2 */
+
+	for (;;) {
+		ch = GETNEXT();
+		/* stop at end of input, or at a '>' directly preceded by "--" */
+		if (ch == EOF || (ch == '>' && dashes == 2))
+			return;
+		if (ch != '-')
+			dashes = 0;
+		else if (dashes < 2)
+			dashes++;
+	}
+}
+
+static void
+xml_parsecdata(XMLParser *x) /* read a CDATA section body up to its closing "]]>" (or EOF), passing the text to x->xmlcdata in chunks */
+{
+ size_t datalen = 0, i = 0; /* datalen: bytes buffered in x->data; i: consecutive ']' seen so far (kept at 2 at most) */
+ int c;
+
+ while ((c = GETNEXT()) != EOF) {
+ if (c == ']' || c == '>') { /* possible terminator: report the buffered text first */
+ if (x->xmlcdata && datalen) {
+ x->data[datalen] = '\0';
+ x->xmlcdata(x, x->data, datalen);
+ datalen = 0;
+ }
+ }
+
+ if (c == ']') {
+ if (++i > 2) { /* more than two ']' in a row: the surplus ones are literal text */
+ if (x->xmlcdata)
+ for (; i > 2; i--)
+ x->xmlcdata(x, "]", 1);
+ i = 2;
+ }
+ continue;
+ } else if (c == '>' && i == 2) {
+ return; /* "]]>" closes the section */
+ } else if (i) { /* the pending ']' did not start a terminator: report them as text */
+ if (x->xmlcdata)
+ for (; i > 0; i--)
+ x->xmlcdata(x, "]", 1);
+ i = 0;
+ }
+
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else { /* buffer full: report this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if (x->xmlcdata)
+ x->xmlcdata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+}
+
+/* Encode code point r as UTF-8 into s (not NUL terminated, at most 4 bytes).
+ * Returns the number of bytes written; 0 for code point 0, which writes nothing. */
+static int
+codepointtoutf8(long r, char *s)
+{
+	if (r == 0) /* would encode as a NUL byte */
+		return 0;
+	if (r <= 0x7F) { /* 1 byte: 0aaaaaaa */
+		s[0] = r;
+		return 1;
+	}
+	if (r <= 0x07FF) { /* 2 bytes: 110aaaaa 10bbbbbb */
+		s[0] = 0xC0 | ((r >> 6) & 0x1F);
+		s[1] = 0x80 | (r & 0x3F);
+		return 2;
+	}
+	if (r <= 0xFFFF) { /* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
+		s[0] = 0xE0 | ((r >> 12) & 0x0F);
+		s[1] = 0x80 | ((r >> 6) & 0x3F);
+		s[2] = 0x80 | (r & 0x3F);
+		return 3;
+	}
+	/* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
+	s[0] = 0xF0 | ((r >> 18) & 0x07);
+	s[1] = 0x80 | ((r >> 12) & 0x3F);
+	s[2] = 0x80 | ((r >> 6) & 0x3F);
+	s[3] = 0x80 | (r & 0x3F);
+	return 4;
+}
+
+/* Map a predefined XML entity name (text after '&', with trailing ';') to its
+ * character, stored in buf as a NUL-terminated string. Returns 1, or -1. */
+static int
+namedentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	static const struct {
+		const char *name;
+		int ch;
+	} known[] = {
+		{ "amp;", '&' }, { "lt;", '<' }, { "gt;", '>' },
+		{ "apos;", '\'' }, { "quot;", '"' },
+	};
+	const size_t nknown = sizeof(known) / sizeof(known[0]);
+	size_t k = 0;
+
+	/* need room for the character and the terminating NUL */
+	if (bufsiz < 2)
+		return -1;
+
+	while (k < nknown && strcmp(e, known[k].name) != 0)
+		k++;
+	if (k == nknown)
+		return -1; /* not one of the predefined entities */
+
+	buf[0] = known[k].ch;
+	buf[1] = '\0';
+	return 1;
+}
+
+/* Decode the text after "&#" (a decimal number, or 'x' followed by a hexadecimal
+ * number, terminated by ';') to UTF-8 in buf. Returns the byte length, or -1. */
+static int
+numericentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	const char *digits = e;
+	char *stop;
+	long cp;
+	int base = 10, n;
+
+	if (bufsiz < 5) /* worst case: 4 UTF-8 bytes plus the terminating NUL */
+		return -1;
+	if (*digits == 'x') { /* hexadecimal form: skip the marker */
+		digits++;
+		base = 16;
+	}
+	errno = 0;
+	cp = strtol(digits, &stop, base);
+	if (errno || stop == digits || *stop != ';') /* unparsable or malformed */
+		return -1;
+	if (cp < 0 || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff))
+		return -1; /* outside the Unicode range, or a surrogate */
+	n = codepointtoutf8(cp, buf);
+	buf[n] = '\0';
+	return n;
+}
+
+/* Decode a single entity e (e.g. "&amp;" or "&#38;", leading '&' included) into
+ * buf as a NUL-terminated string. Returns its byte length, or -1 on failure. */
+int
+xml_entitytostr(const char *e, char *buf, size_t bufsiz)
+{
+	const char *body;
+
+	if (*e != '&') /* every entity is introduced by '&' */
+		return -1;
+	body = e + 1;
+	if (*body != '#') /* no '#' marker: a named entity */
+		return namedentitytostr(body, buf, bufsiz);
+	return numericentitytostr(body + 1, buf, bufsiz);
+}
+
+void
+xml_parse(XMLParser *x) /* parse the whole input read through GETNEXT() until EOF, invoking the handlers that are set in x */
+{
+ size_t datalen, tagdatalen; /* bytes used in x->data for text / for the start of a "<!" construct */
+ int c, isend; /* isend: the current tag is an end tag (starts with "</") */
+
+ while ((c = GETNEXT()) != EOF && c != '<')
+ ; /* skip until < */
+
+ while (c != EOF) {
+ if (c == '<') { /* parse tag */
+ if ((c = GETNEXT()) == EOF)
+ return;
+
+ if (c == '!') { /* cdata and comments */
+ for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
+ /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
+ if (tagdatalen <= sizeof("[CDATA[") - 1)
+ x->data[tagdatalen++] = c; /* keep only the first characters, enough to recognise "--" or "[CDATA[" */
+ if (c == '>')
+ break; /* any other "<!...>" construct is skipped up to its first '>' */
+ else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
+ (x->data[0] == '-')) { /* "<!--": comment */
+ xml_parsecomment(x);
+ break;
+ } else if (c == '[') {
+ if (tagdatalen == sizeof("[CDATA[") - 1 &&
+ !strncmp(x->data, "[CDATA[", tagdatalen)) { /* "<![CDATA[": CDATA section */
+ xml_parsecdata(x);
+ break;
+ }
+ }
+ }
+ } else {
+ /* normal tag (open, short open, close), processing instruction. */
+ x->tag[0] = c;
+ x->taglen = 1;
+ x->isshorttag = isend = 0;
+
+ /* treat processing instruction as shorttag, don't strip "?" prefix. */
+ if (c == '?') {
+ x->isshorttag = 1;
+ } else if (c == '/') {
+ if ((c = GETNEXT()) == EOF)
+ return;
+ x->tag[0] = c; /* the tag name starts after the '/' */
+ isend = 1;
+ }
+
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '/')
+ x->isshorttag = 1; /* short tag */
+ else if (c == '>' || ISSPACE(c)) { /* end of the tag name */
+ x->tag[x->taglen] = '\0';
+ if (isend) { /* end tag, starts with </ */
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ } else {
+ /* start tag */
+ if (x->xmltagstart)
+ x->xmltagstart(x, x->tag, x->taglen);
+ if (ISSPACE(c))
+ xml_parseattrs(x); /* reads the attributes and the rest of the tag */
+ if (x->xmltagstartparsed)
+ x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
+ }
+ /* call tagend for shortform or processing instruction */
+ if (x->isshorttag) {
+ if (x->xmltagend)
+ x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
+ x->tag[0] = '\0';
+ x->taglen = 0;
+ }
+ break;
+ } else if (x->taglen < sizeof(x->tag) - 1)
+ x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
+ }
+ }
+ } else {
+ /* parse tag data */
+ datalen = 0;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '&') {
+ if (datalen) { /* report the text collected before the entity */
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ }
+ x->data[0] = c; /* the entity text is collected in x->data, '&' included */
+ datalen = 1;
+ while ((c = GETNEXT()) != EOF) {
+ if (c == '<')
+ break; /* a tag starts inside the entity: the collected text is reported as plain data below */
+ if (datalen < sizeof(x->data) - 1)
+ x->data[datalen++] = c;
+ else {
+ /* entity too long for buffer, handle as normal data */
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ break;
+ }
+ if (c == ';') { /* complete entity: hand it over, ';' included */
+ x->data[datalen] = '\0';
+ if (x->xmldataentity)
+ x->xmldataentity(x, x->data, datalen);
+ datalen = 0;
+ break;
+ }
+ }
+ } else if (c != '<') {
+ if (datalen < sizeof(x->data) - 1) {
+ x->data[datalen++] = c;
+ } else { /* buffer full: report this chunk and start a new one with c */
+ x->data[datalen] = '\0';
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
+ x->data[0] = c;
+ datalen = 1;
+ }
+ }
+ if (c == '<') { /* a tag follows: report the remaining text and let the outer loop parse the tag */
+ x->data[datalen] = '\0';
+ if (x->xmldata && datalen)
+ x->xmldata(x, x->data, datalen);
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/xml.h b/xml.h
@@ -0,0 +1,43 @@
+#ifndef _XML_H_
+#define _XML_H_
+
+#include <stdio.h>
+
+typedef struct xmlparser {
+ /* handlers: each one is optional, the parser only calls those that are not NULL */
+ void (*xmlattr)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t); /* attribute value chunk: tag, taglen, name, namelen, value, valuelen */
+ void (*xmlattrend)(struct xmlparser *, const char *, size_t,
+ const char *, size_t); /* end of an attribute: tag, taglen, name, namelen */
+ void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
+ const char *, size_t); /* start of an attribute: tag, taglen, name, namelen */
+ void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t); /* entity ("&...;") inside an attribute value: tag, taglen, name, namelen, entity, entitylen */
+ void (*xmlcdata)(struct xmlparser *, const char *, size_t); /* chunk of CDATA section text: data, datalen */
+ void (*xmldata)(struct xmlparser *, const char *, size_t); /* chunk of text outside of tags: data, datalen */
+ void (*xmldataentity)(struct xmlparser *, const char *, size_t); /* entity ("&...;") in text outside of tags: entity, entitylen */
+ void (*xmltagend)(struct xmlparser *, const char *, size_t, int); /* end tag, or end of a short tag / processing instruction: tag, taglen, isshorttag */
+ void (*xmltagstart)(struct xmlparser *, const char *, size_t); /* name of a start tag was read: tag, taglen */
+ void (*xmltagstartparsed)(struct xmlparser *, const char *,
+ size_t, int); /* start tag fully read, attributes included: tag, taglen, isshorttag */
+
+#ifndef GETNEXT
+ /* GETNEXT() returns the next input character or EOF. Define it before including
+ this header to override the default (getchar), e.g. to reduce function call overhead. */
+ #define GETNEXT getchar
+#endif
+
+ /* current tag */
+ char tag[1024];
+ size_t taglen; /* length of tag in bytes, without the terminating NUL */
+ /* current tag is in short form ? <tag /> */
+ int isshorttag;
+ /* current attribute name */
+ char name[1024];
+ /* data buffer used for tag data, cdata and attribute data */
+ char data[BUFSIZ];
+} XMLParser;
+
+int xml_entitytostr(const char *, char *, size_t); /* decode one entity string ("&...;") into a buffer of the given size; returns the byte length or -1 */
+void xml_parse(XMLParser *); /* parse the input read through GETNEXT() until EOF, calling the handlers that are set */
+#endif