pubsubhubbubblub

pubsubhubbub client implementation
git clone git://git.codemadness.org/pubsubhubbubblub
Log | Files | Refs | README | LICENSE

commit a9f9a229d5be860a5fdab051fbda7ece66d2dd64
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 28 May 2022 12:09:41 +0200

initial import

Diffstat:
A LICENSE | 15 +++++++++++++++
A Makefile | 17 +++++++++++++++++
A README | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A hmac_sha1.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A hmac_sha1.h | 4 ++++
A pubsub_cgi.c | 463 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A pubsub_gethub.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A pubsub_setup | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A sha1.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A sha1.h | 13 +++++++++++++
A strlcat.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
A xml.c | 415 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A xml.h | 43 +++++++++++++++++++++++++++++++++++++++++++
13 files changed, 1630 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2022 Hiltjo Posthuma <hiltjo@codemadness.org> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Makefile b/Makefile @@ -0,0 +1,17 @@ +.POSIX: + +PREFIX = /usr/local +CGIDIR = /var/www/cgi-bin + +build: clean + ${CC} -c sha1.c ${CFLAGS} ${CPPFLAGS} + ${CC} -c hmac_sha1.c ${CFLAGS} ${CPPFLAGS} + ${CC} -c strlcat.c xml.c ${CFLAGS} ${CPPFLAGS} + ${CC} -c pubsub_cgi.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE + ${CC} -c pubsub_gethub.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE + # link + ${CC} -o pubsub_cgi hmac_sha1.o sha1.o pubsub_cgi.o ${LDFLAGS} -static -s + ${CC} -o pubsub_gethub strlcat.o xml.o pubsub_gethub.o ${LDFLAGS} + +clean: + rm -f *.o pubsub_cgi pubsub_gethub diff --git a/README b/README @@ -0,0 +1,116 @@ +pubsubhubbubblub +---------------- + +Generic pubsubhubbub client implementation. +Helper scripts to use it with sfeed. + + +What is it +---------- + +pubsubhubbub is a publisher/subscriber technology used to push updates in a webhook-like way. +This allows to push content updates, instead of polling for news in an interval. + + +Features +-------- + +- Not many dependencies. +- Uses pledge and unveil on OpenBSD. +- Signatures (hub.secret) support, Pubsubhub 0.4 core SHA1 only. 
+ + +Dependencies +------------ + +- C compiler + + +Files +----- + +pubsub_cgi.c - Small stupid PubSubHubBub implementation as a CGI program. +pubsub_gethub - Helper program that extracts the hub and feed URL from RSS or Atom feed data. +pubsub_setup - Helper script that sets up the directory structure for + processing the feed for the CGI program. It also has a + -s option to subscribe and a -u option to unsubscribe at a hub. + + +How to install +-------------- + +For the CGI program: + +OpenBSD httpd and slowcgi, httpd.conf: + + location "/pubsub/**" { + request strip 1 + root "/cgi-bin/pubsub" + fastcgi socket "/run/slowcgi.sock" + } + +Compile pubsub_cgi.c statically and copy it to /var/www/cgi-bin/pubsub + +- Create a directory with write-access for the pubsub CGI program + /var/www/pubsub-data/feedname. The pubsub_setup script can be used to create + the directories. +- Make sure to set the proper permissions for the CGI program (slowcgi) and + HTTPd. +- The base URL of the CGI program can be changed in the pubsub_setup script. + + +How does it work +---------------- + +The CGI program handles requests to a per-feed callback URL of the form base/feedname/token, for example https://codemadness.org/pubsub/slashdot/secrettoken + + +Directory structure: + +/pubsub-data/config/feedname/ - Directory with metadata about the feed. +/pubsub-data/config/feedname/hub - The hub URL, for example http://pubsubhubbub.appspot.com/ . +/pubsub-data/config/feedname/topic - hub.topic, the feed URL. +/pubsub-data/config/feedname/secret - hub.secret for calculating the message digest, + see Section 8 of Pubsubhubbub core 0.4. +/pubsub-data/config/feedname/token - File containing a line with a secret token. This makes sure an entrypoint + is not easily guessable (by different hubs etc). +/pubsub-data/feeds/feedname/ - Directory containing processed messages. +/pubsub-data/tmp/feedname/ - Temporary directory to process messages. + Messages are moved to the feeds/feedname directory on success. +/pubsub-data/log - Log file, TAB-separated. 
+ + +Example +------- + +Get the hub and feed URL: + + curl -s http://rss.slashdot.org/Slashdot/slashdot | pubsub_gethub + + http://rss.slashdot.org/Slashdot/slashdot self + http://pubsubhubbub.appspot.com/ hub + +Setup the feed for the CGI program: + cd /var/www/pubsub-data + pubsub_setup -s 'slashdot' 'http://pubsubhubbub.appspot.com/' 'http://rss.slashdot.org/Slashdot/slashdot' + + +Monitor script example +---------------------- + +This monitors the log file using tail(1) and uses sfeed and sfeed_plain to write the line to stdout. +This can then be piped to the suckless ii(1) program for IRC notifications for example. +It uses sfeed for parsing RSS and Atom content and formats it to a plain-text list. + + #!/bin/sh + cd /var/www/pubsub-data + tail -f log | \ + LC_ALL=C awk '{ print $2 "\t" $3; fflush(); }' | \ + while IFS=" " read -r feed file; do sfeed < "feeds/${feed}/${file}"; done | \ + sfeed_plain + + +References +---------- + +Pubsubhubbub core 0.4: https://pubsubhubbub.github.io/PubSubHubbub/pubsubhubbub-core-0.4.html diff --git a/hmac_sha1.c b/hmac_sha1.c @@ -0,0 +1,63 @@ +/* Adapted from RFC2104 hmac_md5, some code-style changes and data streaming support. 
*/ + +#include <string.h> +#include <stdio.h> + +#include "hmac_sha1.h" + +void +hmac_sha1_init(SHA_CTX *ctx, const unsigned char *key, size_t key_len, +unsigned char *k_opad, size_t k_opadlen) +{ + SHA_CTX tctx; + unsigned char k_ipad[65]; /* inner padding - key XORd with ipad */ + unsigned char tk[20]; + int i; + + /* if key is longer than 64 bytes reset it to key=SHA1(key) */ + if (key_len > 64) { + SHA1_Init(&tctx); + SHA1_Update(&tctx, key, key_len); + SHA1_Final(tk, &tctx); + + key = tk; + key_len = 20; + } + + /* + * the HMAC_SHA1 transform looks like: + * + * SHA1(K XOR opad, SHA1(K XOR ipad, text)) + * + * where K is an n byte key + * ipad is the byte 0x36 repeated 64 times + * opad is the byte 0x5c repeated 64 times + * and text is the data being protected + */ + + /* start out by storing key in pads */ + memset(k_ipad, 0, sizeof(k_ipad)); + memset(k_opad, 0, k_opadlen); + memcpy(k_ipad, key, key_len); + memcpy(k_opad, key, key_len); + + /* XOR key with ipad and opad values */ + for (i = 0; i < 64; i++) { + k_ipad[i] ^= 0x36; + k_opad[i] ^= 0x5c; + } + /* perform inner SHA1 */ + SHA1_Init(ctx); /* init context for 1st pass */ + SHA1_Update(ctx, k_ipad, 64); /* start with inner pad */ +} + +void +hmac_sha1_final(SHA_CTX *ctx, const unsigned char *k_opad, unsigned char *digest) +{ + SHA1_Final(digest, ctx); /* finish up 1st pass */ + /* perform outer SHA1 */ + SHA1_Init(ctx); /* init context for 2nd pass */ + SHA1_Update(ctx, k_opad, 64); /* start with outer pad */ + SHA1_Update(ctx, digest, 20); /* then results of 1st hash */ + SHA1_Final(digest, ctx); /* finish up 2nd pass */ +} diff --git a/hmac_sha1.h b/hmac_sha1.h @@ -0,0 +1,4 @@ +#include "sha1.h" + +void hmac_sha1_init(SHA_CTX *, const unsigned char *, size_t, unsigned char *, size_t); +void hmac_sha1_final(SHA_CTX *, const unsigned char *, unsigned char *); diff --git a/pubsub_cgi.c b/pubsub_cgi.c @@ -0,0 +1,463 @@ +#include <sys/stat.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> 
+#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#ifdef __OpenBSD__ +#include <unistd.h> +#else +#define pledge(p1,p2) 0 +#define unveil(p1,p2) 0 +#endif + +#include "hmac_sha1.h" + +static const char *relpath = "/pubsub/"; + +#define DATADIR "/pubsub-data" + +static const char *configdir = DATADIR "/config"; +static const char *datadir = DATADIR "/feeds"; +static const char *tmpdir = DATADIR "/tmp"; +static const char *logfile = DATADIR "/log"; +static time_t now; + +char * +readfile(const char *path) +{ + static char buf[256]; + FILE *fp; + + if (!(fp = fopen(path, "rb"))) + goto err; + if (!fgets(buf, sizeof(buf), fp)) + goto err; + fclose(fp); + buf[strcspn(buf, "\n")] = '\0'; + return buf; + +err: + if (fp) + fclose(fp); + return NULL; +} + +int +hexdigit(int c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + + return 0; +} + +/* decode until NUL separator or end of "key". */ +int +decodeparamuntilend(char *buf, size_t bufsiz, const char *s, int end) +{ + size_t i; + + if (!bufsiz) + return -1; + + for (i = 0; *s && *s != end; s++) { + switch (*s) { + case '%': + if (i + 3 >= bufsiz) + return -1; + if (!isxdigit((unsigned char)*(s+1)) || + !isxdigit((unsigned char)*(s+2))) + return -1; + buf[i++] = hexdigit(*(s+1)) * 16 + hexdigit(*(s+2)); + s += 2; + break; + case '+': + if (i + 1 >= bufsiz) + return -1; + buf[i++] = ' '; + break; + default: + if (i + 1 >= bufsiz) + return -1; + buf[i++] = *s; + break; + } + } + buf[i] = '\0'; + + return i; +} + +/* decode until NUL separator or end of "key". 
*/ +int +decodeparam(char *buf, size_t bufsiz, const char *s) +{ + return decodeparamuntilend(buf, bufsiz, s, '&'); +} + +char * +getparam(const char *query, const char *s) +{ + const char *p, *last = NULL; + size_t len; + + len = strlen(s); + for (p = query; (p = strstr(p, s)); p += len) { + if (p[len] == '=' && (p == query || p[-1] == '&' || p[-1] == '?')) + last = p + len + 1; + } + + return (char *)last; +} + +const char * +httpstatusmsg(int code) +{ + switch (code) { + case 200: return "200 OK"; + case 202: return "202 Accepted"; + case 400: return "400 Bad Request"; + case 403: return "403 Forbidden"; + case 404: return "404 Not Found"; + case 500: return "500 Internal Server Error"; + } + return NULL; +} + +void +httpstatus(int code) +{ + const char *msg; + + if ((msg = httpstatusmsg(code))) + printf("Status: %s\r\n", msg); +} + +void +httperror(int code, const char *s) +{ + httpstatus(code); + fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout); + fputs("\r\n", stdout); + if (s) + printf("%s: %s\r\n", httpstatusmsg(code), s); + else + printf("%s\r\n", httpstatusmsg(code)); + exit(0); +} + +void +badrequest(const char *s) +{ + httperror(400, s); +} + +void +forbidden(const char *s) +{ + httperror(403, s); +} + +void +notfound(const char *s) +{ + httperror(404, s); +} + +void +servererror(const char *s) +{ + httperror(500, s); +} + +void +logrequest(const char *feedname, const char *filename, const char *signature) +{ + FILE *fp; + + /* file format: timestamp TAB feedname TAB data-filename */ + if (!(fp = fopen(logfile, "a"))) + servererror("cannot write data"); + fprintf(fp, "%lld\t", (long long)now); + fputs(feedname, fp); + fputs("\t", fp); + fputs(filename, fp); + fputs("\t", fp); + fputs(signature, fp); + fputs("\n", fp); + fclose(fp); +} + +char * +contenttypetoext(const char *s) +{ + return "xml"; /* for now just support XML, for RSS and Atom */ +} + +int +main(void) +{ + FILE *fpdata; + char challenge[256], mode[32] = "", signature[128] = ""; 
+ char requesturi[4096], requesturidecoded[4096]; + char feedname[256], token[256] = ""; + char filename[PATH_MAX], tmpfilename[PATH_MAX]; + char configpath[PATH_MAX], feedpath[PATH_MAX], secretpath[PATH_MAX]; + char tokenpath[PATH_MAX]; + char *contentlength = "", *contenttype = "", *method = "GET", *query = ""; + char *p, *fileext, *tmp; + char buf[4096]; + size_t n, total; + long long ll; + int i, j, fd, r; + /* HMAC */ + SHA_CTX ctx; + unsigned char key_opad[65]; /* outer padding - key XORd with opad */ + unsigned char *key; + size_t key_len; + unsigned char digest[SHA_DIGEST_LENGTH]; + unsigned char inputdigest[SHA_DIGEST_LENGTH]; + + if (unveil(DATADIR, "rwc") == -1) + err(1, "unveil"); + if (pledge("stdio rpath wpath cpath fattr", NULL) == -1) + err(1, "pledge"); + + if ((tmp = getenv("CONTENT_TYPE"))) + contenttype = tmp; + if ((tmp = getenv("CONTENT_LENGTH"))) + contentlength = tmp; + if ((tmp = getenv("REQUEST_METHOD"))) + method = tmp; + if ((tmp = getenv("QUERY_STRING"))) + query = tmp; + + /* "8. 
Authenticated Content Distribution" */ + if ((p = getenv("HTTP_X_HUB_SIGNATURE"))) { + r = snprintf(signature, sizeof(signature), "%s", p); + if (r < 0 || (size_t)r >= sizeof(signature)) + badrequest("invalid signature (truncated)"); + + /* accept sha1=digest or sha=digest */ + if ((tmp = strstr(signature, "sha1="))) + tmp += sizeof("sha1=") - 1; + else if ((tmp = strstr(signature, "sha="))) + tmp += sizeof("sha=") - 1; + if (tmp) { + for (p = tmp, i = 0; *p; p++, i++) { + if (!isxdigit((unsigned char)*p)) + break; + } + } + if (tmp && !*p && i == (SHA_DIGEST_LENGTH * 2)) { + for (i = 0, j = 0, p = tmp; i < SHA_DIGEST_LENGTH; i++, j += 2) { + inputdigest[i] = (hexdigit(p[j]) << 4) | + hexdigit(p[j + 1]); + } + } else { + badrequest("invalid hash format"); + } + } + + if (!(p = getenv("REQUEST_URI"))) + p = ""; + snprintf(requesturi, sizeof(requesturi), "%s", p); + if ((p = strchr(requesturi, '?'))) + *p = '\0'; /* remove query string */ + + if (decodeparamuntilend(requesturidecoded, sizeof(requesturidecoded), requesturi, '\0') == -1) + badrequest("request URI"); + + p = requesturidecoded; + if (strncmp(p, relpath, strlen(relpath))) + forbidden("invalid relative path"); + p += strlen(relpath); + + /* first part of path of request URI is the feedname, last part is the (optional) token */ + if ((tmp = strchr(p, '/'))) { + *tmp = '\0'; /* temporary NUL terminate */ + + r = snprintf(feedname, sizeof(feedname), "%s", p); + if (r < 0 || (size_t)r >= sizeof(feedname)) + servererror("path truncated"); + + r = snprintf(token, sizeof(token), "%s", tmp + 1); + if (r < 0 || (size_t)r >= sizeof(token)) + servererror("path truncated"); + + *tmp = '/'; /* restore NUL byte to '/' */ + } else { + r = snprintf(feedname, sizeof(feedname), "%s", p); + if (r < 0 || (size_t)r >= sizeof(feedname)) + servererror("path truncated"); + } + if (strstr(feedname, "..")) + badrequest("invalid feed name"); + + /* check if configdir of feedname exists, else skip request and return 404 */ + r = 
snprintf(configpath, sizeof(configpath), "%s/%s", configdir, feedname); + if (r < 0 || (size_t)r >= sizeof(configpath)) + servererror("path truncated"); + if (access(configpath, X_OK) == -1) + notfound("feed entrypoint does not exist"); + + r = snprintf(tokenpath, sizeof(tokenpath), "%s/%s/token", configdir, feedname); + if (r < 0 || (size_t)r >= sizeof(tokenpath)) + servererror("path truncated"); + if ((tmp = readfile(tokenpath))) { + if (strcmp(tmp, token)) + forbidden("missing or incorrect token in path"); + } + + if (!strcasecmp(method, "POST")) { + if (!feedname[0]) + badrequest("feed name part of path is missing"); + + /* read secret, initialize for HMAC and data signature verification */ + r = snprintf(secretpath, sizeof(secretpath), "%s/%s/secret", configdir, feedname); + if (r < 0 || (size_t)r >= sizeof(secretpath)) + servererror("path truncated"); + key = readfile(secretpath); + if (key && !signature[0]) + forbidden("requires signature header X-Hub-Signature"); + + if (key) { + key_len = strlen(key); + hmac_sha1_init(&ctx, key, key_len, key_opad, sizeof(key_opad)); + } + + /* temporary file with random characters */ + if ((now = time(NULL)) == (time_t)-1) + servererror("cannot get current time"); + r = snprintf(tmpfilename, sizeof(tmpfilename), "%s/%s/%lld.XXXXXX", tmpdir, feedname, (long long)now); + if (r < 0 || (size_t)r >= sizeof(tmpfilename)) + servererror("path truncated"); + + if ((fd = mkstemp(tmpfilename)) == -1) + servererror("cannot create tmpfilename"); + if (!(fpdata = fdopen(fd, "wb"))) + servererror(tmpfilename); + + total = 0; + while ((n = fread(buf, 1, sizeof(buf), stdin)) == sizeof(buf)) { + if (fwrite(buf, 1, n, fpdata) != n) + break; + if (key) + SHA1_Update(&ctx, buf, n); /* hash data for signature */ + total += n; + } + if (n) { + fwrite(buf, 1, n, fpdata); + if (key) + SHA1_Update(&ctx, buf, n); + total += n; + } + if (ferror(stdin)) { + fclose(fpdata); + unlink(tmpfilename); + servererror("cannot process POST message: read 
error"); + } + if (fflush(fpdata) || ferror(fpdata)) { + fclose(fpdata); + unlink(tmpfilename); + servererror("cannot process POST message: write error"); + } + fclose(fpdata); + chmod(tmpfilename, 0644); + + /* if Content-Length is set then check if it matches */ + if (contentlength[0]) { + ll = strtoll(contentlength, NULL, 10); + if (ll < 0 || (size_t)ll != total) { + unlink(tmpfilename); + badrequest("Content-Length does not match"); + } + } + + if (key) { + /* finalize signature digest */ + hmac_sha1_final(&ctx, key_opad, digest); + + /* compare digest */ + if (memcmp(inputdigest, digest, sizeof(digest))) { + unlink(tmpfilename); + forbidden("invalid digest for data"); + } + } + + /* use part of basename of the random temp file as the filename */ + if (!(tmp = strrchr(tmpfilename, '/'))) + servererror("invalid path"); /* cannot happen */ + r = snprintf(feedpath, sizeof(feedpath), "%s/%s", datadir, feedname); + if (r < 0 || (size_t)r >= sizeof(feedpath)) + servererror("path truncated"); + fileext = contenttypetoext(contenttype); + r = snprintf(filename, sizeof(filename), "%s/%s%s%s", feedpath, tmp + 1, + fileext[0] ? "." 
: "", fileext); + if (r < 0 || (size_t)r >= sizeof(filename)) + servererror("path truncated"); + + if ((r = rename(tmpfilename, filename)) != 0) { + unlink(filename); + unlink(tmpfilename); + servererror("cannot process POST message: failed to rename file"); + } + chmod(filename, 0644); + + httpstatus(200); + fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout); + fputs("\r\n", stdout); + + /* output stored file: feedname, basename of the file */ + if ((tmp = strrchr(filename, '/'))) + tmp++; + else + tmp = ""; + printf("%s/%s\n", feedname, tmp); + + /* write to a log file, this could be a pipe or used with tail -f to monitor */ + logrequest(feedname, tmp, signature); + + return 0; + } + + if ((p = getparam(query, "hub.mode"))) { + if (decodeparam(mode, sizeof(mode), p) == -1) + badrequest("hub.mode"); + } + + if (!strcmp(mode, "subscribe") || !strcmp(mode, "unsubscribe")) { + if ((p = getparam(query, "hub.challenge"))) { + if (decodeparam(challenge, sizeof(challenge), p) == -1) + badrequest("hub.challenge"); + } + if (!challenge[0]) + badrequest("hub.challenge is required, but is missing"); + + httpstatus(202); + fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout); + fputs("\r\n", stdout); + printf("%s\r\n", challenge); + return 0; + } else if (mode[0]) { + badrequest("hub.mode: only subscribe or unsubscribe is supported"); + } + + httpstatus(200); + fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout); + fputs("\r\n", stdout); + printf("pubsubhubbubblub running perfectly and flapping graciously in the wind.\r\n"); + + return 0; +} diff --git a/pubsub_gethub.c b/pubsub_gethub.c @@ -0,0 +1,149 @@ +#include <err.h> +#include <stdio.h> +#include <strings.h> +#include <unistd.h> + +#undef strlcat +size_t strlcat(char *, const char *, size_t); + +#include "xml.h" + +#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f) +#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? 
((c) | 32) : (c)) + +/* string and size */ +#define STRP(s) s,sizeof(s)-1 + +static XMLParser parser; +static int islinktag, ishrefattr, isrelattr; +static char linkhref[4096], linkrel[256]; + +/* strcasestr() included for portability */ +char * +strcasestr(const char *h, const char *n) +{ + size_t i; + + if (!n[0]) + return (char *)h; + + for (; *h; ++h) { + for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) == + TOLOWER((unsigned char)h[i]); ++i) + ; + if (n[i] == '\0') + return (char *)h; + } + + return NULL; +} + +static void +printvalue(const char *s) +{ + for (; *s; s++) + if (!ISCNTRL((unsigned char)*s)) + putchar(*s); +} + +static void +xmltagstart(XMLParser *p, const char *t, size_t tl) +{ + islinktag = 0; + char *l; + + if (((l = strcasestr(t, ":link")) && !strcasecmp(l, ":link")) || + !strcasecmp(t, "link")) { + islinktag = 1; + linkhref[0] = '\0'; + linkrel[0] = '\0'; + } +} + +static void +xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) +{ + if (!islinktag) + return; + + if (strncasecmp(linkrel, STRP("hub")) && + strncasecmp(linkrel, STRP("self"))) + return; + + printvalue(linkhref); + putchar('\t'); + printvalue(linkrel); + putchar('\n'); +} + +static void +xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al) +{ + ishrefattr = isrelattr = 0; + + if (!islinktag) + return; + + if (!strcasecmp(a, "href")) { + ishrefattr = 1; + linkhref[0] = '\0'; + } else if (!strcasecmp(a, "rel")) { + isrelattr = 1; + linkrel[0] = '\0'; + } +} + +static void +xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, + const char *v, size_t vl) +{ + if (islinktag) { + if (ishrefattr) + strlcat(linkhref, v, sizeof(linkhref)); + else if (isrelattr) + strlcat(linkrel, v, sizeof(linkrel)); + } +} + +static void +xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al, + const char *v, size_t vl) +{ + char buf[16]; + int len; + + if (!ishrefattr && !isrelattr) + return; + + /* try to 
translate entity, else just pass as data to + * xmlattr handler. */ + if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0) + xmlattr(p, t, tl, a, al, buf, (size_t)len); + else + xmlattr(p, t, tl, a, al, v, vl); +} + +int +main(void) +{ +#ifdef __OpenBSD__ + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); +#endif + + parser.xmlattr = xmlattr; + parser.xmlattrentity = xmlattrentity; + parser.xmlattrstart = xmlattrstart; + parser.xmltagstart = xmltagstart; + parser.xmltagstartparsed = xmltagstartparsed; + + /* NOTE: getnext is defined in xml.h for inline optimization */ + xml_parse(&parser); + + if (ferror(stdin)) + fputs("read error: <stdin>\n", stderr);; + if (fflush(stdout) || ferror(stdout)) + fputs("write error: <stdout>\n", stderr); + + return 0; +} diff --git a/pubsub_setup b/pubsub_setup @@ -0,0 +1,133 @@ +#!/bin/sh + +while getopts "c:su" f; do + case "${f}" in + s) dosubscribe=1;; + u) dounsubscribe=1;; + esac +done +shift $(expr ${OPTIND} - 1) + +base="https://codemadness.org/pubsub/" + +# Linux +shacmd="$(command -v sha256sum)" +# BSD +test "${shacmd}" = "" && shacmd=$(command -v sha256) +if test "${shacmd}" = ""; then + echo "No sha256 or sha256sum tool found" >&2 + exit 1 +fi + +# sha() +sha() { + ${shacmd} | cut -f 1 -d ' ' +} + +# log(s) +log() { + echo "$1" >&2 +} + +# subscribe(feedname, hub, topic, callback, mode, secret) +subscribe() { + feedname="$1" + hub="$2" + topic="$3" + callback="$4" + mode="${5:-subscribe}" + secret="$6" + verify="async" # or "sync" + lease_seconds="" + +# if curl -s -f -H 'User-Agent:' -m 15 \ + # DEBUG + if curl -v -f -H 'User-Agent:' -m 15 \ + -L --max-redirs 3 \ + --data-raw "hub.callback=${callback}" \ + --data-raw "hub.lease_seconds=${lease_seconds}" \ + --data-raw "hub.mode=${mode}" \ + --data-raw "hub.secret=${secret}" \ + --data-raw "hub.topic=${topic}" \ + --data-raw "hub.verify=${verify}" \ + "${hub}/subscribe"; then + log "${mode} OK" + return 0 + else + log "${mode} FAIL" + return 1 + fi +} + 
+feedname="$1" +hub="$2" +topic="$3" +if test "$1" = "" -o "$2" = "" -o "$3" = ""; then + echo "usage: $0 [-s] [-u] <feedname> <hub> <topic>" >&2 + exit 1 +fi + +isnew=1 +test -d "config/${feedname}" && isnew=0 + +mkdir -p "config/${feedname}" +mkdir -p "feeds/${feedname}" +mkdir -p "tmp/${feedname}" + +# general log +touch "log" + +if test "${dosubscribe}" = "1"; then + f="config/${feedname}/hub" + if test -f "${f}"; then + echo "already registered? file exists: ${f}, skipping subscribing" >&2 + exit 1 + fi +fi + +# generate random token only if it does not exist: an existing token is part of the subscribed callback URL. +f="config/${feedname}/token" +if ! test -f "${f}"; then + token="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)" + echo "${token}" > "${f}" +fi + +# generate random secret only if it does not exist: an existing secret is already known to the hub. +f="config/${feedname}/secret" +if ! test -f "${f}"; then + secret="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)" + echo "${secret}" > "${f}" +fi + +# read config. +f="config/${feedname}/token" +token=$(cat "${f}" 2>/dev/null) +callback="$1/${token}" +f="config/${feedname}/secret" +secret=$(cat "${f}" 2>/dev/null) + +callback="${base}${feedname}/${token}" + +if test "${dosubscribe}" = "1"; then + f="config/${feedname}/hub" + if test -f "${f}"; then + echo "already registered? file exists: ${f}, skipping subscribing" >&2 + exit 1 + fi + + # register at hub. save state when successfully registered. + if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "subscribe" "${secret}"; then + printf '%s\n' "${callback}" > "config/${feedname}/callback" + printf '%s\n' "${hub}" > "config/${feedname}/hub" + printf '%s\n' "${topic}" > "config/${feedname}/topic" + fi +fi + +if test "${dounsubscribe}" = "1"; then + # unregister at hub. remove state when successfully unregistered. 
+ if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "unsubscribe" "${secret}"; then + rm -f "config/${feedname}/callback" + rm -f "config/${feedname}/hub" + rm -f "config/${feedname}/topic" + fi +fi diff --git a/sha1.c b/sha1.c @@ -0,0 +1,145 @@ +/* Public domain SHA1 implementation based on RFC3174 and libtomcrypt + Modified to make function prototypes compatible with OpenSSL / LibreSSL. */ + +#include <stdint.h> +#include <string.h> + +#include "sha1.h" + +static uint32_t rol(uint32_t n, int k) { return (n << k) | (n >> (32-k)); } +#define F0(b,c,d) (d ^ (b & (c ^ d))) +#define F1(b,c,d) (b ^ c ^ d) +#define F2(b,c,d) ((b & c) | (d & (b | c))) +#define F3(b,c,d) (b ^ c ^ d) +#define G0(a,b,c,d,e,i) e += rol(a,5)+F0(b,c,d)+W[i]+0x5A827999; b = rol(b,30) +#define G1(a,b,c,d,e,i) e += rol(a,5)+F1(b,c,d)+W[i]+0x6ED9EBA1; b = rol(b,30) +#define G2(a,b,c,d,e,i) e += rol(a,5)+F2(b,c,d)+W[i]+0x8F1BBCDC; b = rol(b,30) +#define G3(a,b,c,d,e,i) e += rol(a,5)+F3(b,c,d)+W[i]+0xCA62C1D6; b = rol(b,30) + +static void +processblock(SHA_CTX *s, const unsigned char *buf) +{ + uint32_t W[80], a, b, c, d, e; + int i; + + for (i = 0; i < 16; i++) { + W[i] = (uint32_t)buf[4*i]<<24; + W[i] |= (uint32_t)buf[4*i+1]<<16; + W[i] |= (uint32_t)buf[4*i+2]<<8; + W[i] |= buf[4*i+3]; + } + for (; i < 80; i++) + W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1); + a = s->h[0]; + b = s->h[1]; + c = s->h[2]; + d = s->h[3]; + e = s->h[4]; + for (i = 0; i < 20; ) { + G0(a,b,c,d,e,i++); + G0(e,a,b,c,d,i++); + G0(d,e,a,b,c,i++); + G0(c,d,e,a,b,i++); + G0(b,c,d,e,a,i++); + } + while (i < 40) { + G1(a,b,c,d,e,i++); + G1(e,a,b,c,d,i++); + G1(d,e,a,b,c,i++); + G1(c,d,e,a,b,i++); + G1(b,c,d,e,a,i++); + } + while (i < 60) { + G2(a,b,c,d,e,i++); + G2(e,a,b,c,d,i++); + G2(d,e,a,b,c,i++); + G2(c,d,e,a,b,i++); + G2(b,c,d,e,a,i++); + } + while (i < 80) { + G3(a,b,c,d,e,i++); + G3(e,a,b,c,d,i++); + G3(d,e,a,b,c,i++); + G3(c,d,e,a,b,i++); + G3(b,c,d,e,a,i++); + } + s->h[0] += a; + s->h[1] += b; + 
s->h[2] += c; + s->h[3] += d; + s->h[4] += e; +} + +static void +pad(SHA_CTX *c) +{ + unsigned r = c->len % 64; + + c->buf[r++] = 0x80; + if (r > 56) { + memset(c->buf + r, 0, 64 - r); + r = 0; + processblock(c, c->buf); + } + memset(c->buf + r, 0, 56 - r); + c->len *= 8; + c->buf[56] = c->len >> 56; + c->buf[57] = c->len >> 48; + c->buf[58] = c->len >> 40; + c->buf[59] = c->len >> 32; + c->buf[60] = c->len >> 24; + c->buf[61] = c->len >> 16; + c->buf[62] = c->len >> 8; + c->buf[63] = c->len; + processblock(c, c->buf); +} + +int +SHA1_Init(SHA_CTX *c) +{ + c->len = 0; + c->h[0] = 0x67452301; + c->h[1] = 0xEFCDAB89; + c->h[2] = 0x98BADCFE; + c->h[3] = 0x10325476; + c->h[4] = 0xC3D2E1F0; + return 1; +} + +int +SHA1_Update(SHA_CTX *c, const void *m, size_t len) +{ + const uint8_t *p = m; + unsigned r = c->len % 64; + + c->len += len; + if (r) { + if (len < 64 - r) { + memcpy(c->buf + r, p, len); + return 1; + } + memcpy(c->buf + r, p, 64 - r); + len -= 64 - r; + p += 64 - r; + processblock(c, c->buf); + } + for (; len >= 64; len -= 64, p += 64) + processblock(c, p); + memcpy(c->buf, p, len); + return 1; +} + +int +SHA1_Final(unsigned char *md, SHA_CTX *c) +{ + int i; + + pad(c); + for (i = 0; i < 5; i++) { + md[4 * i] = c->h[i] >> 24; + md[4 * i + 1] = c->h[i] >> 16; + md[4 * i + 2] = c->h[i] >> 8; + md[4 * i + 3] = c->h[i]; + } + return 1; +} diff --git a/sha1.h b/sha1.h @@ -0,0 +1,13 @@ +#include <stdint.h> + +typedef struct sha1 { + uint64_t len; /* processed message length */ + uint32_t h[5]; /* hash state */ + uint8_t buf[64]; /* message block buffer */ +} SHA_CTX; + +#define SHA_DIGEST_LENGTH 20 + +int SHA1_Init(SHA_CTX *); +int SHA1_Update(SHA_CTX *, const void *, size_t); +int SHA1_Final(unsigned char *, SHA_CTX *); diff --git a/strlcat.c b/strlcat.c @@ -0,0 +1,54 @@ +/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ */ + +/* + * Copyright (c) 1998, 2015 Todd C. 
Miller <Todd.Miller@courtesan.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <string.h> + +/* + * Appends src to string dst of size dsize (unlike strncat, dsize is the + * full size of dst, not space left). At most dsize-1 characters + * will be copied. Always NUL terminates (unless dsize <= strlen(dst)). + * Returns strlen(src) + MIN(dsize, strlen(initial dst)). + * If retval >= dsize, truncation occurred. + */ +size_t +strlcat(char *dst, const char *src, size_t dsize) +{ + const char *odst = dst; + const char *osrc = src; + size_t n = dsize; + size_t dlen; + + /* Find the end of dst and adjust bytes left but don't go past end. 
*/ + while (n-- != 0 && *dst != '\0') + dst++; + dlen = dst - odst; + n = dsize - dlen; + + if (n-- == 0) + return(dlen + strlen(src)); + while (*src != '\0') { + if (n != 0) { + *dst++ = *src; + n--; + } + src++; + } + *dst = '\0'; + + return(dlen + (src - osrc)); /* count does not include NUL */ +} diff --git a/xml.c b/xml.c @@ -0,0 +1,415 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xml.h" + +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) + +static void +xml_parseattrs(XMLParser *x) +{ + size_t namelen = 0, valuelen; + int c, endsep, endname = 0, valuestart = 0; + + while ((c = GETNEXT()) != EOF) { + if (ISSPACE(c)) { + if (namelen) + endname = 1; + continue; + } else if (c == '?') + ; /* ignore */ + else if (c == '=') { + x->name[namelen] = '\0'; + valuestart = 1; + endname = 1; + } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) { + /* attribute without value */ + x->name[namelen] = '\0'; + if (x->xmlattrstart) + x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0); + if (x->xmlattrend) + x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); + endname = 0; + x->name[0] = c; + namelen = 1; + } else if (namelen && valuestart) { + /* attribute with value */ + if (x->xmlattrstart) + x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen); + + valuelen = 0; + if (c == '\'' || c == '"') { + endsep = c; + } else { + endsep = ' '; /* ISSPACE() */ + goto startvalue; + } + + while ((c = GETNEXT()) != EOF) { +startvalue: + if (c == '&') { /* entities */ + x->data[valuelen] = '\0'; + /* call data function with data before entity if there is data */ + if (valuelen && x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + while ((c = GETNEXT()) != EOF) { + if (c == 
endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) + break; + if (valuelen < sizeof(x->data) - 1) + x->data[valuelen++] = c; + else { + /* entity too long for buffer, handle as normal data */ + x->data[valuelen] = '\0'; + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + break; + } + if (c == ';') { + x->data[valuelen] = '\0'; + if (x->xmlattrentity) + x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + valuelen = 0; + break; + } + } + } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) { + if (valuelen < sizeof(x->data) - 1) { + x->data[valuelen++] = c; + } else { + x->data[valuelen] = '\0'; + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + x->data[0] = c; + valuelen = 1; + } + } + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) { + x->data[valuelen] = '\0'; + if (x->xmlattr) + x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); + if (x->xmlattrend) + x->xmlattrend(x, x->tag, x->taglen, x->name, namelen); + break; + } + } + namelen = endname = valuestart = 0; + } else if (namelen < sizeof(x->name) - 1) { + x->name[namelen++] = c; + } + if (c == '>') { + break; + } else if (c == '/') { + x->isshorttag = 1; + x->name[0] = '\0'; + namelen = 0; + } + } +} + +static void +xml_parsecomment(XMLParser *x) +{ + int c, i = 0; + + while ((c = GETNEXT()) != EOF) { + if (c == '-') { + if (++i > 2) + i = 2; + continue; + } else if (c == '>' && i == 2) { + return; + } else if (i) { + i = 0; + } + } +} + +static void +xml_parsecdata(XMLParser *x) +{ + size_t datalen = 0, i = 0; + int c; + + while ((c = GETNEXT()) != EOF) { + if (c == ']' || c == '>') { + if (x->xmlcdata && datalen) { + x->data[datalen] = '\0'; + x->xmlcdata(x, x->data, datalen); + datalen = 0; + } + } + + if (c == ']') { + if (++i > 2) { + if (x->xmlcdata) + for (; i > 2; i--) + x->xmlcdata(x, "]", 1); + i = 2; + 
/* Encode code point r as UTF-8 into s, which must have room for 4 bytes.
 * The output is not NUL-terminated. Returns the number of bytes written;
 * for r == 0 nothing is written and 0 is returned. No range checking is
 * done here: the caller validates r before calling. */
static int
codepointtoutf8(long r, char *s)
{
	if (r == 0) {
		return 0; /* NUL byte */
	} else if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = r;
		return 1;
	} else if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
		s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
		return 2;
	} else if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
		s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
		s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
		return 3;
	} else {
		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
		s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
		s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
		s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
		s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
		return 4;
	}
}

/* Convert a named entity to its character. e is the entity text without
 * the leading '&' but including the trailing ';' (e.g. "amp;"). Only the
 * five predefined XML entities are known. On success the character and a
 * NUL are stored in buf and 1 is returned; -1 is returned for an unknown
 * name or when bufsiz < 2. */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const struct {
		const char *entity;
		int c;
	} entities[] = {
		{ "amp;",  '&'  },
		{ "lt;",   '<'  },
		{ "gt;",   '>'  },
		{ "apos;", '\'' },
		{ "quot;", '"'  },
	};
	size_t i;

	/* buffer is too small */
	if (bufsiz < 2)
		return -1;

	for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
		if (!strcmp(e, entities[i].entity)) {
			buf[0] = entities[i].c;
			buf[1] = '\0';
			return 1;
		}
	}
	return -1;
}

/* Return non-zero if c is a digit in the given base (10 or 16). */
static int
isbasedigit(int c, int base)
{
	if (c >= '0' && c <= '9')
		return 1;
	if (base == 16)
		return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
	return 0;
}

/* Convert a numeric character reference to UTF-8. e is the text after
 * "&#", including the trailing ';' (e.g. "65;" or "x41;"). On success the
 * NUL-terminated UTF-8 sequence is stored in buf and its byte length is
 * returned (0 for code point 0). Returns -1 when bufsiz < 5, when the
 * reference is malformed, or when the value is not a valid code point
 * (negative, above 0x10ffff, or a surrogate). */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	const char *p;
	long l;
	int len, base = 10;
	char *end;

	/* buffer is too small */
	if (bufsiz < 5)
		return -1;

	/* hex (16) or decimal (10) */
	if (*e == 'x') {
		base = 16;
		e++;
	}

	errno = 0;
	l = strtol(e, &end, base);
	/* invalid value or not a well-formed entity or invalid code point */
	if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
	    (l >= 0xd800 && l <= 0xdfff))
		return -1;
	/* strtol() also skips leading whitespace and accepts a sign and a
	 * "0x" prefix; a character reference may contain digits only */
	for (p = e; p < end; p++) {
		if (!isbasedigit((unsigned char)*p, base))
			return -1;
	}
	len = codepointtoutf8(l, buf);
	buf[len] = '\0';

	return len;
}

/* Convert a named or numeric entity string to a buffer string.
 * e is the complete entity including the leading '&' and the trailing ';'
 * (e.g. "&amp;", "&#65;", "&#x41;"). buf receives the NUL-terminated
 * result; bufsiz must be at least 5 for numeric and 2 for named entities.
 * Returns the byte length of the string or -1 on failure. */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	/* doesn't start with & */
	if (e[0] != '&')
		return -1;
	/* numeric entity */
	if (e[1] == '#')
		return numericentitytostr(e + 2, buf, bufsiz);
	else /* named entity */
		return namedentitytostr(e + 1, buf, bufsiz);
}
*/ + if (c == '?') { + x->isshorttag = 1; + } else if (c == '/') { + if ((c = GETNEXT()) == EOF) + return; + x->tag[0] = c; + isend = 1; + } + + while ((c = GETNEXT()) != EOF) { + if (c == '/') + x->isshorttag = 1; /* short tag */ + else if (c == '>' || ISSPACE(c)) { + x->tag[x->taglen] = '\0'; + if (isend) { /* end tag, starts with </ */ + if (x->xmltagend) + x->xmltagend(x, x->tag, x->taglen, x->isshorttag); + x->tag[0] = '\0'; + x->taglen = 0; + } else { + /* start tag */ + if (x->xmltagstart) + x->xmltagstart(x, x->tag, x->taglen); + if (ISSPACE(c)) + xml_parseattrs(x); + if (x->xmltagstartparsed) + x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); + } + /* call tagend for shortform or processing instruction */ + if (x->isshorttag) { + if (x->xmltagend) + x->xmltagend(x, x->tag, x->taglen, x->isshorttag); + x->tag[0] = '\0'; + x->taglen = 0; + } + break; + } else if (x->taglen < sizeof(x->tag) - 1) + x->tag[x->taglen++] = c; /* NOTE: tag name truncation */ + } + } + } else { + /* parse tag data */ + datalen = 0; + while ((c = GETNEXT()) != EOF) { + if (c == '&') { + if (datalen) { + x->data[datalen] = '\0'; + if (x->xmldata) + x->xmldata(x, x->data, datalen); + } + x->data[0] = c; + datalen = 1; + while ((c = GETNEXT()) != EOF) { + if (c == '<') + break; + if (datalen < sizeof(x->data) - 1) + x->data[datalen++] = c; + else { + /* entity too long for buffer, handle as normal data */ + x->data[datalen] = '\0'; + if (x->xmldata) + x->xmldata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + break; + } + if (c == ';') { + x->data[datalen] = '\0'; + if (x->xmldataentity) + x->xmldataentity(x, x->data, datalen); + datalen = 0; + break; + } + } + } else if (c != '<') { + if (datalen < sizeof(x->data) - 1) { + x->data[datalen++] = c; + } else { + x->data[datalen] = '\0'; + if (x->xmldata) + x->xmldata(x, x->data, datalen); + x->data[0] = c; + datalen = 1; + } + } + if (c == '<') { + x->data[datalen] = '\0'; + if (x->xmldata && datalen) + x->xmldata(x, 
/* Parser context: the callbacks invoked by xml_parse() plus the buffers it
 * parses into. Every callback is optional; the parser checks each pointer
 * before calling it, so unused handlers may be left NULL. The first
 * argument of every callback is the parser context itself. */
typedef struct xmlparser {
	/* handlers */
	/* attribute value data: (tag, taglen, name, namelen, value, valuelen).
	 * May be called several times for one attribute when the value is
	 * split by an entity or exceeds the data buffer. */
	void (*xmlattr)(struct xmlparser *, const char *, size_t,
	      const char *, size_t, const char *, size_t);
	/* end of an attribute: (tag, taglen, name, namelen) */
	void (*xmlattrend)(struct xmlparser *, const char *, size_t,
	      const char *, size_t);
	/* start of an attribute: (tag, taglen, name, namelen) */
	void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
	      const char *, size_t);
	/* entity inside an attribute value, passed as the raw text from '&'
	 * up to and including ';': (tag, taglen, name, namelen, value, valuelen) */
	void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
	      const char *, size_t, const char *, size_t);
	/* CDATA section content: (data, datalen); may be called in chunks */
	void (*xmlcdata)(struct xmlparser *, const char *, size_t);
	/* text between tags: (data, datalen); may be called in chunks */
	void (*xmldata)(struct xmlparser *, const char *, size_t);
	/* entity inside text, passed as the raw text from '&' up to and
	 * including ';': (data, datalen) */
	void (*xmldataentity)(struct xmlparser *, const char *, size_t);
	/* end tag, short tag or processing instruction:
	 * (tag, taglen, isshorttag) */
	void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
	/* start tag, called once the tag name is known and before its
	 * attributes are parsed: (tag, taglen) */
	void (*xmltagstart)(struct xmlparser *, const char *, size_t);
	/* start tag, called after its attributes have been parsed:
	 * (tag, taglen, isshorttag) */
	void (*xmltagstartparsed)(struct xmlparser *, const char *,
	      size_t, int);

#ifndef GETNEXT
	/* GETNEXT overridden to reduce function call overhead and further
	   context optimizations. */
	/* default input source: read the next character from stdin; the
	 * parser stops when GETNEXT() returns EOF */
	#define GETNEXT getchar
#endif

	/* current tag */
	/* NUL-terminated; names longer than the buffer are truncated */
	char tag[1024];
	size_t taglen;
	/* current tag is in short form ? <tag /> */
	/* also set for processing instructions (<? ... ?>) */
	int isshorttag;
	/* current attribute name */
	char name[1024];
	/* data buffer used for tag data, cdata and attribute data */
	char data[BUFSIZ];
} XMLParser;