aboutsummaryrefslogtreecommitdiff
path: root/libs/libtxt/xml.c
diff options
context:
space:
mode:
Diffstat (limited to 'libs/libtxt/xml.c')
-rw-r--r--libs/libtxt/xml.c514
1 files changed, 0 insertions, 514 deletions
diff --git a/libs/libtxt/xml.c b/libs/libtxt/xml.c
deleted file mode 100644
index 9a5fd76..0000000
--- a/libs/libtxt/xml.c
+++ /dev/null
@@ -1,514 +0,0 @@
-// Inspired by sxml (capmar)
-// MIT License, Copyright (c) 2020 Marvin Borner
-
-#include <assert.h>
-#include <libtxt/xml.h>
-#include <mem.h>
-#include <str.h>
-
-static const char *str_findchr(const char *start, const char *end, int c)
-{
- const char *it;
-
- assert(start <= end);
- assert(0 <= c && c <= 127);
-
- it = (const char *)memchr((void *)start, c, end - start);
- return (it != NULL) ? it : end;
-}
-
-static const char *str_findstr(const char *start, const char *end, const char *needle)
-{
- u32 needlelen;
- int first;
- assert(start <= end);
-
- needlelen = strlen(needle);
- assert(0 < needlelen);
- first = (u8)needle[0];
-
- while (start + needlelen <= end) {
- const char *it =
- (const char *)memchr((void *)start, first, (end - start) - (needlelen - 1));
- if (it == NULL)
- break;
-
- if (memcmp(it, needle, needlelen) == 0)
- return it;
-
- start = it + 1;
- }
-
- return end;
-}
-
-static int str_starts_with(const char *start, const char *end, const char *prefix)
-{
- long nbytes;
- assert(start <= end);
-
- nbytes = strlen(prefix);
- if (end - start < nbytes)
- return 0;
-
- return memcmp(prefix, start, nbytes) == 0;
-}
-
-static int white_space(int c)
-{
- switch (c) {
- case ' ':
- case '\t':
- case '\r':
- case '\n':
- return 1;
- }
-
- return 0;
-}
-
-static int name_start_char(int c)
-{
- if (0x80 <= c)
- return 1;
-
- return c == ':' || ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z');
-}
-
-static int name_char(int c)
-{
- return name_start_char(c) || c == '-' || c == '.' || ('0' <= c && c <= '9') || c == 0xB7 ||
- (0x0300 <= c && c <= 0x036F) || (0x203F <= c && c <= 0x2040);
-}
-
-#define is_space(c) (white_space(((u8)(c))))
-#define is_alpha(c) (name_start_char(((u8)(c))))
-#define is_alnum(c) (name_char(((u8)(c))))
-
-static const char *str_ltrim(const char *start, const char *end)
-{
- const char *it;
- assert(start <= end);
-
- for (it = start; it != end && is_space(*it); it++)
- ;
-
- return it;
-}
-
-static const char *str_rtrim(const char *start, const char *end)
-{
- const char *it, *prev;
- assert(start <= end);
-
- for (it = end; start != it; it = prev) {
- prev = it - 1;
- if (!is_space(*prev))
- return it;
- }
-
- return start;
-}
-
-static const char *str_find_notalnum(const char *start, const char *end)
-{
- const char *it;
- assert(start <= end);
-
- for (it = start; it != end && is_alnum(*it); it++)
- ;
-
- return it;
-}
-
-#define buffer_from_offset(args, i) ((args)->buffer + (i))
-#define buffer_tooffset(args, ptr) (unsigned)((ptr) - (args)->buffer)
-#define buffer_getend(args) ((args)->buffer + (args)->buffer_length)
-
-static int state_push_token(struct xml *state, struct xml_args *args, enum xml_type type,
- const char *start, const char *end)
-{
- struct xml_token *token;
- u32 i;
- if (args->num_tokens <= state->ntokens)
- return 0;
-
- i = state->ntokens++;
- token = &args->tokens[i];
- token->type = type;
- token->start_pos = buffer_tooffset(args, start);
- token->end_pos = buffer_tooffset(args, end);
- token->size = 0;
-
- switch (type) {
- case XML_START_TAG:
- state->tag_level++;
- break;
-
- case XML_END_TAG:
- assert(0 < state->tag_level);
- state->tag_level--;
- break;
-
- default:
- break;
- }
-
- return 1;
-}
-
-static enum xml_error state_set_pos(struct xml *state, const struct xml_args *args, const char *ptr)
-{
- state->buffer_pos = buffer_tooffset(args, ptr);
- return (state->ntokens <= args->num_tokens) ? XML_SUCCESS : XML_ERROR_TOKENSFULL;
-}
-
-#define state_commit(dest, src) memcpy((dest), (src), sizeof(struct xml))
-
-#define XML_ERROR_STRICT XML_ERROR_INVALID
-#define ENTITY_MAXLEN 8
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-static enum xml_error parse_characters(struct xml *state, struct xml_args *args, const char *end)
-{
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *limit, *colon, *ampr = str_findchr(start, end, '&');
- assert(end <= buffer_getend(args));
-
- if (ampr != start)
- state_push_token(state, args, XML_CHARACTER, start, ampr);
-
- if (ampr == end)
- return state_set_pos(state, args, ampr);
-
- limit = MIN(ampr + ENTITY_MAXLEN, end);
- colon = str_findchr(ampr, limit, ';');
- if (colon == limit)
- return (limit == end) ? XML_ERROR_BUFFERDRY : XML_ERROR_INVALID;
-
- start = colon + 1;
- state_push_token(state, args, XML_CHARACTER, ampr, start);
- return state_set_pos(state, args, start);
-}
-
-static enum xml_error parse_attrvalue(struct xml *state, struct xml_args *args, const char *end)
-{
- while (buffer_from_offset(args, state->buffer_pos) != end) {
- enum xml_error error = parse_characters(state, args, end);
- if (error != XML_SUCCESS)
- return error;
- }
-
- return XML_SUCCESS;
-}
-
-static enum xml_error parse_attributes(struct xml *state, struct xml_args *args)
-{
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *end = buffer_getend(args);
- const char *name = str_ltrim(start, end);
-
- u32 ntokens = state->ntokens;
- assert(0 < ntokens);
-
- while (name != end && is_alpha(*name)) {
- const char *eq, *space, *quot, *value;
- enum xml_error error;
-
- eq = str_findchr(name, end, '=');
- if (eq == end)
- return XML_ERROR_BUFFERDRY;
-
- space = str_rtrim(name, eq);
- state_push_token(state, args, XML_CDATA, name, space);
-
- quot = str_ltrim(eq + 1, end);
- if (quot == end)
- return XML_ERROR_BUFFERDRY;
- else if (*quot != '\'' && *quot != '"')
- return XML_ERROR_INVALID;
-
- value = quot + 1;
- quot = str_findchr(value, end, *quot);
- if (quot == end)
- return XML_ERROR_BUFFERDRY;
-
- state_set_pos(state, args, value);
- error = parse_attrvalue(state, args, quot);
- if (error != XML_SUCCESS)
- return error;
-
- name = str_ltrim(quot + 1, end);
- }
-
- {
- struct xml_token *token = args->tokens + (ntokens - 1);
- token->size = (u16)(state->ntokens - ntokens);
- }
-
- return state_set_pos(state, args, name);
-}
-
-#define TAG_LEN(str) (sizeof(str) - 1)
-#define TAG_MINSIZE 1
-
-static enum xml_error parse_comment(struct xml *state, struct xml_args *args)
-{
- static const char START_TAG[] = "<!--";
- static const char END_TAG[] = "-->";
-
- const char *dash;
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *end = buffer_getend(args);
- if (end - start < (int)TAG_LEN(START_TAG))
- return XML_ERROR_BUFFERDRY;
-
- if (!str_starts_with(start, end, START_TAG))
- return XML_ERROR_INVALID;
-
- start += TAG_LEN(START_TAG);
- dash = str_findstr(start, end, END_TAG);
- if (dash == end)
- return XML_ERROR_BUFFERDRY;
-
- state_push_token(state, args, XML_COMMENT, start, dash);
- return state_set_pos(state, args, dash + TAG_LEN(END_TAG));
-}
-
-static enum xml_error parse_instruction(struct xml *state, struct xml_args *args)
-{
- static const char START_TAG[] = "<?";
- static const char END_TAG[] = "?>";
-
- enum xml_error error;
- const char *quest, *space;
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *end = buffer_getend(args);
- assert(TAG_MINSIZE <= end - start);
-
- if (!str_starts_with(start, end, START_TAG))
- return XML_ERROR_INVALID;
-
- start += TAG_LEN(START_TAG);
- space = str_find_notalnum(start, end);
- if (space == end)
- return XML_ERROR_BUFFERDRY;
-
- state_push_token(state, args, XML_INSTRUCTION, start, space);
-
- state_set_pos(state, args, space);
- error = parse_attributes(state, args);
- if (error != XML_SUCCESS)
- return error;
-
- quest = buffer_from_offset(args, state->buffer_pos);
- if (end - quest < (int)TAG_LEN(END_TAG))
- return XML_ERROR_BUFFERDRY;
-
- if (!str_starts_with(quest, end, END_TAG))
- return XML_ERROR_INVALID;
-
- return state_set_pos(state, args, quest + TAG_LEN(END_TAG));
-}
-
-static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
-{
- static const char START_TAG[] = "<!DOCTYPE";
- static const char END_TAG[] = ">";
-
- const char *bracket;
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *end = buffer_getend(args);
- if (end - start < (int)TAG_LEN(START_TAG))
- return XML_ERROR_BUFFERDRY;
-
- if (!str_starts_with(start, end, START_TAG))
- return XML_ERROR_BUFFERDRY;
-
- start += TAG_LEN(START_TAG);
- bracket = str_findstr(start, end, END_TAG);
- if (bracket == end)
- return XML_ERROR_BUFFERDRY;
-
- state_push_token(state, args, XML_DOCTYPE, start, bracket);
- return state_set_pos(state, args, bracket + TAG_LEN(END_TAG));
-}
-
-static enum xml_error parse_start(struct xml *state, struct xml_args *args)
-{
- enum xml_error error;
- const char *gt, *name, *space;
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *end = buffer_getend(args);
- assert(TAG_MINSIZE <= end - start);
-
- if (!(start[0] == '<' && is_alpha(start[1])))
- return XML_ERROR_INVALID;
-
- name = start + 1;
- space = str_find_notalnum(name, end);
- if (space == end)
- return XML_ERROR_BUFFERDRY;
-
- state_push_token(state, args, XML_START_TAG, name, space);
-
- state_set_pos(state, args, space);
- error = parse_attributes(state, args);
- if (error != XML_SUCCESS)
- return error;
-
- gt = buffer_from_offset(args, state->buffer_pos);
-
- if (gt != end && *gt == '/') {
- state_push_token(state, args, XML_END_TAG, name, space);
- gt++;
- }
-
- if (gt == end)
- return XML_ERROR_BUFFERDRY;
-
- if (*gt != '>')
- return XML_ERROR_INVALID;
-
- return state_set_pos(state, args, gt + 1);
-}
-
-static enum xml_error parse_end(struct xml *state, struct xml_args *args)
-{
- const char *gt, *space;
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *end = buffer_getend(args);
- assert(TAG_MINSIZE <= end - start);
-
- if (!(str_starts_with(start, end, "</") && is_alpha(start[2])))
- return XML_ERROR_INVALID;
-
- start += 2;
- gt = str_findchr(start, end, '>');
- if (gt == end)
- return XML_ERROR_BUFFERDRY;
-
- space = str_find_notalnum(start, gt);
- if (str_ltrim(space, gt) != gt)
- return XML_ERROR_STRICT;
-
- state_push_token(state, args, XML_END_TAG, start, space);
- return state_set_pos(state, args, gt + 1);
-}
-
-static enum xml_error parse_cdata(struct xml *state, struct xml_args *args)
-{
- static const char START_TAG[] = "<![CDATA[";
- static const char END_TAG[] = "]]>";
-
- const char *bracket;
- const char *start = buffer_from_offset(args, state->buffer_pos);
- const char *end = buffer_getend(args);
- if (end - start < (int)TAG_LEN(START_TAG))
- return XML_ERROR_BUFFERDRY;
-
- if (!str_starts_with(start, end, START_TAG))
- return XML_ERROR_INVALID;
-
- start += TAG_LEN(START_TAG);
- bracket = str_findstr(start, end, END_TAG);
- if (bracket == end)
- return XML_ERROR_BUFFERDRY;
-
- state_push_token(state, args, XML_CDATA, start, bracket);
- return state_set_pos(state, args, bracket + TAG_LEN(END_TAG));
-}
-
-void xml_init(struct xml *state)
-{
- state->buffer_pos = 0;
- state->ntokens = 0;
- state->tag_level = 0;
-}
-
-#define ROOT_FOUND(state) (0 < (state)->tag_level)
-#define ROOT_PARSED(state) ((state)->tag_level == 0)
-
-enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_length,
- struct xml_token tokens[], u32 num_tokens)
-{
- struct xml temp = *state;
- const char *end = buffer + buffer_length;
-
- struct xml_args args;
- args.buffer = buffer;
- args.buffer_length = buffer_length;
- args.tokens = tokens;
- args.num_tokens = num_tokens;
-
- while (!ROOT_FOUND(&temp)) {
- enum xml_error error;
- const char *start = buffer_from_offset(&args, temp.buffer_pos);
- const char *lt = str_ltrim(start, end);
- state_set_pos(&temp, &args, lt);
- state_commit(state, &temp);
-
- if (end - lt < TAG_MINSIZE)
- return XML_ERROR_BUFFERDRY;
-
- if (*lt != '<')
- return XML_ERROR_INVALID;
-
- switch (lt[1]) {
- case '?':
- error = parse_instruction(&temp, &args);
- break;
- case '!':
- error = (lt[2] == '-') ? parse_comment(&temp, &args) :
- parse_doctype(&temp, &args);
- break;
- default:
- error = parse_start(&temp, &args);
- break;
- }
-
- if (error != XML_SUCCESS)
- return error;
-
- state_commit(state, &temp);
- }
-
- while (!ROOT_PARSED(&temp)) {
- enum xml_error error;
- const char *start = buffer_from_offset(&args, temp.buffer_pos);
- const char *lt = str_findchr(start, end, '<');
- while (buffer_from_offset(&args, temp.buffer_pos) != lt) {
- error = parse_characters(&temp, &args, lt);
- if (error != XML_SUCCESS)
- return error;
-
- state_commit(state, &temp);
- }
-
- if (end - lt < TAG_MINSIZE)
- return XML_ERROR_BUFFERDRY;
-
- switch (lt[1]) {
- case '?':
- error = parse_instruction(&temp, &args);
- break;
- case '/':
- error = parse_end(&temp, &args);
- break;
- case '!':
- error = (lt[2] == '-') ? parse_comment(&temp, &args) :
- parse_cdata(&temp, &args);
- break;
- default:
- error = parse_start(&temp, &args);
- break;
- }
-
- if (error != XML_SUCCESS)
- return error;
-
- state_commit(state, &temp);
- }
-
- return XML_SUCCESS;
-}