diff options
author | Marvin Borner | 2021-05-23 14:30:07 +0200 |
---|---|---|
committer | Marvin Borner | 2021-05-23 14:34:52 +0200 |
commit | 33fd97e19a12535c02b1cf6804cb854a279e040c (patch) | |
tree | 0ea00a9b60b35e42830eb3b13887fea2d356d655 /libs/libtxt | |
parent | 0bf64a113f3d3baa110b362fd6a215ef29182671 (diff) |
Cleanup, linting, formatting
Diffstat (limited to 'libs/libtxt')
-rw-r--r-- | libs/libtxt/xml.c | 514 | ||||
-rw-r--r-- | libs/libtxt/xml.h | 51 |
2 files changed, 0 insertions, 565 deletions
diff --git a/libs/libtxt/xml.c b/libs/libtxt/xml.c deleted file mode 100644 index 9a5fd76..0000000 --- a/libs/libtxt/xml.c +++ /dev/null @@ -1,514 +0,0 @@ -// Inspired by sxml (capmar) -// MIT License, Copyright (c) 2020 Marvin Borner - -#include <assert.h> -#include <libtxt/xml.h> -#include <mem.h> -#include <str.h> - -static const char *str_findchr(const char *start, const char *end, int c) -{ - const char *it; - - assert(start <= end); - assert(0 <= c && c <= 127); - - it = (const char *)memchr((void *)start, c, end - start); - return (it != NULL) ? it : end; -} - -static const char *str_findstr(const char *start, const char *end, const char *needle) -{ - u32 needlelen; - int first; - assert(start <= end); - - needlelen = strlen(needle); - assert(0 < needlelen); - first = (u8)needle[0]; - - while (start + needlelen <= end) { - const char *it = - (const char *)memchr((void *)start, first, (end - start) - (needlelen - 1)); - if (it == NULL) - break; - - if (memcmp(it, needle, needlelen) == 0) - return it; - - start = it + 1; - } - - return end; -} - -static int str_starts_with(const char *start, const char *end, const char *prefix) -{ - long nbytes; - assert(start <= end); - - nbytes = strlen(prefix); - if (end - start < nbytes) - return 0; - - return memcmp(prefix, start, nbytes) == 0; -} - -static int white_space(int c) -{ - switch (c) { - case ' ': - case '\t': - case '\r': - case '\n': - return 1; - } - - return 0; -} - -static int name_start_char(int c) -{ - if (0x80 <= c) - return 1; - - return c == ':' || ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z'); -} - -static int name_char(int c) -{ - return name_start_char(c) || c == '-' || c == '.' || ('0' <= c && c <= '9') || c == 0xB7 || - (0x0300 <= c && c <= 0x036F) || (0x203F <= c && c <= 0x2040); -} - -#define is_space(c) (white_space(((u8)(c)))) -#define is_alpha(c) (name_start_char(((u8)(c)))) -#define is_alnum(c) (name_char(((u8)(c)))) - -static const char *str_ltrim(const char *start, const char *end) -{ - const char *it; - assert(start <= end); - - for (it = start; it != end && is_space(*it); it++) - ; - - return it; -} - -static const char *str_rtrim(const char *start, const char *end) -{ - const char *it, *prev; - assert(start <= end); - - for (it = end; start != it; it = prev) { - prev = it - 1; - if (!is_space(*prev)) - return it; - } - - return start; -} - -static const char *str_find_notalnum(const char *start, const char *end) -{ - const char *it; - assert(start <= end); - - for (it = start; it != end && is_alnum(*it); it++) - ; - - return it; -} - -#define buffer_from_offset(args, i) ((args)->buffer + (i)) -#define buffer_tooffset(args, ptr) (unsigned)((ptr) - (args)->buffer) -#define buffer_getend(args) ((args)->buffer + (args)->buffer_length) - -static int state_push_token(struct xml *state, struct xml_args *args, enum xml_type type, - const char *start, const char *end) -{ - struct xml_token *token; - u32 i; - if (args->num_tokens <= state->ntokens) - return 0; - - i = state->ntokens++; - token = &args->tokens[i]; - token->type = type; - token->start_pos = buffer_tooffset(args, start); - token->end_pos = buffer_tooffset(args, end); - token->size = 0; - - switch (type) { - case XML_START_TAG: - state->tag_level++; - break; - - case XML_END_TAG: - assert(0 < state->tag_level); - state->tag_level--; - break; - - default: - break; - } - - return 1; -} - -static enum xml_error state_set_pos(struct xml *state, const struct xml_args *args, const char *ptr) -{ - state->buffer_pos = buffer_tooffset(args, ptr); - return (state->ntokens <= args->num_tokens) ? XML_SUCCESS : XML_ERROR_TOKENSFULL; -} - -#define state_commit(dest, src) memcpy((dest), (src), sizeof(struct xml)) - -#define XML_ERROR_STRICT XML_ERROR_INVALID -#define ENTITY_MAXLEN 8 -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -static enum xml_error parse_characters(struct xml *state, struct xml_args *args, const char *end) -{ - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *limit, *colon, *ampr = str_findchr(start, end, '&'); - assert(end <= buffer_getend(args)); - - if (ampr != start) - state_push_token(state, args, XML_CHARACTER, start, ampr); - - if (ampr == end) - return state_set_pos(state, args, ampr); - - limit = MIN(ampr + ENTITY_MAXLEN, end); - colon = str_findchr(ampr, limit, ';'); - if (colon == limit) - return (limit == end) ? XML_ERROR_BUFFERDRY : XML_ERROR_INVALID; - - start = colon + 1; - state_push_token(state, args, XML_CHARACTER, ampr, start); - return state_set_pos(state, args, start); -} - -static enum xml_error parse_attrvalue(struct xml *state, struct xml_args *args, const char *end) -{ - while (buffer_from_offset(args, state->buffer_pos) != end) { - enum xml_error error = parse_characters(state, args, end); - if (error != XML_SUCCESS) - return error; - } - - return XML_SUCCESS; -} - -static enum xml_error parse_attributes(struct xml *state, struct xml_args *args) -{ - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - const char *name = str_ltrim(start, end); - - u32 ntokens = state->ntokens; - assert(0 < ntokens); - - while (name != end && is_alpha(*name)) { - const char *eq, *space, *quot, *value; - enum xml_error error; - - eq = str_findchr(name, end, '='); - if (eq == end) - return XML_ERROR_BUFFERDRY; - - space = str_rtrim(name, eq); - state_push_token(state, args, XML_CDATA, name, space); - - quot = str_ltrim(eq + 1, end); - if (quot == end) - return XML_ERROR_BUFFERDRY; - else if (*quot != '\'' && *quot != '"') - return XML_ERROR_INVALID; - - value = quot + 1; - quot = str_findchr(value, end, *quot); - if (quot == end) - return XML_ERROR_BUFFERDRY; - - state_set_pos(state, args, value); - error = parse_attrvalue(state, args, quot); - if (error != XML_SUCCESS) - return error; - - name = str_ltrim(quot + 1, end); - } - - { - struct xml_token *token = args->tokens + (ntokens - 1); - token->size = (u16)(state->ntokens - ntokens); - } - - return state_set_pos(state, args, name); -} - -#define TAG_LEN(str) (sizeof(str) - 1) -#define TAG_MINSIZE 1 - -static enum xml_error parse_comment(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<!--"; - static const char END_TAG[] = "-->"; - - const char *dash; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - if (end - start < (int)TAG_LEN(START_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_INVALID; - - start += TAG_LEN(START_TAG); - dash = str_findstr(start, end, END_TAG); - if (dash == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_COMMENT, start, dash); - return state_set_pos(state, args, dash + TAG_LEN(END_TAG)); -} - -static enum xml_error parse_instruction(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<?"; - static const char END_TAG[] = "?>"; - - enum xml_error error; - const char *quest, *space; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - assert(TAG_MINSIZE <= end - start); - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_INVALID; - - start += TAG_LEN(START_TAG); - space = str_find_notalnum(start, end); - if (space == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_INSTRUCTION, start, space); - - state_set_pos(state, args, space); - error = parse_attributes(state, args); - if (error != XML_SUCCESS) - return error; - - quest = buffer_from_offset(args, state->buffer_pos); - if (end - quest < (int)TAG_LEN(END_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(quest, end, END_TAG)) - return XML_ERROR_INVALID; - - return state_set_pos(state, args, quest + TAG_LEN(END_TAG)); -} - -static enum xml_error parse_doctype(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<!DOCTYPE"; - static const char END_TAG[] = ">"; - - const char *bracket; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - if (end - start < (int)TAG_LEN(START_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_BUFFERDRY; - - start += TAG_LEN(START_TAG); - bracket = str_findstr(start, end, END_TAG); - if (bracket == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_DOCTYPE, start, bracket); - return state_set_pos(state, args, bracket + TAG_LEN(END_TAG)); -} - -static enum xml_error parse_start(struct xml *state, struct xml_args *args) -{ - enum xml_error error; - const char *gt, *name, *space; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - assert(TAG_MINSIZE <= end - start); - - if (!(start[0] == '<' && is_alpha(start[1]))) - return XML_ERROR_INVALID; - - name = start + 1; - space = str_find_notalnum(name, end); - if (space == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_START_TAG, name, space); - - state_set_pos(state, args, space); - error = parse_attributes(state, args); - if (error != XML_SUCCESS) - return error; - - gt = buffer_from_offset(args, state->buffer_pos); - - if (gt != end && *gt == '/') { - state_push_token(state, args, XML_END_TAG, name, space); - gt++; - } - - if (gt == end) - return XML_ERROR_BUFFERDRY; - - if (*gt != '>') - return XML_ERROR_INVALID; - - return state_set_pos(state, args, gt + 1); -} - -static enum xml_error parse_end(struct xml *state, struct xml_args *args) -{ - const char *gt, *space; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - assert(TAG_MINSIZE <= end - start); - - if (!(str_starts_with(start, end, "</") && is_alpha(start[2]))) - return XML_ERROR_INVALID; - - start += 2; - gt = str_findchr(start, end, '>'); - if (gt == end) - return XML_ERROR_BUFFERDRY; - - space = str_find_notalnum(start, gt); - if (str_ltrim(space, gt) != gt) - return XML_ERROR_STRICT; - - state_push_token(state, args, XML_END_TAG, start, space); - return state_set_pos(state, args, gt + 1); -} - -static enum xml_error parse_cdata(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<![CDATA["; - static const char END_TAG[] = "]]>"; - - const char *bracket; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - if (end - start < (int)TAG_LEN(START_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_INVALID; - - start += TAG_LEN(START_TAG); - bracket = str_findstr(start, end, END_TAG); - if (bracket == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_CDATA, start, bracket); - return state_set_pos(state, args, bracket + TAG_LEN(END_TAG)); -} - -void xml_init(struct xml *state) -{ - state->buffer_pos = 0; - state->ntokens = 0; - state->tag_level = 0; -} - -#define ROOT_FOUND(state) (0 < (state)->tag_level) -#define ROOT_PARSED(state) ((state)->tag_level == 0) - -enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_length, - struct xml_token tokens[], u32 num_tokens) -{ - struct xml temp = *state; - const char *end = buffer + buffer_length; - - struct xml_args args; - args.buffer = buffer; - args.buffer_length = buffer_length; - args.tokens = tokens; - args.num_tokens = num_tokens; - - while (!ROOT_FOUND(&temp)) { - enum xml_error error; - const char *start = buffer_from_offset(&args, temp.buffer_pos); - const char *lt = str_ltrim(start, end); - state_set_pos(&temp, &args, lt); - state_commit(state, &temp); - - if (end - lt < TAG_MINSIZE) - return XML_ERROR_BUFFERDRY; - - if (*lt != '<') - return XML_ERROR_INVALID; - - switch (lt[1]) { - case '?': - error = parse_instruction(&temp, &args); - break; - case '!': - error = (lt[2] == '-') ? parse_comment(&temp, &args) : - parse_doctype(&temp, &args); - break; - default: - error = parse_start(&temp, &args); - break; - } - - if (error != XML_SUCCESS) - return error; - - state_commit(state, &temp); - } - - while (!ROOT_PARSED(&temp)) { - enum xml_error error; - const char *start = buffer_from_offset(&args, temp.buffer_pos); - const char *lt = str_findchr(start, end, '<'); - while (buffer_from_offset(&args, temp.buffer_pos) != lt) { - error = parse_characters(&temp, &args, lt); - if (error != XML_SUCCESS) - return error; - - state_commit(state, &temp); - } - - if (end - lt < TAG_MINSIZE) - return XML_ERROR_BUFFERDRY; - - switch (lt[1]) { - case '?': - error = parse_instruction(&temp, &args); - break; - case '/': - error = parse_end(&temp, &args); - break; - case '!': - error = (lt[2] == '-') ? parse_comment(&temp, &args) : - parse_cdata(&temp, &args); - break; - default: - error = parse_start(&temp, &args); - break; - } - - if (error != XML_SUCCESS) - return error; - - state_commit(state, &temp); - } - - return XML_SUCCESS; -} diff --git a/libs/libtxt/xml.h b/libs/libtxt/xml.h deleted file mode 100644 index 3f5c74d..0000000 --- a/libs/libtxt/xml.h +++ /dev/null @@ -1,51 +0,0 @@ -// Inspired by sxml (capmar) -// MIT License, Copyright (c) 2020 Marvin Borner - -#ifndef XML_H -#define XML_H - -#include <def.h> - -enum xml_error { - XML_ERROR_INVALID = -1, - XML_SUCCESS = 0, - XML_ERROR_BUFFERDRY = 1, - XML_ERROR_TOKENSFULL = 2 -}; - -struct xml_token { - u16 type; - u16 size; - u32 start_pos; - u32 end_pos; -}; - -struct xml_args { - const char *buffer; - u32 buffer_length; - struct xml_token *tokens; - u32 num_tokens; -}; - -enum xml_type { - XML_START_TAG, - XML_END_TAG, - XML_CHARACTER, - XML_CDATA, - XML_INSTRUCTION, - XML_DOCTYPE, - XML_COMMENT -}; - -struct xml { - u32 buffer_pos; - u32 ntokens; - u32 tag_level; -}; - -enum xml_error xml_parse(struct xml *parser, const char *buffer, u32 buffer_length, - struct xml_token *tokens, u32 num_tokens) NONNULL; - -void xml_init(struct xml *parser) NONNULL; - -#endif |