diff options
author | Marvin Borner | 2021-03-26 21:55:50 +0100 |
---|---|---|
committer | Marvin Borner | 2021-03-26 22:02:20 +0100 |
commit | 05498860e8f7b1e8bb27880bc7526de026694804 (patch) | |
tree | 3bddf16e9439a950a3810d45e42a5cefdbcb7663 /libtxt | |
parent | a96e9c4c858d47f61b89d879aa0ce6a02bdacb38 (diff) |
Renamed libs
Cleaner and more flexible.
Diffstat (limited to 'libtxt')
-rw-r--r-- | libtxt/Makefile | 20 | ||||
-rw-r--r-- | libtxt/html.c | 238 | ||||
-rw-r--r-- | libtxt/inc/html.h | 26 | ||||
-rw-r--r-- | libtxt/inc/keymap.h | 16 | ||||
-rw-r--r-- | libtxt/inc/xml.h | 51 | ||||
-rw-r--r-- | libtxt/keymap.c | 89 | ||||
-rw-r--r-- | libtxt/xml.c | 514 |
7 files changed, 0 insertions, 954 deletions
diff --git a/libtxt/Makefile b/libtxt/Makefile deleted file mode 100644 index d0d4c17..0000000 --- a/libtxt/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# MIT License, Copyright (c) 2020 Marvin Borner - -COBJS = keymap.o #xml.o html.o -CC = ccache ../cross/opt/bin/i686-elf-gcc -LD = ccache ../cross/opt/bin/i686-elf-ld -AR = ccache ../cross/opt/bin/i686-elf-ar - -CFLAGS = $(CFLAGS_DEFAULT) -Iinc/ -I../libc/inc/ -I../libgui/inc/ -fPIE -Duserspace - -all: libtxt - -%.o: %.c - @$(CC) -c $(CFLAGS) $< -o $@ - -libtxt: $(COBJS) - @mkdir -p ../build/ - @$(AR) rcs ../build/libtxt.a $+ - -clean: - @find . -name "*.o" -type f -delete diff --git a/libtxt/html.c b/libtxt/html.c deleted file mode 100644 index f40f7b3..0000000 --- a/libtxt/html.c +++ /dev/null @@ -1,238 +0,0 @@ -// MIT License, Copyright (c) 2020 Marvin Borner -// HTML parsing is mainly based on the XML parser - -#include <assert.h> -#include <gui.h> -#include <html.h> -#include <list.h> -#include <mem.h> -#include <print.h> -#include <str.h> -#include <xml.h> - -static int is_self_closing(const char *tag) -{ - const char *void_elements[] = { "area", "base", "br", "col", "embed", "hr", "img", - "input", "link", "meta", "param", "source", "track", "wbr" }; - - for (u32 i = 0; i < sizeof(void_elements) / sizeof(void_elements[0]); ++i) { - if (!strcmp(void_elements[i], tag)) - return 1; - } - return 0; -} - -static char *normalize_tag_name(char *tag) -{ - for (char *p = tag; *p; ++p) - *p = *p > 0x40 && *p < 0x5b ? *p | 0x60 : *p; - return tag; -} - -static struct dom *new_object(const char *tag, struct dom *parent) -{ - struct dom *object = malloc(sizeof(*object)); - object->tag = strdup(tag); - object->parent = parent; - object->content = NULL; - object->children = list_new(); - return object; -} - -static void print_dom(struct dom *dom, u32 level) -{ - struct node *iterator = dom->children->head; - while (iterator != NULL) { - struct dom *obj = iterator->data; - for (u32 i = 0; i < level; i++) - print("\t"); - printf("'%s': '%s'\n", obj->tag, obj->content ? obj->content : ""); - if (obj->children->head) - print_dom(obj, level + 1); - iterator = iterator->next; - } -} - -static struct dom *generate_dom(char *data, u32 length) -{ - struct xml_token tokens[128]; - struct xml parser; - xml_init(&parser); - void *buffer = data; - enum xml_error error = xml_parse(&parser, buffer, length, tokens, 128); - - if (error != XML_SUCCESS && error != XML_ERROR_BUFFERDRY) { - printf("XML parse error: %d\n", err); - printf("DATA: '%s'\n", data); - return NULL; - } - - struct dom *root = new_object("root", NULL); - struct dom *current = root; - - static char name[256] = { 0 }; - for (u32 i = 0; i < parser.ntokens; i++) { - const struct xml_token *token = tokens + i; - name[0] = '\0'; - switch (token->type) { - case XML_START_TAG: - memcpy(&name, (u8 *)buffer + token->start_pos, - token->end_pos - token->start_pos); - name[token->end_pos - token->start_pos] = '\0'; - normalize_tag_name(name); - current = new_object(name, current); - printf("Adding %s to %s\n", current->tag, current->parent->tag); - list_add(current->parent->children, current); - if (is_self_closing(name)) - current = current->parent; - break; - case XML_END_TAG: - memcpy(&name, (u8 *)buffer + token->start_pos, - token->end_pos - token->start_pos); - name[token->end_pos - token->start_pos] = '\0'; - normalize_tag_name(name); - - if (is_self_closing(name)) - break; - - if (!current || !current->parent || strcmp(name, current->tag)) - return NULL; - - current = current->parent; - break; - case XML_CHARACTER: - if (!current) - continue; - - if (token->end_pos == token->start_pos + 2) { - const char *ptr = (char *)buffer + token->start_pos; - - if (ptr[0] == '\r' && ptr[1] == '\n') - continue; - } - memcpy(&name, (u8 *)buffer + token->start_pos, - token->end_pos - token->start_pos); - name[token->end_pos - token->start_pos] = '\0'; - char *clean_name = name; - for (char *p = name; *p; p++) { - if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') { - clean_name++; - } else { - break; - } - } - if (!strlen(clean_name)) - break; - current->content = strdup(clean_name); - break; - default: - break; - } - - i += token->size; - } - - assert(root); - print("GENERATED!\n"); - print_dom(root, 0); - return root; -} - -static struct html_element *new_html_element(struct element *container, struct dom *dom) -{ - struct html_element *elem = malloc(sizeof(*elem)); - elem->x_offset = 0; - elem->y_offset = 0; - elem->dom = dom; - elem->obj = container; - return elem; -} - -// TODO: Better structure? -// TODO: Less code duplication (e.g. for headings) -#define CMP(tag, tag_string) (!strcmp((tag), (tag_string))) -static struct html_element *render_object(struct html_element *container, struct dom *dom) -{ - char *tag = dom->tag; - - assert(container); - if (CMP(tag, "html")) { - struct element *obj = - gui_add_container(container->obj, 0, 0, 100, 100, COLOR_WHITE); - return new_html_element(obj, dom); - } else if (CMP(tag, "body")) { - struct element *obj = - gui_add_container(container->obj, 0, 0, 100, 100, COLOR_WHITE); - return new_html_element(obj, dom); - } else if (CMP(tag, "h1")) { - struct element *obj = - gui_add_label(container->obj, container->x_offset, container->y_offset, - FONT_32, dom->content, COLOR_WHITE, COLOR_BLACK); - container->x_offset = 0; - container->y_offset += obj->ctx->size.y; - return new_html_element(obj, dom); - } else if (CMP(tag, "h2")) { - struct element *obj = - gui_add_label(container->obj, container->x_offset, container->y_offset, - FONT_24, dom->content, COLOR_WHITE, COLOR_BLACK); - container->x_offset = 0; - container->y_offset += obj->ctx->size.y; - return new_html_element(obj, dom); - } else if (CMP(tag, "h3")) { - struct element *obj = - gui_add_label(container->obj, container->x_offset, container->y_offset, - FONT_16, dom->content, COLOR_WHITE, COLOR_BLACK); - container->x_offset = 0; - container->y_offset += obj->ctx->size.y; - return new_html_element(obj, dom); - } else if (CMP(tag, "p")) { - struct element *obj = - gui_add_label(container->obj, container->x_offset, container->y_offset, - FONT_16, dom->content, COLOR_WHITE, COLOR_BLACK); - container->x_offset = 0; - container->y_offset += obj->ctx->size.y; - return new_html_element(obj, dom); - } else if (CMP(tag, "hr")) { - gfx_draw_rectangle(container->obj->ctx, - vec2(container->x_offset, container->y_offset), - vec2(container->obj->ctx->size.x - container->x_offset, - container->y_offset + 2), - COLOR_BLACK); - container->x_offset = 0; - container->y_offset += 2; - return container; - } else if (CMP(tag, "head") || CMP(tag, "meta") || CMP(tag, "title")) { - return container; - } else { - printf("UNKNOWN %s\n", tag); - if (dom->content && strlen(dom->content) > 0) { - struct element *obj = gui_add_label(container->obj, container->x_offset, - container->y_offset, FONT_16, - dom->content, COLOR_WHITE, COLOR_BLACK); - container->x_offset = 0; - container->y_offset += obj->ctx->size.y; - return new_html_element(obj, dom); - } - return container; - } -} - -int html_render_dom(struct html_element *container, struct dom *dom) -{ - struct node *iterator = dom->children->head; - while (iterator != NULL) { - struct dom *obj = iterator->data; - struct html_element *rendered = render_object(container, obj); - if (obj->children->head && rendered) - html_render_dom(rendered, obj); - iterator = iterator->next; - } - return 1; -} - -int html_render(struct element *container, char *data, u32 length) -{ - struct dom *dom = generate_dom(data, length); - struct html_element *obj = new_html_element(container, dom); - return dom && obj && html_render_dom(obj, dom); -} diff --git a/libtxt/inc/html.h b/libtxt/inc/html.h deleted file mode 100644 index c1b29f2..0000000 --- a/libtxt/inc/html.h +++ /dev/null @@ -1,26 +0,0 @@ -// MIT License, Copyright (c) 2020 Marvin Borner -// HTML parsing is mainly based on the XML parser - -#ifndef HTML_H -#define HTML_H - -#include <def.h> -#include <list.h> - -struct dom { - char *tag; - char *content; - struct dom *parent; - struct list *children; -}; - -struct html_element { - u32 x_offset; - u32 y_offset; - struct dom *dom; - struct element *obj; -}; - -int html_render(struct element *container, char *data, u32 length); - -#endif diff --git a/libtxt/inc/keymap.h b/libtxt/inc/keymap.h deleted file mode 100644 index 9f1966e..0000000 --- a/libtxt/inc/keymap.h +++ /dev/null @@ -1,16 +0,0 @@ -// MIT License, Copyright (c) 2020 Marvin Borner - -#ifndef KEYMAP_H -#define KEYMAP_H - -#define KEYMAP_LENGTH 90 - -struct keymap { - char map[KEYMAP_LENGTH]; - char shift_map[KEYMAP_LENGTH]; - char alt_map[KEYMAP_LENGTH]; -}; - -struct keymap *keymap_parse(const char *path); - -#endif diff --git a/libtxt/inc/xml.h b/libtxt/inc/xml.h deleted file mode 100644 index 43a8005..0000000 --- a/libtxt/inc/xml.h +++ /dev/null @@ -1,51 +0,0 @@ -// Inspired by sxml (capmar) -// MIT License, Copyright (c) 2020 Marvin Borner - -#ifndef XML_H -#define XML_H - -#include <def.h> - -enum xml_error { - XML_ERROR_INVALID = -1, - XML_SUCCESS = 0, - XML_ERROR_BUFFERDRY = 1, - XML_ERROR_TOKENSFULL = 2 -}; - -struct xml_token { - u16 type; - u16 size; - u32 start_pos; - u32 end_pos; -}; - -struct xml_args { - const char *buffer; - u32 buffer_length; - struct xml_token *tokens; - u32 num_tokens; -}; - -enum xml_type { - XML_START_TAG, - XML_END_TAG, - XML_CHARACTER, - XML_CDATA, - XML_INSTRUCTION, - XML_DOCTYPE, - XML_COMMENT -}; - -struct xml { - u32 buffer_pos; - u32 ntokens; - u32 tag_level; -}; - -enum xml_error xml_parse(struct xml *parser, const char *buffer, u32 buffer_length, - struct xml_token *tokens, u32 num_tokens); - -void xml_init(struct xml *parser); - -#endif diff --git a/libtxt/keymap.c b/libtxt/keymap.c deleted file mode 100644 index 175c715..0000000 --- a/libtxt/keymap.c +++ /dev/null @@ -1,89 +0,0 @@ -// MIT License, Copyright (c) 2020 Marvin Borner - -#include <def.h> -#include <keymap.h> -#include <mem.h> -#include <print.h> -#include <sys.h> - -static void map(struct keymap *keymap, int line, char ch, int index) -{ - switch (line) { - case 0: - keymap->map[index] = ch; - break; - case 1: - keymap->shift_map[index] = ch; - break; - case 2: - keymap->alt_map[index] = ch; - break; - default: - break; - } -} - -// Very ugly code but it should work for now -struct keymap *keymap_parse(const char *path) -{ - char *keymap_src = sread(path); - if (!keymap_src) - return NULL; - struct keymap *keymap = malloc(sizeof(*keymap)); - - int index = 0; - int ch_index = 0; - char ch; - int escaped = 0; - int line = 0; - int skip = 0; - while ((ch = keymap_src[index]) != '\0' || escaped) { - if (ch == ' ' && !skip) { - skip = 1; - index++; - continue; - } else if (ch == '\n') { - ch_index = 0; - index++; - line++; - continue; - } else if (ch == '\\' && !escaped) { - escaped = 1; - index++; - continue; - } - skip = 0; - - if (ch == ' ' && !escaped) - ch = 0; - - ch_index++; - if (escaped) { - switch (ch) { - case 'b': - ch = '\b'; - break; - case 't': - ch = '\t'; - break; - case 'n': - ch = '\n'; - break; - case '\\': - ch = '\\'; - break; - case ' ': - ch = ' '; - break; - default: - print("Unknown escape!\n"); - } - escaped = 0; - } - - map(keymap, line, ch, ch_index); - index++; - } - - return keymap; -} diff --git a/libtxt/xml.c b/libtxt/xml.c deleted file mode 100644 index e42af8c..0000000 --- a/libtxt/xml.c +++ /dev/null @@ -1,514 +0,0 @@ -// Inspired by sxml (capmar) -// MIT License, Copyright (c) 2020 Marvin Borner - -#include <assert.h> -#include <mem.h> -#include <str.h> -#include <xml.h> - -static const char *str_findchr(const char *start, const char *end, int c) -{ - const char *it; - - assert(start <= end); - assert(0 <= c && c <= 127); - - it = (const char *)memchr((void *)start, c, end - start); - return (it != NULL) ? it : end; -} - -static const char *str_findstr(const char *start, const char *end, const char *needle) -{ - u32 needlelen; - int first; - assert(start <= end); - - needlelen = strlen(needle); - assert(0 < needlelen); - first = (u8)needle[0]; - - while (start + needlelen <= end) { - const char *it = - (const char *)memchr((void *)start, first, (end - start) - (needlelen - 1)); - if (it == NULL) - break; - - if (memcmp(it, needle, needlelen) == 0) - return it; - - start = it + 1; - } - - return end; -} - -static int str_starts_with(const char *start, const char *end, const char *prefix) -{ - long nbytes; - assert(start <= end); - - nbytes = strlen(prefix); - if (end - start < nbytes) - return 0; - - return memcmp(prefix, start, nbytes) == 0; -} - -static int white_space(int c) -{ - switch (c) { - case ' ': - case '\t': - case '\r': - case '\n': - return 1; - } - - return 0; -} - -static int name_start_char(int c) -{ - if (0x80 <= c) - return 1; - - return c == ':' || ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z'); -} - -static int name_char(int c) -{ - return name_start_char(c) || c == '-' || c == '.' || ('0' <= c && c <= '9') || c == 0xB7 || - (0x0300 <= c && c <= 0x036F) || (0x203F <= c && c <= 0x2040); -} - -#define is_space(c) (white_space(((u8)(c)))) -#define is_alpha(c) (name_start_char(((u8)(c)))) -#define is_alnum(c) (name_char(((u8)(c)))) - -static const char *str_ltrim(const char *start, const char *end) -{ - const char *it; - assert(start <= end); - - for (it = start; it != end && is_space(*it); it++) - ; - - return it; -} - -static const char *str_rtrim(const char *start, const char *end) -{ - const char *it, *prev; - assert(start <= end); - - for (it = end; start != it; it = prev) { - prev = it - 1; - if (!is_space(*prev)) - return it; - } - - return start; -} - -static const char *str_find_notalnum(const char *start, const char *end) -{ - const char *it; - assert(start <= end); - - for (it = start; it != end && is_alnum(*it); it++) - ; - - return it; -} - -#define buffer_from_offset(args, i) ((args)->buffer + (i)) -#define buffer_tooffset(args, ptr) (unsigned)((ptr) - (args)->buffer) -#define buffer_getend(args) ((args)->buffer + (args)->buffer_length) - -static int state_push_token(struct xml *state, struct xml_args *args, enum xml_type type, - const char *start, const char *end) -{ - struct xml_token *token; - u32 i; - if (args->num_tokens <= state->ntokens) - return 0; - - i = state->ntokens++; - token = &args->tokens[i]; - token->type = type; - token->start_pos = buffer_tooffset(args, start); - token->end_pos = buffer_tooffset(args, end); - token->size = 0; - - switch (type) { - case XML_START_TAG: - state->tag_level++; - break; - - case XML_END_TAG: - assert(0 < state->tag_level); - state->tag_level--; - break; - - default: - break; - } - - return 1; -} - -static enum xml_error state_set_pos(struct xml *state, const struct xml_args *args, const char *ptr) -{ - state->buffer_pos = buffer_tooffset(args, ptr); - return (state->ntokens <= args->num_tokens) ? XML_SUCCESS : XML_ERROR_TOKENSFULL; -} - -#define state_commit(dest, src) memcpy((dest), (src), sizeof(struct xml)) - -#define XML_ERROR_STRICT XML_ERROR_INVALID -#define ENTITY_MAXLEN 8 -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -static enum xml_error parse_characters(struct xml *state, struct xml_args *args, const char *end) -{ - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *limit, *colon, *ampr = str_findchr(start, end, '&'); - assert(end <= buffer_getend(args)); - - if (ampr != start) - state_push_token(state, args, XML_CHARACTER, start, ampr); - - if (ampr == end) - return state_set_pos(state, args, ampr); - - limit = MIN(ampr + ENTITY_MAXLEN, end); - colon = str_findchr(ampr, limit, ';'); - if (colon == limit) - return (limit == end) ? XML_ERROR_BUFFERDRY : XML_ERROR_INVALID; - - start = colon + 1; - state_push_token(state, args, XML_CHARACTER, ampr, start); - return state_set_pos(state, args, start); -} - -static enum xml_error parse_attrvalue(struct xml *state, struct xml_args *args, const char *end) -{ - while (buffer_from_offset(args, state->buffer_pos) != end) { - enum xml_error error = parse_characters(state, args, end); - if (error != XML_SUCCESS) - return error; - } - - return XML_SUCCESS; -} - -static enum xml_error parse_attributes(struct xml *state, struct xml_args *args) -{ - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - const char *name = str_ltrim(start, end); - - u32 ntokens = state->ntokens; - assert(0 < ntokens); - - while (name != end && is_alpha(*name)) { - const char *eq, *space, *quot, *value; - enum xml_error error; - - eq = str_findchr(name, end, '='); - if (eq == end) - return XML_ERROR_BUFFERDRY; - - space = str_rtrim(name, eq); - state_push_token(state, args, XML_CDATA, name, space); - - quot = str_ltrim(eq + 1, end); - if (quot == end) - return XML_ERROR_BUFFERDRY; - else if (*quot != '\'' && *quot != '"') - return XML_ERROR_INVALID; - - value = quot + 1; - quot = str_findchr(value, end, *quot); - if (quot == end) - return XML_ERROR_BUFFERDRY; - - state_set_pos(state, args, value); - error = parse_attrvalue(state, args, quot); - if (error != XML_SUCCESS) - return error; - - name = str_ltrim(quot + 1, end); - } - - { - struct xml_token *token = args->tokens + (ntokens - 1); - token->size = (u16)(state->ntokens - ntokens); - } - - return state_set_pos(state, args, name); -} - -#define TAG_LEN(str) (sizeof(str) - 1) -#define TAG_MINSIZE 1 - -static enum xml_error parse_comment(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<!--"; - static const char END_TAG[] = "-->"; - - const char *dash; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - if (end - start < (int)TAG_LEN(START_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_INVALID; - - start += TAG_LEN(START_TAG); - dash = str_findstr(start, end, END_TAG); - if (dash == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_COMMENT, start, dash); - return state_set_pos(state, args, dash + TAG_LEN(END_TAG)); -} - -static enum xml_error parse_instruction(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<?"; - static const char END_TAG[] = "?>"; - - enum xml_error error; - const char *quest, *space; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - assert(TAG_MINSIZE <= end - start); - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_INVALID; - - start += TAG_LEN(START_TAG); - space = str_find_notalnum(start, end); - if (space == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_INSTRUCTION, start, space); - - state_set_pos(state, args, space); - error = parse_attributes(state, args); - if (error != XML_SUCCESS) - return error; - - quest = buffer_from_offset(args, state->buffer_pos); - if (end - quest < (int)TAG_LEN(END_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(quest, end, END_TAG)) - return XML_ERROR_INVALID; - - return state_set_pos(state, args, quest + TAG_LEN(END_TAG)); -} - -static enum xml_error parse_doctype(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<!DOCTYPE"; - static const char END_TAG[] = ">"; - - const char *bracket; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - if (end - start < (int)TAG_LEN(START_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_BUFFERDRY; - - start += TAG_LEN(START_TAG); - bracket = str_findstr(start, end, END_TAG); - if (bracket == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_DOCTYPE, start, bracket); - return state_set_pos(state, args, bracket + TAG_LEN(END_TAG)); -} - -static enum xml_error parse_start(struct xml *state, struct xml_args *args) -{ - enum xml_error error; - const char *gt, *name, *space; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - assert(TAG_MINSIZE <= end - start); - - if (!(start[0] == '<' && is_alpha(start[1]))) - return XML_ERROR_INVALID; - - name = start + 1; - space = str_find_notalnum(name, end); - if (space == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_START_TAG, name, space); - - state_set_pos(state, args, space); - error = parse_attributes(state, args); - if (error != XML_SUCCESS) - return error; - - gt = buffer_from_offset(args, state->buffer_pos); - - if (gt != end && *gt == '/') { - state_push_token(state, args, XML_END_TAG, name, space); - gt++; - } - - if (gt == end) - return XML_ERROR_BUFFERDRY; - - if (*gt != '>') - return XML_ERROR_INVALID; - - return state_set_pos(state, args, gt + 1); -} - -static enum xml_error parse_end(struct xml *state, struct xml_args *args) -{ - const char *gt, *space; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - assert(TAG_MINSIZE <= end - start); - - if (!(str_starts_with(start, end, "</") && is_alpha(start[2]))) - return XML_ERROR_INVALID; - - start += 2; - gt = str_findchr(start, end, '>'); - if (gt == end) - return XML_ERROR_BUFFERDRY; - - space = str_find_notalnum(start, gt); - if (str_ltrim(space, gt) != gt) - return XML_ERROR_STRICT; - - state_push_token(state, args, XML_END_TAG, start, space); - return state_set_pos(state, args, gt + 1); -} - -static enum xml_error parse_cdata(struct xml *state, struct xml_args *args) -{ - static const char START_TAG[] = "<![CDATA["; - static const char END_TAG[] = "]]>"; - - const char *bracket; - const char *start = buffer_from_offset(args, state->buffer_pos); - const char *end = buffer_getend(args); - if (end - start < (int)TAG_LEN(START_TAG)) - return XML_ERROR_BUFFERDRY; - - if (!str_starts_with(start, end, START_TAG)) - return XML_ERROR_INVALID; - - start += TAG_LEN(START_TAG); - bracket = str_findstr(start, end, END_TAG); - if (bracket == end) - return XML_ERROR_BUFFERDRY; - - state_push_token(state, args, XML_CDATA, start, bracket); - return state_set_pos(state, args, bracket + TAG_LEN(END_TAG)); -} - -void xml_init(struct xml *state) -{ - state->buffer_pos = 0; - state->ntokens = 0; - state->tag_level = 0; -} - -#define ROOT_FOUND(state) (0 < (state)->tag_level) -#define ROOT_PARSED(state) ((state)->tag_level == 0) - -enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_length, - struct xml_token tokens[], u32 num_tokens) -{ - struct xml temp = *state; - const char *end = buffer + buffer_length; - - struct xml_args args; - args.buffer = buffer; - args.buffer_length = buffer_length; - args.tokens = tokens; - args.num_tokens = num_tokens; - - while (!ROOT_FOUND(&temp)) { - enum xml_error error; - const char *start = buffer_from_offset(&args, temp.buffer_pos); - const char *lt = str_ltrim(start, end); - state_set_pos(&temp, &args, lt); - state_commit(state, &temp); - - if (end - lt < TAG_MINSIZE) - return XML_ERROR_BUFFERDRY; - - if (*lt != '<') - return XML_ERROR_INVALID; - - switch (lt[1]) { - case '?': - error = parse_instruction(&temp, &args); - break; - case '!': - error = (lt[2] == '-') ? parse_comment(&temp, &args) : - parse_doctype(&temp, &args); - break; - default: - error = parse_start(&temp, &args); - break; - } - - if (error != XML_SUCCESS) - return error; - - state_commit(state, &temp); - } - - while (!ROOT_PARSED(&temp)) { - enum xml_error error; - const char *start = buffer_from_offset(&args, temp.buffer_pos); - const char *lt = str_findchr(start, end, '<'); - while (buffer_from_offset(&args, temp.buffer_pos) != lt) { - error = parse_characters(&temp, &args, lt); - if (error != XML_SUCCESS) - return error; - - state_commit(state, &temp); - } - - if (end - lt < TAG_MINSIZE) - return XML_ERROR_BUFFERDRY; - - switch (lt[1]) { - case '?': - error = parse_instruction(&temp, &args); - break; - case '/': - error = parse_end(&temp, &args); - break; - case '!': - error = (lt[2] == '-') ? parse_comment(&temp, &args) : - parse_cdata(&temp, &args); - break; - default: - error = parse_start(&temp, &args); - break; - } - - if (error != XML_SUCCESS) - return error; - - state_commit(state, &temp); - } - - return XML_SUCCESS; -} |