diff options
author | Marvin Borner | 2021-03-26 21:55:50 +0100 |
---|---|---|
committer | Marvin Borner | 2021-03-26 22:02:20 +0100 |
commit | 05498860e8f7b1e8bb27880bc7526de026694804 (patch) | |
tree | 3bddf16e9439a950a3810d45e42a5cefdbcb7663 /libs/libtxt | |
parent | a96e9c4c858d47f61b89d879aa0ce6a02bdacb38 (diff) |
Renamed libs
Cleaner and more flexible.
Diffstat (limited to 'libs/libtxt')
-rw-r--r-- | libs/libtxt/Makefile | 20 | ||||
-rw-r--r-- | libs/libtxt/html.c | 238 | ||||
-rw-r--r-- | libs/libtxt/html.h | 26 | ||||
-rw-r--r-- | libs/libtxt/keymap.c | 89 | ||||
-rw-r--r-- | libs/libtxt/keymap.h | 16 | ||||
-rw-r--r-- | libs/libtxt/xml.c | 514 | ||||
-rw-r--r-- | libs/libtxt/xml.h | 51 |
7 files changed, 954 insertions, 0 deletions
diff --git a/libs/libtxt/Makefile b/libs/libtxt/Makefile new file mode 100644 index 0000000..93a7993 --- /dev/null +++ b/libs/libtxt/Makefile @@ -0,0 +1,20 @@ +# MIT License, Copyright (c) 2020 Marvin Borner + +COBJS = keymap.o #xml.o html.o +CC = ccache ../../cross/opt/bin/i686-elf-gcc +LD = ccache ../../cross/opt/bin/i686-elf-ld +AR = ccache ../../cross/opt/bin/i686-elf-ar + +CFLAGS = $(CFLAGS_DEFAULT) -I../ -I../libc/inc/ -Duserspace + +all: libtxt + +%.o: %.c + @$(CC) -c $(CFLAGS) $< -o $@ + +libtxt: $(COBJS) + @mkdir -p ../../build/ + @$(AR) rcs ../../build/libtxt.a $+ + +clean: + @find . -name "*.o" -type f -delete diff --git a/libs/libtxt/html.c b/libs/libtxt/html.c new file mode 100644 index 0000000..9295e17 --- /dev/null +++ b/libs/libtxt/html.c @@ -0,0 +1,238 @@ +// MIT License, Copyright (c) 2020 Marvin Borner +// HTML parsing is mainly based on the XML parser + +#include <assert.h> +#include <libgui/gui.h> +#include <libtxt/html.h> +#include <libtxt/xml.h> +#include <list.h> +#include <mem.h> +#include <print.h> +#include <str.h> + +static int is_self_closing(const char *tag) +{ + const char *void_elements[] = { "area", "base", "br", "col", "embed", "hr", "img", + "input", "link", "meta", "param", "source", "track", "wbr" }; + + for (u32 i = 0; i < sizeof(void_elements) / sizeof(void_elements[0]); ++i) { + if (!strcmp(void_elements[i], tag)) + return 1; + } + return 0; +} + +static char *normalize_tag_name(char *tag) +{ + for (char *p = tag; *p; ++p) + *p = *p > 0x40 && *p < 0x5b ? *p | 0x60 : *p; + return tag; +} + +static struct dom *new_object(const char *tag, struct dom *parent) +{ + struct dom *object = malloc(sizeof(*object)); + object->tag = strdup(tag); + object->parent = parent; + object->content = NULL; + object->children = list_new(); + return object; +} + +static void print_dom(struct dom *dom, u32 level) +{ + struct node *iterator = dom->children->head; + while (iterator != NULL) { + struct dom *obj = iterator->data; + for (u32 i = 0; i < level; i++) + print("\t"); + printf("'%s': '%s'\n", obj->tag, obj->content ? obj->content : ""); + if (obj->children->head) + print_dom(obj, level + 1); + iterator = iterator->next; + } +} + +static struct dom *generate_dom(char *data, u32 length) +{ + struct xml_token tokens[128]; + struct xml parser; + xml_init(&parser); + void *buffer = data; + enum xml_error error = xml_parse(&parser, buffer, length, tokens, 128); + + if (error != XML_SUCCESS && error != XML_ERROR_BUFFERDRY) { + printf("XML parse error: %d\n", err); + printf("DATA: '%s'\n", data); + return NULL; + } + + struct dom *root = new_object("root", NULL); + struct dom *current = root; + + static char name[256] = { 0 }; + for (u32 i = 0; i < parser.ntokens; i++) { + const struct xml_token *token = tokens + i; + name[0] = '\0'; + switch (token->type) { + case XML_START_TAG: + memcpy(&name, (u8 *)buffer + token->start_pos, + token->end_pos - token->start_pos); + name[token->end_pos - token->start_pos] = '\0'; + normalize_tag_name(name); + current = new_object(name, current); + printf("Adding %s to %s\n", current->tag, current->parent->tag); + list_add(current->parent->children, current); + if (is_self_closing(name)) + current = current->parent; + break; + case XML_END_TAG: + memcpy(&name, (u8 *)buffer + token->start_pos, + token->end_pos - token->start_pos); + name[token->end_pos - token->start_pos] = '\0'; + normalize_tag_name(name); + + if (is_self_closing(name)) + break; + + if (!current || !current->parent || strcmp(name, current->tag)) + return NULL; + + current = current->parent; + break; + case XML_CHARACTER: + if (!current) + continue; + + if (token->end_pos == token->start_pos + 2) { + const char *ptr = (char *)buffer + token->start_pos; + + if (ptr[0] == '\r' && ptr[1] == '\n') + continue; + } + memcpy(&name, (u8 *)buffer + token->start_pos, + token->end_pos - token->start_pos); + name[token->end_pos - token->start_pos] = '\0'; + char *clean_name = name; + for (char *p = name; *p; p++) { + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') { + clean_name++; + } else { + break; + } + } + if (!strlen(clean_name)) + break; + current->content = strdup(clean_name); + break; + default: + break; + } + + i += token->size; + } + + assert(root); + print("GENERATED!\n"); + print_dom(root, 0); + return root; +} + +static struct html_element *new_html_element(struct element *container, struct dom *dom) +{ + struct html_element *elem = malloc(sizeof(*elem)); + elem->x_offset = 0; + elem->y_offset = 0; + elem->dom = dom; + elem->obj = container; + return elem; +} + +// TODO: Better structure? +// TODO: Less code duplication (e.g. for headings) +#define CMP(tag, tag_string) (!strcmp((tag), (tag_string))) +static struct html_element *render_object(struct html_element *container, struct dom *dom) +{ + char *tag = dom->tag; + + assert(container); + if (CMP(tag, "html")) { + struct element *obj = + gui_add_container(container->obj, 0, 0, 100, 100, COLOR_WHITE); + return new_html_element(obj, dom); + } else if (CMP(tag, "body")) { + struct element *obj = + gui_add_container(container->obj, 0, 0, 100, 100, COLOR_WHITE); + return new_html_element(obj, dom); + } else if (CMP(tag, "h1")) { + struct element *obj = + gui_add_label(container->obj, container->x_offset, container->y_offset, + FONT_32, dom->content, COLOR_WHITE, COLOR_BLACK); + container->x_offset = 0; + container->y_offset += obj->ctx->size.y; + return new_html_element(obj, dom); + } else if (CMP(tag, "h2")) { + struct element *obj = + gui_add_label(container->obj, container->x_offset, container->y_offset, + FONT_24, dom->content, COLOR_WHITE, COLOR_BLACK); + container->x_offset = 0; + container->y_offset += obj->ctx->size.y; + return new_html_element(obj, dom); + } else if (CMP(tag, "h3")) { + struct element *obj = + gui_add_label(container->obj, container->x_offset, container->y_offset, + FONT_16, dom->content, COLOR_WHITE, COLOR_BLACK); + container->x_offset = 0; + container->y_offset += obj->ctx->size.y; + return new_html_element(obj, dom); + } else if (CMP(tag, "p")) { + struct element *obj = + gui_add_label(container->obj, container->x_offset, container->y_offset, + FONT_16, dom->content, COLOR_WHITE, COLOR_BLACK); + container->x_offset = 0; + container->y_offset += obj->ctx->size.y; + return new_html_element(obj, dom); + } else if (CMP(tag, "hr")) { + gfx_draw_rectangle(container->obj->ctx, + vec2(container->x_offset, container->y_offset), + vec2(container->obj->ctx->size.x - container->x_offset, + container->y_offset + 2), + COLOR_BLACK); + container->x_offset = 0; + container->y_offset += 2; + return container; + } else if (CMP(tag, "head") || CMP(tag, "meta") || CMP(tag, "title")) { + return container; + } else { + printf("UNKNOWN %s\n", tag); + if (dom->content && strlen(dom->content) > 0) { + struct element *obj = gui_add_label(container->obj, container->x_offset, + container->y_offset, FONT_16, + dom->content, COLOR_WHITE, COLOR_BLACK); + container->x_offset = 0; + container->y_offset += obj->ctx->size.y; + return new_html_element(obj, dom); + } + return container; + } +} + +int html_render_dom(struct html_element *container, struct dom *dom) +{ + struct node *iterator = dom->children->head; + while (iterator != NULL) { + struct dom *obj = iterator->data; + struct html_element *rendered = render_object(container, obj); + if (obj->children->head && rendered) + html_render_dom(rendered, obj); + iterator = iterator->next; + } + return 1; +} + +int html_render(struct element *container, char *data, u32 length) +{ + struct dom *dom = generate_dom(data, length); + struct html_element *obj = new_html_element(container, dom); + return dom && obj && html_render_dom(obj, dom); +} diff --git a/libs/libtxt/html.h b/libs/libtxt/html.h new file mode 100644 index 0000000..c1b29f2 --- /dev/null +++ b/libs/libtxt/html.h @@ -0,0 +1,26 @@ +// MIT License, Copyright (c) 2020 Marvin Borner +// HTML parsing is mainly based on the XML parser + +#ifndef HTML_H +#define HTML_H + +#include <def.h> +#include <list.h> + +struct dom { + char *tag; + char *content; + struct dom *parent; + struct list *children; +}; + +struct html_element { + u32 x_offset; + u32 y_offset; + struct dom *dom; + struct element *obj; +}; + +int html_render(struct element *container, char *data, u32 length); + +#endif diff --git a/libs/libtxt/keymap.c b/libs/libtxt/keymap.c new file mode 100644 index 0000000..67054f6 --- /dev/null +++ b/libs/libtxt/keymap.c @@ -0,0 +1,89 @@ +// MIT License, Copyright (c) 2020 Marvin Borner + +#include <def.h> +#include <libtxt/keymap.h> +#include <mem.h> +#include <print.h> +#include <sys.h> + +static void map(struct keymap *keymap, int line, char ch, int index) +{ + switch (line) { + case 0: + keymap->map[index] = ch; + break; + case 1: + keymap->shift_map[index] = ch; + break; + case 2: + keymap->alt_map[index] = ch; + break; + default: + break; + } +} + +// Very ugly code but it should work for now +struct keymap *keymap_parse(const char *path) +{ + char *keymap_src = sread(path); + if (!keymap_src) + return NULL; + struct keymap *keymap = malloc(sizeof(*keymap)); + + int index = 0; + int ch_index = 0; + char ch; + int escaped = 0; + int line = 0; + int skip = 0; + while ((ch = keymap_src[index]) != '\0' || escaped) { + if (ch == ' ' && !skip) { + skip = 1; + index++; + continue; + } else if (ch == '\n') { + ch_index = 0; + index++; + line++; + continue; + } else if (ch == '\\' && !escaped) { + escaped = 1; + index++; + continue; + } + skip = 0; + + if (ch == ' ' && !escaped) + ch = 0; + + ch_index++; + if (escaped) { + switch (ch) { + case 'b': + ch = '\b'; + break; + case 't': + ch = '\t'; + break; + case 'n': + ch = '\n'; + break; + case '\\': + ch = '\\'; + break; + case ' ': + ch = ' '; + break; + default: + print("Unknown escape!\n"); + } + escaped = 0; + } + + map(keymap, line, ch, ch_index); + index++; + } + + return keymap; +} diff --git a/libs/libtxt/keymap.h b/libs/libtxt/keymap.h new file mode 100644 index 0000000..9f1966e --- /dev/null +++ b/libs/libtxt/keymap.h @@ -0,0 +1,16 @@ +// MIT License, Copyright (c) 2020 Marvin Borner + +#ifndef KEYMAP_H +#define KEYMAP_H + +#define KEYMAP_LENGTH 90 + +struct keymap { + char map[KEYMAP_LENGTH]; + char shift_map[KEYMAP_LENGTH]; + char alt_map[KEYMAP_LENGTH]; +}; + +struct keymap *keymap_parse(const char *path); + +#endif diff --git a/libs/libtxt/xml.c b/libs/libtxt/xml.c new file mode 100644 index 0000000..9a5fd76 --- /dev/null +++ b/libs/libtxt/xml.c @@ -0,0 +1,514 @@ +// Inspired by sxml (capmar) +// MIT License, Copyright (c) 2020 Marvin Borner + +#include <assert.h> +#include <libtxt/xml.h> +#include <mem.h> +#include <str.h> + +static const char *str_findchr(const char *start, const char *end, int c) +{ + const char *it; + + assert(start <= end); + assert(0 <= c && c <= 127); + + it = (const char *)memchr((void *)start, c, end - start); + return (it != NULL) ? it : end; +} + +static const char *str_findstr(const char *start, const char *end, const char *needle) +{ + u32 needlelen; + int first; + assert(start <= end); + + needlelen = strlen(needle); + assert(0 < needlelen); + first = (u8)needle[0]; + + while (start + needlelen <= end) { + const char *it = + (const char *)memchr((void *)start, first, (end - start) - (needlelen - 1)); + if (it == NULL) + break; + + if (memcmp(it, needle, needlelen) == 0) + return it; + + start = it + 1; + } + + return end; +} + +static int str_starts_with(const char *start, const char *end, const char *prefix) +{ + long nbytes; + assert(start <= end); + + nbytes = strlen(prefix); + if (end - start < nbytes) + return 0; + + return memcmp(prefix, start, nbytes) == 0; +} + +static int white_space(int c) +{ + switch (c) { + case ' ': + case '\t': + case '\r': + case '\n': + return 1; + } + + return 0; +} + +static int name_start_char(int c) +{ + if (0x80 <= c) + return 1; + + return c == ':' || ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z'); +} + +static int name_char(int c) +{ + return name_start_char(c) || c == '-' || c == '.' || ('0' <= c && c <= '9') || c == 0xB7 || + (0x0300 <= c && c <= 0x036F) || (0x203F <= c && c <= 0x2040); +} + +#define is_space(c) (white_space(((u8)(c)))) +#define is_alpha(c) (name_start_char(((u8)(c)))) +#define is_alnum(c) (name_char(((u8)(c)))) + +static const char *str_ltrim(const char *start, const char *end) +{ + const char *it; + assert(start <= end); + + for (it = start; it != end && is_space(*it); it++) + ; + + return it; +} + +static const char *str_rtrim(const char *start, const char *end) +{ + const char *it, *prev; + assert(start <= end); + + for (it = end; start != it; it = prev) { + prev = it - 1; + if (!is_space(*prev)) + return it; + } + + return start; +} + +static const char *str_find_notalnum(const char *start, const char *end) +{ + const char *it; + assert(start <= end); + + for (it = start; it != end && is_alnum(*it); it++) + ; + + return it; +} + +#define buffer_from_offset(args, i) ((args)->buffer + (i)) +#define buffer_tooffset(args, ptr) (unsigned)((ptr) - (args)->buffer) +#define buffer_getend(args) ((args)->buffer + (args)->buffer_length) + +static int state_push_token(struct xml *state, struct xml_args *args, enum xml_type type, + const char *start, const char *end) +{ + struct xml_token *token; + u32 i; + if (args->num_tokens <= state->ntokens) + return 0; + + i = state->ntokens++; + token = &args->tokens[i]; + token->type = type; + token->start_pos = buffer_tooffset(args, start); + token->end_pos = buffer_tooffset(args, end); + token->size = 0; + + switch (type) { + case XML_START_TAG: + state->tag_level++; + break; + + case XML_END_TAG: + assert(0 < state->tag_level); + state->tag_level--; + break; + + default: + break; + } + + return 1; +} + +static enum xml_error state_set_pos(struct xml *state, const struct xml_args *args, const char *ptr) +{ + state->buffer_pos = buffer_tooffset(args, ptr); + return (state->ntokens <= args->num_tokens) ? XML_SUCCESS : XML_ERROR_TOKENSFULL; +} + +#define state_commit(dest, src) memcpy((dest), (src), sizeof(struct xml)) + +#define XML_ERROR_STRICT XML_ERROR_INVALID +#define ENTITY_MAXLEN 8 +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +static enum xml_error parse_characters(struct xml *state, struct xml_args *args, const char *end) +{ + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *limit, *colon, *ampr = str_findchr(start, end, '&'); + assert(end <= buffer_getend(args)); + + if (ampr != start) + state_push_token(state, args, XML_CHARACTER, start, ampr); + + if (ampr == end) + return state_set_pos(state, args, ampr); + + limit = MIN(ampr + ENTITY_MAXLEN, end); + colon = str_findchr(ampr, limit, ';'); + if (colon == limit) + return (limit == end) ? XML_ERROR_BUFFERDRY : XML_ERROR_INVALID; + + start = colon + 1; + state_push_token(state, args, XML_CHARACTER, ampr, start); + return state_set_pos(state, args, start); +} + +static enum xml_error parse_attrvalue(struct xml *state, struct xml_args *args, const char *end) +{ + while (buffer_from_offset(args, state->buffer_pos) != end) { + enum xml_error error = parse_characters(state, args, end); + if (error != XML_SUCCESS) + return error; + } + + return XML_SUCCESS; +} + +static enum xml_error parse_attributes(struct xml *state, struct xml_args *args) +{ + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *end = buffer_getend(args); + const char *name = str_ltrim(start, end); + + u32 ntokens = state->ntokens; + assert(0 < ntokens); + + while (name != end && is_alpha(*name)) { + const char *eq, *space, *quot, *value; + enum xml_error error; + + eq = str_findchr(name, end, '='); + if (eq == end) + return XML_ERROR_BUFFERDRY; + + space = str_rtrim(name, eq); + state_push_token(state, args, XML_CDATA, name, space); + + quot = str_ltrim(eq + 1, end); + if (quot == end) + return XML_ERROR_BUFFERDRY; + else if (*quot != '\'' && *quot != '"') + return XML_ERROR_INVALID; + + value = quot + 1; + quot = str_findchr(value, end, *quot); + if (quot == end) + return XML_ERROR_BUFFERDRY; + + state_set_pos(state, args, value); + error = parse_attrvalue(state, args, quot); + if (error != XML_SUCCESS) + return error; + + name = str_ltrim(quot + 1, end); + } + + { + struct xml_token *token = args->tokens + (ntokens - 1); + token->size = (u16)(state->ntokens - ntokens); + } + + return state_set_pos(state, args, name); +} + +#define TAG_LEN(str) (sizeof(str) - 1) +#define TAG_MINSIZE 1 + +static enum xml_error parse_comment(struct xml *state, struct xml_args *args) +{ + static const char START_TAG[] = "<!--"; + static const char END_TAG[] = "-->"; + + const char *dash; + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *end = buffer_getend(args); + if (end - start < (int)TAG_LEN(START_TAG)) + return XML_ERROR_BUFFERDRY; + + if (!str_starts_with(start, end, START_TAG)) + return XML_ERROR_INVALID; + + start += TAG_LEN(START_TAG); + dash = str_findstr(start, end, END_TAG); + if (dash == end) + return XML_ERROR_BUFFERDRY; + + state_push_token(state, args, XML_COMMENT, start, dash); + return state_set_pos(state, args, dash + TAG_LEN(END_TAG)); +} + +static enum xml_error parse_instruction(struct xml *state, struct xml_args *args) +{ + static const char START_TAG[] = "<?"; + static const char END_TAG[] = "?>"; + + enum xml_error error; + const char *quest, *space; + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *end = buffer_getend(args); + assert(TAG_MINSIZE <= end - start); + + if (!str_starts_with(start, end, START_TAG)) + return XML_ERROR_INVALID; + + start += TAG_LEN(START_TAG); + space = str_find_notalnum(start, end); + if (space == end) + return XML_ERROR_BUFFERDRY; + + state_push_token(state, args, XML_INSTRUCTION, start, space); + + state_set_pos(state, args, space); + error = parse_attributes(state, args); + if (error != XML_SUCCESS) + return error; + + quest = buffer_from_offset(args, state->buffer_pos); + if (end - quest < (int)TAG_LEN(END_TAG)) + return XML_ERROR_BUFFERDRY; + + if (!str_starts_with(quest, end, END_TAG)) + return XML_ERROR_INVALID; + + return state_set_pos(state, args, quest + TAG_LEN(END_TAG)); +} + +static enum xml_error parse_doctype(struct xml *state, struct xml_args *args) +{ + static const char START_TAG[] = "<!DOCTYPE"; + static const char END_TAG[] = ">"; + + const char *bracket; + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *end = buffer_getend(args); + if (end - start < (int)TAG_LEN(START_TAG)) + return XML_ERROR_BUFFERDRY; + + if (!str_starts_with(start, end, START_TAG)) + return XML_ERROR_BUFFERDRY; + + start += TAG_LEN(START_TAG); + bracket = str_findstr(start, end, END_TAG); + if (bracket == end) + return XML_ERROR_BUFFERDRY; + + state_push_token(state, args, XML_DOCTYPE, start, bracket); + return state_set_pos(state, args, bracket + TAG_LEN(END_TAG)); +} + +static enum xml_error parse_start(struct xml *state, struct xml_args *args) +{ + enum xml_error error; + const char *gt, *name, *space; + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *end = buffer_getend(args); + assert(TAG_MINSIZE <= end - start); + + if (!(start[0] == '<' && is_alpha(start[1]))) + return XML_ERROR_INVALID; + + name = start + 1; + space = str_find_notalnum(name, end); + if (space == end) + return XML_ERROR_BUFFERDRY; + + state_push_token(state, args, XML_START_TAG, name, space); + + state_set_pos(state, args, space); + error = parse_attributes(state, args); + if (error != XML_SUCCESS) + return error; + + gt = buffer_from_offset(args, state->buffer_pos); + + if (gt != end && *gt == '/') { + state_push_token(state, args, XML_END_TAG, name, space); + gt++; + } + + if (gt == end) + return XML_ERROR_BUFFERDRY; + + if (*gt != '>') + return XML_ERROR_INVALID; + + return state_set_pos(state, args, gt + 1); +} + +static enum xml_error parse_end(struct xml *state, struct xml_args *args) +{ + const char *gt, *space; + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *end = buffer_getend(args); + assert(TAG_MINSIZE <= end - start); + + if (!(str_starts_with(start, end, "</") && is_alpha(start[2]))) + return XML_ERROR_INVALID; + + start += 2; + gt = str_findchr(start, end, '>'); + if (gt == end) + return XML_ERROR_BUFFERDRY; + + space = str_find_notalnum(start, gt); + if (str_ltrim(space, gt) != gt) + return XML_ERROR_STRICT; + + state_push_token(state, args, XML_END_TAG, start, space); + return state_set_pos(state, args, gt + 1); +} + +static enum xml_error parse_cdata(struct xml *state, struct xml_args *args) +{ + static const char START_TAG[] = "<![CDATA["; + static const char END_TAG[] = "]]>"; + + const char *bracket; + const char *start = buffer_from_offset(args, state->buffer_pos); + const char *end = buffer_getend(args); + if (end - start < (int)TAG_LEN(START_TAG)) + return XML_ERROR_BUFFERDRY; + + if (!str_starts_with(start, end, START_TAG)) + return XML_ERROR_INVALID; + + start += TAG_LEN(START_TAG); + bracket = str_findstr(start, end, END_TAG); + if (bracket == end) + return XML_ERROR_BUFFERDRY; + + state_push_token(state, args, XML_CDATA, start, bracket); + return state_set_pos(state, args, bracket + TAG_LEN(END_TAG)); +} + +void xml_init(struct xml *state) +{ + state->buffer_pos = 0; + state->ntokens = 0; + state->tag_level = 0; +} + +#define ROOT_FOUND(state) (0 < (state)->tag_level) +#define ROOT_PARSED(state) ((state)->tag_level == 0) + +enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_length, + struct xml_token tokens[], u32 num_tokens) +{ + struct xml temp = *state; + const char *end = buffer + buffer_length; + + struct xml_args args; + args.buffer = buffer; + args.buffer_length = buffer_length; + args.tokens = tokens; + args.num_tokens = num_tokens; + + while (!ROOT_FOUND(&temp)) { + enum xml_error error; + const char *start = buffer_from_offset(&args, temp.buffer_pos); + const char *lt = str_ltrim(start, end); + state_set_pos(&temp, &args, lt); + state_commit(state, &temp); + + if (end - lt < TAG_MINSIZE) + return XML_ERROR_BUFFERDRY; + + if (*lt != '<') + return XML_ERROR_INVALID; + + switch (lt[1]) { + case '?': + error = parse_instruction(&temp, &args); + break; + case '!': + error = (lt[2] == '-') ? parse_comment(&temp, &args) : + parse_doctype(&temp, &args); + break; + default: + error = parse_start(&temp, &args); + break; + } + + if (error != XML_SUCCESS) + return error; + + state_commit(state, &temp); + } + + while (!ROOT_PARSED(&temp)) { + enum xml_error error; + const char *start = buffer_from_offset(&args, temp.buffer_pos); + const char *lt = str_findchr(start, end, '<'); + while (buffer_from_offset(&args, temp.buffer_pos) != lt) { + error = parse_characters(&temp, &args, lt); + if (error != XML_SUCCESS) + return error; + + state_commit(state, &temp); + } + + if (end - lt < TAG_MINSIZE) + return XML_ERROR_BUFFERDRY; + + switch (lt[1]) { + case '?': + error = parse_instruction(&temp, &args); + break; + case '/': + error = parse_end(&temp, &args); + break; + case '!': + error = (lt[2] == '-') ? parse_comment(&temp, &args) : + parse_cdata(&temp, &args); + break; + default: + error = parse_start(&temp, &args); + break; + } + + if (error != XML_SUCCESS) + return error; + + state_commit(state, &temp); + } + + return XML_SUCCESS; +} diff --git a/libs/libtxt/xml.h b/libs/libtxt/xml.h new file mode 100644 index 0000000..43a8005 --- /dev/null +++ b/libs/libtxt/xml.h @@ -0,0 +1,51 @@ +// Inspired by sxml (capmar) +// MIT License, Copyright (c) 2020 Marvin Borner + +#ifndef XML_H +#define XML_H + +#include <def.h> + +enum xml_error { + XML_ERROR_INVALID = -1, + XML_SUCCESS = 0, + XML_ERROR_BUFFERDRY = 1, + XML_ERROR_TOKENSFULL = 2 +}; + +struct xml_token { + u16 type; + u16 size; + u32 start_pos; + u32 end_pos; +}; + +struct xml_args { + const char *buffer; + u32 buffer_length; + struct xml_token *tokens; + u32 num_tokens; +}; + +enum xml_type { + XML_START_TAG, + XML_END_TAG, + XML_CHARACTER, + XML_CDATA, + XML_INSTRUCTION, + XML_DOCTYPE, + XML_COMMENT +}; + +struct xml { + u32 buffer_pos; + u32 ntokens; + u32 tag_level; +}; + +enum xml_error xml_parse(struct xml *parser, const char *buffer, u32 buffer_length, + struct xml_token *tokens, u32 num_tokens); + +void xml_init(struct xml *parser); + +#endif |