aboutsummaryrefslogtreecommitdiff
path: root/libs/libtxt
diff options
context:
space:
mode:
authorMarvin Borner2021-03-26 21:55:50 +0100
committerMarvin Borner2021-03-26 22:02:20 +0100
commit05498860e8f7b1e8bb27880bc7526de026694804 (patch)
tree3bddf16e9439a950a3810d45e42a5cefdbcb7663 /libs/libtxt
parenta96e9c4c858d47f61b89d879aa0ce6a02bdacb38 (diff)
Renamed libs
Cleaner and more flexible.
Diffstat (limited to 'libs/libtxt')
-rw-r--r--libs/libtxt/Makefile20
-rw-r--r--libs/libtxt/html.c238
-rw-r--r--libs/libtxt/html.h26
-rw-r--r--libs/libtxt/keymap.c89
-rw-r--r--libs/libtxt/keymap.h16
-rw-r--r--libs/libtxt/xml.c514
-rw-r--r--libs/libtxt/xml.h51
7 files changed, 954 insertions, 0 deletions
diff --git a/libs/libtxt/Makefile b/libs/libtxt/Makefile
new file mode 100644
index 0000000..93a7993
--- /dev/null
+++ b/libs/libtxt/Makefile
@@ -0,0 +1,20 @@
+# MIT License, Copyright (c) 2020 Marvin Borner
+
+# Objects that go into libtxt.a (xml.o and html.o are currently disabled)
+COBJS = keymap.o #xml.o html.o
+CC = ccache ../../cross/opt/bin/i686-elf-gcc
+LD = ccache ../../cross/opt/bin/i686-elf-ld
+AR = ccache ../../cross/opt/bin/i686-elf-ar
+
+CFLAGS = $(CFLAGS_DEFAULT) -I../ -I../libc/inc/ -Duserspace
+
+# None of these targets creates a file with its own name
+.PHONY: all libtxt clean
+
+all: libtxt
+
+%.o: %.c
+	@$(CC) -c $(CFLAGS) $< -o $@
+
+# Archive all objects into the shared build directory
+libtxt: $(COBJS)
+	@mkdir -p ../../build/
+	@$(AR) rcs ../../build/libtxt.a $+
+
+clean:
+	@find . -name "*.o" -type f -delete
diff --git a/libs/libtxt/html.c b/libs/libtxt/html.c
new file mode 100644
index 0000000..9295e17
--- /dev/null
+++ b/libs/libtxt/html.c
@@ -0,0 +1,238 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#include <assert.h>
+#include <libgui/gui.h>
+#include <libtxt/html.h>
+#include <libtxt/xml.h>
+#include <list.h>
+#include <mem.h>
+#include <print.h>
+#include <str.h>
+
+// Returns 1 if `tag` names an HTML void element (an element without a closing
+// tag), 0 otherwise. The comparison is case-sensitive, so callers pass
+// lower-cased names.
+static int is_self_closing(const char *tag)
+{
+	const char *void_elements[] = { "area", "base", "br", "col", "embed", "hr", "img",
+					"input", "link", "meta", "param", "source", "track", "wbr" };
+	const u32 count = sizeof(void_elements) / sizeof(void_elements[0]);
+
+	u32 idx = 0;
+	while (idx < count) {
+		if (strcmp(void_elements[idx], tag) == 0)
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
+// Lower-cases the ASCII letters of `tag` in place and returns `tag`.
+// All other bytes are left untouched.
+static char *normalize_tag_name(char *tag)
+{
+	char *cursor = tag;
+	while (*cursor != '\0') {
+		// 'A'..'Z' and 'a'..'z' differ only in bit 5
+		if (*cursor >= 'A' && *cursor <= 'Z')
+			*cursor |= 0x20;
+		cursor++;
+	}
+	return tag;
+}
+
+// Allocates a DOM node named `tag` (the name is duplicated with strdup) that
+// hangs below `parent`, has no text content and owns a fresh, empty child list.
+// The node is NOT appended to the parent's child list here.
+// The result of malloc() is used without a NULL check.
+static struct dom *new_object(const char *tag, struct dom *parent)
+{
+	struct dom *node = malloc(sizeof(struct dom));
+
+	node->parent = parent;
+	node->content = NULL;
+	node->tag = strdup(tag);
+	node->children = list_new();
+
+	return node;
+}
+
+// Debug helper: prints every descendant of `dom` as "'tag': 'content'",
+// one per line, indented with `level` tabs plus one tab per nesting depth.
+// `dom` itself is not printed.
+static void print_dom(struct dom *dom, u32 level)
+{
+	for (struct node *it = dom->children->head; it != NULL; it = it->next) {
+		struct dom *child = it->data;
+
+		for (u32 remaining = level; remaining > 0; remaining--)
+			print("\t");
+
+		const char *text = child->content ? child->content : "";
+		printf("'%s': '%s'\n", child->tag, text);
+
+		if (child->children->head)
+			print_dom(child, level + 1);
+	}
+}
+
+// Tokenizes `data` (`length` bytes) with the XML parser and builds a DOM tree
+// from the resulting tokens.
+//
+// Returns the synthetic "root" node whose children are the top-level elements,
+// or NULL if the parser reports an error other than XML_ERROR_BUFFERDRY, or if
+// an end tag does not match the currently open element.
+//
+// At most 128 tokens are used. Tag names and text runs longer than 255 bytes
+// are truncated to fit the scratch buffer.
+// NOTE: on a mismatched end tag the partially built tree is not released.
+static struct dom *generate_dom(char *data, u32 length)
+{
+	struct xml_token tokens[128];
+	struct xml parser;
+	xml_init(&parser);
+	void *buffer = data;
+	enum xml_error error = xml_parse(&parser, buffer, length, tokens, 128);
+
+	// A dry buffer is tolerated; the tokens committed so far are still used
+	if (error != XML_SUCCESS && error != XML_ERROR_BUFFERDRY) {
+		printf("XML parse error: %d\n", error);
+		printf("DATA: '%s'\n", data);
+		return NULL;
+	}
+
+	struct dom *root = new_object("root", NULL);
+	struct dom *current = root;
+
+	static char name[256] = { 0 };
+	for (u32 i = 0; i < parser.ntokens; i++) {
+		const struct xml_token *token = tokens + i;
+		const char *token_start = (const char *)buffer + token->start_pos;
+
+		// Clamp the copy length so that over-long tokens cannot overflow `name`
+		u32 token_length = token->end_pos - token->start_pos;
+		if (token_length > sizeof(name) - 1)
+			token_length = sizeof(name) - 1;
+
+		name[0] = '\0';
+		switch (token->type) {
+		case XML_START_TAG:
+			memcpy(name, token_start, token_length);
+			name[token_length] = '\0';
+			normalize_tag_name(name);
+			current = new_object(name, current);
+			printf("Adding %s to %s\n", current->tag, current->parent->tag);
+			list_add(current->parent->children, current);
+			// Void elements never get an end tag, so close them right away
+			if (is_self_closing(name))
+				current = current->parent;
+			break;
+		case XML_END_TAG:
+			memcpy(name, token_start, token_length);
+			name[token_length] = '\0';
+			normalize_tag_name(name);
+
+			// Already closed when the start tag was seen
+			if (is_self_closing(name))
+				break;
+
+			if (!current || !current->parent || strcmp(name, current->tag))
+				return NULL;
+
+			current = current->parent;
+			break;
+		case XML_CHARACTER:
+			if (!current)
+				continue;
+
+			// Skip a text run that is nothing but a CRLF line break
+			if (token->end_pos == token->start_pos + 2) {
+				if (token_start[0] == '\r' && token_start[1] == '\n')
+					continue;
+			}
+			memcpy(name, token_start, token_length);
+			name[token_length] = '\0';
+
+			// Strip leading whitespace
+			char *clean_name = name;
+			while (*clean_name == ' ' || *clean_name == '\n' || *clean_name == '\r' ||
+			       *clean_name == '\t')
+				clean_name++;
+
+			if (!strlen(clean_name))
+				break;
+			current->content = strdup(clean_name);
+			break;
+		default:
+			break;
+		}
+
+		// Skip the attribute tokens that belong to this tag
+		i += token->size;
+	}
+
+	assert(root);
+	print("GENERATED!\n");
+	print_dom(root, 0);
+	return root;
+}
+
+// Allocates a render-tree entry that pairs the GUI element `container` with
+// the DOM node `dom`. The layout cursor starts at offset (0, 0).
+// The result of malloc() is used without a NULL check.
+static struct html_element *new_html_element(struct element *container, struct dom *dom)
+{
+	struct html_element *entry = malloc(sizeof(struct html_element));
+
+	entry->obj = container;
+	entry->dom = dom;
+	entry->x_offset = 0;
+	entry->y_offset = 0;
+
+	return entry;
+}
+
+// TODO: Better structure?
+// TODO: Less code duplication (e.g. for headings)
+// True when the two tag strings are equal
+#define CMP(tag, tag_string) (!strcmp((tag), (tag_string)))
+// Creates the GUI representation of a single DOM node inside `container`.
+//
+// - html/body: adds a container element and returns a new html_element for it
+// - h1/h2/h3/p: adds a label with the node's content, resets the container's
+//   x cursor and advances its y cursor by the label height, returns a new
+//   html_element for the label
+// - hr: draws a rectangle on the container's context, advances the y cursor
+//   by 2 and returns `container` itself
+// - head/meta/title: nothing is rendered, returns `container`
+// - anything else: logs the tag; rendered as a FONT_16 label if the node has
+//   non-empty content, otherwise returns `container`
+//
+// NOTE(review): for h1/h2/h3/p, dom->content is passed to gui_add_label()
+// without a NULL check - confirm that gui_add_label() accepts NULL.
+static struct html_element *render_object(struct html_element *container, struct dom *dom)
+{
+ char *tag = dom->tag;
+
+ assert(container);
+ if (CMP(tag, "html")) {
+ struct element *obj =
+ gui_add_container(container->obj, 0, 0, 100, 100, COLOR_WHITE);
+ return new_html_element(obj, dom);
+ } else if (CMP(tag, "body")) {
+ struct element *obj =
+ gui_add_container(container->obj, 0, 0, 100, 100, COLOR_WHITE);
+ return new_html_element(obj, dom);
+ } else if (CMP(tag, "h1")) {
+ struct element *obj =
+ gui_add_label(container->obj, container->x_offset, container->y_offset,
+ FONT_32, dom->content, COLOR_WHITE, COLOR_BLACK);
+ // Next sibling starts on a new line below this label
+ container->x_offset = 0;
+ container->y_offset += obj->ctx->size.y;
+ return new_html_element(obj, dom);
+ } else if (CMP(tag, "h2")) {
+ struct element *obj =
+ gui_add_label(container->obj, container->x_offset, container->y_offset,
+ FONT_24, dom->content, COLOR_WHITE, COLOR_BLACK);
+ container->x_offset = 0;
+ container->y_offset += obj->ctx->size.y;
+ return new_html_element(obj, dom);
+ } else if (CMP(tag, "h3")) {
+ struct element *obj =
+ gui_add_label(container->obj, container->x_offset, container->y_offset,
+ FONT_16, dom->content, COLOR_WHITE, COLOR_BLACK);
+ container->x_offset = 0;
+ container->y_offset += obj->ctx->size.y;
+ return new_html_element(obj, dom);
+ } else if (CMP(tag, "p")) {
+ struct element *obj =
+ gui_add_label(container->obj, container->x_offset, container->y_offset,
+ FONT_16, dom->content, COLOR_WHITE, COLOR_BLACK);
+ container->x_offset = 0;
+ container->y_offset += obj->ctx->size.y;
+ return new_html_element(obj, dom);
+ } else if (CMP(tag, "hr")) {
+ // Drawn directly onto the container; no GUI element is created
+ gfx_draw_rectangle(container->obj->ctx,
+ vec2(container->x_offset, container->y_offset),
+ vec2(container->obj->ctx->size.x - container->x_offset,
+ container->y_offset + 2),
+ COLOR_BLACK);
+ container->x_offset = 0;
+ container->y_offset += 2;
+ return container;
+ } else if (CMP(tag, "head") || CMP(tag, "meta") || CMP(tag, "title")) {
+ // Not rendered; children (if any) are laid out in the same container
+ return container;
+ } else {
+ printf("UNKNOWN %s\n", tag);
+ if (dom->content && strlen(dom->content) > 0) {
+ struct element *obj = gui_add_label(container->obj, container->x_offset,
+ container->y_offset, FONT_16,
+ dom->content, COLOR_WHITE, COLOR_BLACK);
+ container->x_offset = 0;
+ container->y_offset += obj->ctx->size.y;
+ return new_html_element(obj, dom);
+ }
+ return container;
+ }
+}
+
+// Renders every child of `dom` into `container`, depth first: each child is
+// rendered via render_object() and, if it has children of its own and
+// render_object() returned an element, its subtree is rendered into that
+// element. Always returns 1.
+int html_render_dom(struct html_element *container, struct dom *dom)
+{
+	for (struct node *it = dom->children->head; it != NULL; it = it->next) {
+		struct dom *child = it->data;
+		struct html_element *rendered = render_object(container, child);
+
+		if (!child->children->head || !rendered)
+			continue;
+
+		html_render_dom(rendered, child);
+	}
+
+	return 1;
+}
+
+// Parses `length` bytes of HTML from `data` and renders the resulting DOM
+// into the GUI element `container`.
+// Returns 1 on success, 0 if the document could not be turned into a DOM.
+int html_render(struct element *container, char *data, u32 length)
+{
+	struct dom *dom = generate_dom(data, length);
+	// Bail out before allocating anything when parsing failed
+	if (!dom)
+		return 0;
+
+	struct html_element *obj = new_html_element(container, dom);
+	if (!obj)
+		return 0;
+
+	return html_render_dom(obj, dom) != 0;
+}
diff --git a/libs/libtxt/html.h b/libs/libtxt/html.h
new file mode 100644
index 0000000..c1b29f2
--- /dev/null
+++ b/libs/libtxt/html.h
@@ -0,0 +1,26 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#ifndef HTML_H
+#define HTML_H
+
+#include <def.h>
+#include <list.h>
+
+// One node of the parsed document tree
+struct dom {
+ char *tag; // Tag name
+ char *content; // Text content, or NULL if the node has none
+ struct dom *parent; // Enclosing node; NULL for the root
+ struct list *children; // List whose entries are `struct dom *`
+};
+
+// Pairs a DOM node with the GUI element it was rendered into and tracks
+// where the next child should be placed inside that element
+struct html_element {
+ u32 x_offset; // Horizontal position for the next child
+ u32 y_offset; // Vertical position for the next child
+ struct dom *dom; // DOM node this entry was created for
+ struct element *obj; // GUI element that children are added to
+};
+
+int html_render(struct element *container, char *data, u32 length);
+
+#endif
diff --git a/libs/libtxt/keymap.c b/libs/libtxt/keymap.c
new file mode 100644
index 0000000..67054f6
--- /dev/null
+++ b/libs/libtxt/keymap.c
@@ -0,0 +1,89 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+
+#include <def.h>
+#include <libtxt/keymap.h>
+#include <mem.h>
+#include <print.h>
+#include <sys.h>
+
+// Stores `ch` at position `index` of the table selected by `line`:
+// 0 = plain map, 1 = shift map, 2 = alt map. Other lines are ignored.
+// Indices outside of [0, KEYMAP_LENGTH) are ignored as well, so a malformed
+// keymap file can't write past the end of a table.
+static void map(struct keymap *keymap, int line, char ch, int index)
+{
+	if (index < 0 || index >= KEYMAP_LENGTH)
+		return;
+
+	switch (line) {
+	case 0:
+		keymap->map[index] = ch;
+		break;
+	case 1:
+		keymap->shift_map[index] = ch;
+		break;
+	case 2:
+		keymap->alt_map[index] = ch;
+		break;
+	default:
+		break;
+	}
+}
+
+// Reads the keymap file at `path` and parses it into a newly allocated keymap.
+//
+// The first three lines of the file fill the plain, shift and alt tables.
+// Within a line, entries are separated by a single space; an unescaped space
+// in entry position maps the key to 0. Backslash escapes: \b \t \n \\ and
+// "\ " (a literal space). The first entry of a line goes to index 1.
+//
+// Returns NULL if the file can't be read or the keymap can't be allocated.
+// Table entries that the file does not mention are 0.
+// NOTE: the buffer returned by sread() is not released here.
+struct keymap *keymap_parse(const char *path)
+{
+	char *keymap_src = sread(path);
+	if (!keymap_src)
+		return NULL;
+
+	struct keymap *keymap = malloc(sizeof(*keymap));
+	if (!keymap)
+		return NULL;
+
+	// Unmapped keys must not contain whatever malloc() left behind
+	for (int i = 0; i < KEYMAP_LENGTH; i++) {
+		keymap->map[i] = 0;
+		keymap->shift_map[i] = 0;
+		keymap->alt_map[i] = 0;
+	}
+
+	int index = 0;
+	int ch_index = 0;
+	char ch;
+	int escaped = 0;
+	int line = 0;
+	int skip = 0;
+	// Stop at the terminator even if the file ends in a lone backslash;
+	// continuing would read past the end of the buffer
+	while ((ch = keymap_src[index]) != '\0') {
+		if (ch == ' ' && !skip) {
+			// Separator between two entries
+			skip = 1;
+			index++;
+			continue;
+		} else if (ch == '\n') {
+			ch_index = 0;
+			index++;
+			line++;
+			continue;
+		} else if (ch == '\\' && !escaped) {
+			escaped = 1;
+			index++;
+			continue;
+		}
+		skip = 0;
+
+		// A second, unescaped space means "no character for this key"
+		if (ch == ' ' && !escaped)
+			ch = 0;
+
+		ch_index++;
+		if (escaped) {
+			switch (ch) {
+			case 'b':
+				ch = '\b';
+				break;
+			case 't':
+				ch = '\t';
+				break;
+			case 'n':
+				ch = '\n';
+				break;
+			case '\\':
+				ch = '\\';
+				break;
+			case ' ':
+				ch = ' ';
+				break;
+			default:
+				print("Unknown escape!\n");
+				break;
+			}
+			escaped = 0;
+		}
+
+		// Ignore entries beyond the table size instead of overflowing
+		if (ch_index < KEYMAP_LENGTH)
+			map(keymap, line, ch, ch_index);
+		index++;
+	}
+
+	return keymap;
+}
diff --git a/libs/libtxt/keymap.h b/libs/libtxt/keymap.h
new file mode 100644
index 0000000..9f1966e
--- /dev/null
+++ b/libs/libtxt/keymap.h
@@ -0,0 +1,16 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+
+#ifndef KEYMAP_H
+#define KEYMAP_H
+
+#define KEYMAP_LENGTH 90
+
+// Character tables with KEYMAP_LENGTH entries each
+struct keymap {
+ char map[KEYMAP_LENGTH]; // Filled from the first line of a keymap file
+ char shift_map[KEYMAP_LENGTH]; // Filled from the second line
+ char alt_map[KEYMAP_LENGTH]; // Filled from the third line
+};
+
+struct keymap *keymap_parse(const char *path);
+
+#endif
diff --git a/libs/libtxt/xml.c b/libs/libtxt/xml.c
new file mode 100644
index 0000000..9a5fd76
--- /dev/null
+++ b/libs/libtxt/xml.c
@@ -0,0 +1,514 @@
+// Inspired by sxml (capmar)
+// MIT License, Copyright (c) 2020 Marvin Borner
+
+#include <assert.h>
+#include <libtxt/xml.h>
+#include <mem.h>
+#include <str.h>
+
+// Finds the first occurrence of the ASCII character `c` in [start, end).
+// Returns a pointer to it, or `end` if it does not occur.
+static const char *str_findchr(const char *start, const char *end, int c)
+{
+	assert(start <= end);
+	assert(0 <= c && c <= 127);
+
+	const char *hit = (const char *)memchr((void *)start, c, end - start);
+	if (hit == NULL)
+		return end;
+
+	return hit;
+}
+
+// Finds the first occurrence of the non-empty string `needle` in [start, end).
+// Returns a pointer to it, or `end` if it does not occur.
+static const char *str_findstr(const char *start, const char *end, const char *needle)
+{
+	assert(start <= end);
+
+	const u32 needlelen = strlen(needle);
+	assert(0 < needlelen);
+	const int first = (u8)needle[0];
+
+	const char *pos = start;
+	while (pos + needlelen <= end) {
+		// Only positions that leave room for the whole needle are candidates
+		const char *hit =
+			(const char *)memchr((void *)pos, first, (end - pos) - (needlelen - 1));
+		if (hit == NULL)
+			return end;
+
+		if (memcmp(hit, needle, needlelen) == 0)
+			return hit;
+
+		pos = hit + 1;
+	}
+
+	return end;
+}
+
+// Returns 1 if [start, end) begins with the string `prefix`, 0 otherwise
+// (including when the range is shorter than the prefix).
+static int str_starts_with(const char *start, const char *end, const char *prefix)
+{
+	assert(start <= end);
+
+	const long nbytes = strlen(prefix);
+	const int fits = end - start >= nbytes;
+
+	return fits && memcmp(prefix, start, nbytes) == 0;
+}
+
+// Returns 1 for space, tab, carriage return and line feed, 0 otherwise.
+static int white_space(int c)
+{
+	return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+}
+
+// Returns 1 if `c` may start a tag or attribute name: an ASCII letter,
+// ':' or '_', or any value of 0x80 and above. Returns 0 otherwise.
+static int name_start_char(int c)
+{
+	if (c >= 0x80)
+		return 1;
+
+	if (c == ':' || c == '_')
+		return 1;
+
+	if (c >= 'A' && c <= 'Z')
+		return 1;
+
+	return c >= 'a' && c <= 'z';
+}
+
+// Returns non-zero if `c` may appear inside a tag or attribute name:
+// everything name_start_char() accepts plus '-', '.' and the digits.
+static int name_char(int c)
+{
+	if (name_start_char(c))
+		return 1;
+
+	if (c == '-' || c == '.' || (c >= '0' && c <= '9'))
+		return 1;
+
+	// All of these are >= 0x80 and therefore already accepted by
+	// name_start_char(); kept to spell out the intended ranges
+	return c == 0xB7 || (0x0300 <= c && c <= 0x036F) || (0x203F <= c && c <= 0x2040);
+}
+
+// Byte classifiers. The argument is converted to u8 first, so the underlying
+// function always receives a value in the range 0..255.
+#define is_space(c) (white_space(((u8)(c))))
+#define is_alpha(c) (name_start_char(((u8)(c))))
+#define is_alnum(c) (name_char(((u8)(c))))
+
+// Skips leading whitespace: returns the first position in [start, end) that
+// is not whitespace, or `end` if the whole range is whitespace.
+static const char *str_ltrim(const char *start, const char *end)
+{
+	assert(start <= end);
+
+	const char *cursor = start;
+	while (cursor != end && is_space(*cursor))
+		cursor++;
+
+	return cursor;
+}
+
+// Strips trailing whitespace: returns the position one past the last
+// non-whitespace byte of [start, end), or `start` if there is none.
+static const char *str_rtrim(const char *start, const char *end)
+{
+	assert(start <= end);
+
+	const char *tail = end;
+	while (tail != start && is_space(tail[-1]))
+		tail--;
+
+	return tail;
+}
+
+// Returns the first position in [start, end) that is not a name character,
+// or `end` if every byte of the range is one.
+static const char *str_find_notalnum(const char *start, const char *end)
+{
+	assert(start <= end);
+
+	const char *cursor = start;
+	while (cursor != end && is_alnum(*cursor))
+		cursor++;
+
+	return cursor;
+}
+
+// Conversions between byte offsets and pointers into args->buffer
+#define buffer_from_offset(args, i) ((args)->buffer + (i))
+#define buffer_tooffset(args, ptr) (unsigned)((ptr) - (args)->buffer)
+// One past the last byte of the input buffer
+#define buffer_getend(args) ((args)->buffer + (args)->buffer_length)
+
+// Appends a token of `type` covering [start, end) to the token array and
+// updates the tag nesting level for start/end tags.
+// Returns 1 on success, 0 if the token array is already full (in which case
+// nothing is stored and neither ntokens nor tag_level changes).
+//
+// NOTE(review): every call site in this file ignores the return value, so
+// tokens are dropped silently once the array is full. Since ntokens is only
+// incremented while it is below num_tokens, the XML_ERROR_TOKENSFULL check in
+// state_set_pos() looks unreachable - confirm whether that is intended.
+static int state_push_token(struct xml *state, struct xml_args *args, enum xml_type type,
+ const char *start, const char *end)
+{
+ struct xml_token *token;
+ u32 i;
+ if (args->num_tokens <= state->ntokens)
+ return 0;
+
+ i = state->ntokens++;
+ token = &args->tokens[i];
+ token->type = type;
+ token->start_pos = buffer_tooffset(args, start);
+ token->end_pos = buffer_tooffset(args, end);
+ // Number of attribute tokens; filled in later by parse_attributes()
+ token->size = 0;
+
+ switch (type) {
+ case XML_START_TAG:
+ state->tag_level++;
+ break;
+
+ case XML_END_TAG:
+ assert(0 < state->tag_level);
+ state->tag_level--;
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+// Moves the parse position to `ptr` (stored as an offset into the buffer).
+// Returns XML_SUCCESS while the token count is within the array's capacity,
+// XML_ERROR_TOKENSFULL otherwise.
+static enum xml_error state_set_pos(struct xml *state, const struct xml_args *args, const char *ptr)
+{
+ state->buffer_pos = buffer_tooffset(args, ptr);
+ return (state->ntokens <= args->num_tokens) ? XML_SUCCESS : XML_ERROR_TOKENSFULL;
+}
+
+// Copies a complete parser state from `src` to `dest`
+#define state_commit(dest, src) memcpy((dest), (src), sizeof(struct xml))
+
+// Error returned for input that is only rejected by strict checks
+#define XML_ERROR_STRICT XML_ERROR_INVALID
+// Maximum distance (in bytes) from '&' within which the closing ';' of an
+// entity reference is searched
+#define ENTITY_MAXLEN 8
+// NOTE: evaluates its arguments more than once
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+// Tokenizes character data between the current position and `end`, up to and
+// including the first entity reference.
+//
+// Text before the first '&' becomes one XML_CHARACTER token. If an '&' is
+// found, the reference up to and including its ';' becomes a second
+// XML_CHARACTER token. The caller is expected to call this repeatedly until
+// the position reaches `end`.
+//
+// Returns XML_ERROR_BUFFERDRY if the ';' is missing and the search window was
+// cut short by `end`, XML_ERROR_INVALID if it is missing within a full
+// ENTITY_MAXLEN window, otherwise the result of state_set_pos().
+static enum xml_error parse_characters(struct xml *state, struct xml_args *args, const char *end)
+{
+ const char *start = buffer_from_offset(args, state->buffer_pos);
+ const char *limit, *colon, *ampr = str_findchr(start, end, '&');
+ assert(end <= buffer_getend(args));
+
+ if (ampr != start)
+ state_push_token(state, args, XML_CHARACTER, start, ampr);
+
+ // No entity reference in this run
+ if (ampr == end)
+ return state_set_pos(state, args, ampr);
+
+ limit = MIN(ampr + ENTITY_MAXLEN, end);
+ colon = str_findchr(ampr, limit, ';');
+ if (colon == limit)
+ return (limit == end) ? XML_ERROR_BUFFERDRY : XML_ERROR_INVALID;
+
+ start = colon + 1;
+ state_push_token(state, args, XML_CHARACTER, ampr, start);
+ return state_set_pos(state, args, start);
+}
+
+// Tokenizes an attribute value: runs parse_characters() until the parse
+// position reaches `end` (the closing quote). Returns the first error
+// reported by parse_characters(), or XML_SUCCESS.
+static enum xml_error parse_attrvalue(struct xml *state, struct xml_args *args, const char *end)
+{
+	for (;;) {
+		if (buffer_from_offset(args, state->buffer_pos) == end)
+			return XML_SUCCESS;
+
+		const enum xml_error status = parse_characters(state, args, end);
+		if (status != XML_SUCCESS)
+			return status;
+	}
+}
+
+// Parses the `name="value"` pairs that follow a tag or instruction name.
+//
+// Every attribute name is pushed as an XML_CDATA token, followed by the
+// XML_CHARACTER tokens of its value. Afterwards the `size` field of the token
+// that was last in the array on entry (the tag/instruction token pushed by
+// the caller) is set to the number of tokens added here.
+//
+// Returns XML_ERROR_BUFFERDRY if the buffer ends inside an attribute,
+// XML_ERROR_INVALID if a value is not quoted with ' or ", any error from
+// parse_attrvalue(), otherwise the result of state_set_pos() with the
+// position on the first byte after the attributes.
+static enum xml_error parse_attributes(struct xml *state, struct xml_args *args)
+{
+ const char *start = buffer_from_offset(args, state->buffer_pos);
+ const char *end = buffer_getend(args);
+ const char *name = str_ltrim(start, end);
+
+ // Token count on entry; the caller must have pushed its own token already
+ u32 ntokens = state->ntokens;
+ assert(0 < ntokens);
+
+ while (name != end && is_alpha(*name)) {
+ const char *eq, *space, *quot, *value;
+ enum xml_error error;
+
+ eq = str_findchr(name, end, '=');
+ if (eq == end)
+ return XML_ERROR_BUFFERDRY;
+
+ // Attribute name without whitespace before the '='
+ space = str_rtrim(name, eq);
+ state_push_token(state, args, XML_CDATA, name, space);
+
+ quot = str_ltrim(eq + 1, end);
+ if (quot == end)
+ return XML_ERROR_BUFFERDRY;
+ else if (*quot != '\'' && *quot != '"')
+ return XML_ERROR_INVALID;
+
+ // Find the matching closing quote of the same kind
+ value = quot + 1;
+ quot = str_findchr(value, end, *quot);
+ if (quot == end)
+ return XML_ERROR_BUFFERDRY;
+
+ state_set_pos(state, args, value);
+ error = parse_attrvalue(state, args, quot);
+ if (error != XML_SUCCESS)
+ return error;
+
+ name = str_ltrim(quot + 1, end);
+ }
+
+ {
+ struct xml_token *token = args->tokens + (ntokens - 1);
+ token->size = (u16)(state->ntokens - ntokens);
+ }
+
+ return state_set_pos(state, args, name);
+}
+
+// Length of a string literal without its terminator
+#define TAG_LEN(str) (sizeof(str) - 1)
+// Minimum number of bytes that must be available before a tag is inspected.
+// The shortest complete tag is "<a>", and the dispatch and tag parsers read
+// up to two bytes after the '<', so anything below 3 would read past the end
+// of a short buffer.
+#define TAG_MINSIZE 3
+
+// Parses a comment ("<!-- ... -->") at the current position and pushes its
+// text (without the delimiters) as an XML_COMMENT token.
+// Returns XML_ERROR_BUFFERDRY if the buffer ends before the comment does,
+// XML_ERROR_INVALID if the input does not start with "<!--", otherwise the
+// result of state_set_pos() with the position behind "-->".
+static enum xml_error parse_comment(struct xml *state, struct xml_args *args)
+{
+ static const char START_TAG[] = "<!--";
+ static const char END_TAG[] = "-->";
+
+ const char *dash;
+ const char *start = buffer_from_offset(args, state->buffer_pos);
+ const char *end = buffer_getend(args);
+ if (end - start < (int)TAG_LEN(START_TAG))
+ return XML_ERROR_BUFFERDRY;
+
+ if (!str_starts_with(start, end, START_TAG))
+ return XML_ERROR_INVALID;
+
+ start += TAG_LEN(START_TAG);
+ dash = str_findstr(start, end, END_TAG);
+ if (dash == end)
+ return XML_ERROR_BUFFERDRY;
+
+ state_push_token(state, args, XML_COMMENT, start, dash);
+ return state_set_pos(state, args, dash + TAG_LEN(END_TAG));
+}
+
+// Parses a processing instruction ("<?name attr="value" ... ?>") at the
+// current position. The name is pushed as an XML_INSTRUCTION token, followed
+// by the tokens of its attributes.
+// Returns XML_ERROR_INVALID if the input does not start with "<?" or does not
+// end in "?>", XML_ERROR_BUFFERDRY if the buffer ends too early, any error
+// from parse_attributes(), otherwise the result of state_set_pos() with the
+// position behind "?>".
+static enum xml_error parse_instruction(struct xml *state, struct xml_args *args)
+{
+ static const char START_TAG[] = "<?";
+ static const char END_TAG[] = "?>";
+
+ enum xml_error error;
+ const char *quest, *space;
+ const char *start = buffer_from_offset(args, state->buffer_pos);
+ const char *end = buffer_getend(args);
+ assert(TAG_MINSIZE <= end - start);
+
+ if (!str_starts_with(start, end, START_TAG))
+ return XML_ERROR_INVALID;
+
+ start += TAG_LEN(START_TAG);
+ // End of the instruction name
+ space = str_find_notalnum(start, end);
+ if (space == end)
+ return XML_ERROR_BUFFERDRY;
+
+ state_push_token(state, args, XML_INSTRUCTION, start, space);
+
+ state_set_pos(state, args, space);
+ error = parse_attributes(state, args);
+ if (error != XML_SUCCESS)
+ return error;
+
+ quest = buffer_from_offset(args, state->buffer_pos);
+ if (end - quest < (int)TAG_LEN(END_TAG))
+ return XML_ERROR_BUFFERDRY;
+
+ if (!str_starts_with(quest, end, END_TAG))
+ return XML_ERROR_INVALID;
+
+ return state_set_pos(state, args, quest + TAG_LEN(END_TAG));
+}
+
+// Parses a document type declaration ("<!DOCTYPE ... >") at the current
+// position and pushes everything between "<!DOCTYPE" and the first '>' as an
+// XML_DOCTYPE token.
+// Returns XML_ERROR_BUFFERDRY if the buffer ends before the declaration does,
+// XML_ERROR_INVALID if the input does not start with "<!DOCTYPE", otherwise
+// the result of state_set_pos() with the position behind the '>'.
+static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
+{
+	static const char START_TAG[] = "<!DOCTYPE";
+	static const char END_TAG[] = ">";
+
+	const char *bracket;
+	const char *start = buffer_from_offset(args, state->buffer_pos);
+	const char *end = buffer_getend(args);
+	if (end - start < (int)TAG_LEN(START_TAG))
+		return XML_ERROR_BUFFERDRY;
+
+	// Enough data is available, so a wrong prefix is malformed input and not
+	// a dry buffer (same as in parse_comment() and parse_cdata())
+	if (!str_starts_with(start, end, START_TAG))
+		return XML_ERROR_INVALID;
+
+	start += TAG_LEN(START_TAG);
+	bracket = str_findstr(start, end, END_TAG);
+	if (bracket == end)
+		return XML_ERROR_BUFFERDRY;
+
+	state_push_token(state, args, XML_DOCTYPE, start, bracket);
+	return state_set_pos(state, args, bracket + TAG_LEN(END_TAG));
+}
+
+// Parses a start tag ("<name attr="value" ... >" or the self-closing form
+// ending in "/>") at the current position. The name is pushed as an
+// XML_START_TAG token, followed by the tokens of its attributes. For the
+// self-closing form an XML_END_TAG token covering the same name is pushed
+// as well.
+// Returns XML_ERROR_INVALID if the input is not '<' followed by a name start
+// character or the tag is not terminated by '>', XML_ERROR_BUFFERDRY if the
+// buffer ends too early, any error from parse_attributes(), otherwise the
+// result of state_set_pos() with the position behind the '>'.
+//
+// NOTE(review): start[1] is read below; that is only in bounds if at least
+// two bytes remain, which the assert guarantees only for TAG_MINSIZE >= 2.
+static enum xml_error parse_start(struct xml *state, struct xml_args *args)
+{
+ enum xml_error error;
+ const char *gt, *name, *space;
+ const char *start = buffer_from_offset(args, state->buffer_pos);
+ const char *end = buffer_getend(args);
+ assert(TAG_MINSIZE <= end - start);
+
+ if (!(start[0] == '<' && is_alpha(start[1])))
+ return XML_ERROR_INVALID;
+
+ name = start + 1;
+ // End of the tag name
+ space = str_find_notalnum(name, end);
+ if (space == end)
+ return XML_ERROR_BUFFERDRY;
+
+ state_push_token(state, args, XML_START_TAG, name, space);
+
+ state_set_pos(state, args, space);
+ error = parse_attributes(state, args);
+ if (error != XML_SUCCESS)
+ return error;
+
+ gt = buffer_from_offset(args, state->buffer_pos);
+
+ // Self-closing tag: emit the matching end tag right away
+ if (gt != end && *gt == '/') {
+ state_push_token(state, args, XML_END_TAG, name, space);
+ gt++;
+ }
+
+ if (gt == end)
+ return XML_ERROR_BUFFERDRY;
+
+ if (*gt != '>')
+ return XML_ERROR_INVALID;
+
+ return state_set_pos(state, args, gt + 1);
+}
+
+// Parses an end tag ("</name>") at the current position and pushes the name
+// as an XML_END_TAG token. Only whitespace is accepted between the name and
+// the closing '>'.
+// Returns XML_ERROR_INVALID if the input is not "</" followed by a name start
+// character, XML_ERROR_BUFFERDRY if no '>' is found, XML_ERROR_STRICT if
+// anything but whitespace follows the name, otherwise the result of
+// state_set_pos() with the position behind the '>'.
+//
+// NOTE(review): start[2] is read below; that is only in bounds if at least
+// three bytes remain, which the assert guarantees only for TAG_MINSIZE >= 3.
+static enum xml_error parse_end(struct xml *state, struct xml_args *args)
+{
+ const char *gt, *space;
+ const char *start = buffer_from_offset(args, state->buffer_pos);
+ const char *end = buffer_getend(args);
+ assert(TAG_MINSIZE <= end - start);
+
+ if (!(str_starts_with(start, end, "</") && is_alpha(start[2])))
+ return XML_ERROR_INVALID;
+
+ start += 2;
+ gt = str_findchr(start, end, '>');
+ if (gt == end)
+ return XML_ERROR_BUFFERDRY;
+
+ // End of the tag name; the rest up to '>' must be whitespace
+ space = str_find_notalnum(start, gt);
+ if (str_ltrim(space, gt) != gt)
+ return XML_ERROR_STRICT;
+
+ state_push_token(state, args, XML_END_TAG, start, space);
+ return state_set_pos(state, args, gt + 1);
+}
+
+// Parses a CDATA section ("<![CDATA[ ... ]]>") at the current position and
+// pushes its content (without the delimiters) as an XML_CDATA token.
+// Returns XML_ERROR_BUFFERDRY if the buffer ends before the section does,
+// XML_ERROR_INVALID if the input does not start with "<![CDATA[", otherwise
+// the result of state_set_pos() with the position behind "]]>".
+static enum xml_error parse_cdata(struct xml *state, struct xml_args *args)
+{
+ static const char START_TAG[] = "<![CDATA[";
+ static const char END_TAG[] = "]]>";
+
+ const char *bracket;
+ const char *start = buffer_from_offset(args, state->buffer_pos);
+ const char *end = buffer_getend(args);
+ if (end - start < (int)TAG_LEN(START_TAG))
+ return XML_ERROR_BUFFERDRY;
+
+ if (!str_starts_with(start, end, START_TAG))
+ return XML_ERROR_INVALID;
+
+ start += TAG_LEN(START_TAG);
+ bracket = str_findstr(start, end, END_TAG);
+ if (bracket == end)
+ return XML_ERROR_BUFFERDRY;
+
+ state_push_token(state, args, XML_CDATA, start, bracket);
+ return state_set_pos(state, args, bracket + TAG_LEN(END_TAG));
+}
+
+// Resets a parser state: position at the start of the buffer, no tokens
+// produced, no open tags. Must be called before the first xml_parse().
+void xml_init(struct xml *state)
+{
+	*state = (struct xml){
+		.buffer_pos = 0,
+		.ntokens = 0,
+		.tag_level = 0,
+	};
+}
+
+// At least one tag is currently open
+#define ROOT_FOUND(state) (0 < (state)->tag_level)
+// Every opened tag has been closed again
+#define ROOT_PARSED(state) ((state)->tag_level == 0)
+
+// Tokenizes `buffer` (`buffer_length` bytes), starting at state->buffer_pos,
+// into `tokens` (capacity `num_tokens`).
+//
+// Parsing runs in two phases: first everything in front of the root element
+// (instructions, comments, doctype) up to and including the root's start tag,
+// then the content of the root element until its end tag closes it.
+//
+// All work happens on a temporary copy of the state, which is committed to
+// `state` after each successfully parsed construct. On error, `state` still
+// describes the last construct that was parsed completely.
+//
+// Returns XML_SUCCESS once the root element has been closed, otherwise the
+// error of the construct that failed (XML_ERROR_BUFFERDRY if the buffer ended
+// first, XML_ERROR_INVALID for malformed input).
+//
+// NOTE(review): lt[1] and lt[2] are read after only checking that
+// TAG_MINSIZE bytes remain; that is in bounds only for TAG_MINSIZE >= 3.
+enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_length,
+ struct xml_token tokens[], u32 num_tokens)
+{
+ struct xml temp = *state;
+ const char *end = buffer + buffer_length;
+
+ struct xml_args args;
+ args.buffer = buffer;
+ args.buffer_length = buffer_length;
+ args.tokens = tokens;
+ args.num_tokens = num_tokens;
+
+ // Phase 1: prolog up to the root element's start tag
+ while (!ROOT_FOUND(&temp)) {
+ enum xml_error error;
+ const char *start = buffer_from_offset(&args, temp.buffer_pos);
+ const char *lt = str_ltrim(start, end);
+ state_set_pos(&temp, &args, lt);
+ state_commit(state, &temp);
+
+ if (end - lt < TAG_MINSIZE)
+ return XML_ERROR_BUFFERDRY;
+
+ // Only markup is allowed in front of the root element
+ if (*lt != '<')
+ return XML_ERROR_INVALID;
+
+ switch (lt[1]) {
+ case '?':
+ error = parse_instruction(&temp, &args);
+ break;
+ case '!':
+ error = (lt[2] == '-') ? parse_comment(&temp, &args) :
+ parse_doctype(&temp, &args);
+ break;
+ default:
+ error = parse_start(&temp, &args);
+ break;
+ }
+
+ if (error != XML_SUCCESS)
+ return error;
+
+ state_commit(state, &temp);
+ }
+
+ // Phase 2: content of the root element
+ while (!ROOT_PARSED(&temp)) {
+ enum xml_error error;
+ const char *start = buffer_from_offset(&args, temp.buffer_pos);
+ const char *lt = str_findchr(start, end, '<');
+ // Character data in front of the next tag
+ while (buffer_from_offset(&args, temp.buffer_pos) != lt) {
+ error = parse_characters(&temp, &args, lt);
+ if (error != XML_SUCCESS)
+ return error;
+
+ state_commit(state, &temp);
+ }
+
+ if (end - lt < TAG_MINSIZE)
+ return XML_ERROR_BUFFERDRY;
+
+ switch (lt[1]) {
+ case '?':
+ error = parse_instruction(&temp, &args);
+ break;
+ case '/':
+ error = parse_end(&temp, &args);
+ break;
+ case '!':
+ error = (lt[2] == '-') ? parse_comment(&temp, &args) :
+ parse_cdata(&temp, &args);
+ break;
+ default:
+ error = parse_start(&temp, &args);
+ break;
+ }
+
+ if (error != XML_SUCCESS)
+ return error;
+
+ state_commit(state, &temp);
+ }
+
+ return XML_SUCCESS;
+}
diff --git a/libs/libtxt/xml.h b/libs/libtxt/xml.h
new file mode 100644
index 0000000..43a8005
--- /dev/null
+++ b/libs/libtxt/xml.h
@@ -0,0 +1,51 @@
+// Inspired by sxml (capmar)
+// MIT License, Copyright (c) 2020 Marvin Borner
+
+#ifndef XML_H
+#define XML_H
+
+#include <def.h>
+
+// Result codes of xml_parse()
+enum xml_error {
+ XML_ERROR_INVALID = -1, // Malformed input
+ XML_SUCCESS = 0, // Root element parsed completely
+ XML_ERROR_BUFFERDRY = 1, // Buffer ended before the current construct did
+ XML_ERROR_TOKENSFULL = 2 // More tokens than the token array can hold
+};
+
+// One token produced by xml_parse(); positions are byte offsets into the
+// parsed buffer
+struct xml_token {
+ u16 type; // One of enum xml_type
+ u16 size; // For tags/instructions: number of attribute tokens that follow
+ u32 start_pos; // Offset of the first byte of the token
+ u32 end_pos; // Offset one past the last byte of the token
+};
+
+// Arguments of one xml_parse() call, bundled for the internal parse functions
+struct xml_args {
+ const char *buffer; // Input being parsed
+ u32 buffer_length; // Length of the input in bytes
+ struct xml_token *tokens; // Output token array
+ u32 num_tokens; // Capacity of the token array
+};
+
+// Token kinds stored in xml_token.type
+enum xml_type {
+ XML_START_TAG, // Name of a start tag
+ XML_END_TAG, // Name of an end tag (also emitted for self-closing tags)
+ XML_CHARACTER, // Character data or an entity reference
+ XML_CDATA, // Content of a CDATA section; also used for attribute names
+ XML_INSTRUCTION, // Name of a processing instruction
+ XML_DOCTYPE, // Content of a document type declaration
+ XML_COMMENT // Text of a comment
+};
+
+// Parser state; initialize with xml_init() before the first xml_parse()
+struct xml {
+ u32 buffer_pos; // Offset of the next byte to parse
+ u32 ntokens; // Number of tokens produced so far
+ u32 tag_level; // Number of currently open tags
+};
+
+enum xml_error xml_parse(struct xml *parser, const char *buffer, u32 buffer_length,
+ struct xml_token *tokens, u32 num_tokens);
+
+void xml_init(struct xml *parser);
+
+#endif