diff options
author | Marvin Borner | 2020-11-23 22:19:54 +0100 |
---|---|---|
committer | Marvin Borner | 2020-11-23 22:19:54 +0100 |
commit | 8babf8b26e23ffdd8094c810295061effde153dd (patch) | |
tree | 98aebc32ae3f698b1381d507e595c788e34ada2a /libtxt | |
parent | f32a888e6fdcb13802f289ba1aa58b70fa9466f3 (diff) |
Fixed some XML/HTML parsing bugs
Diffstat (limited to 'libtxt')
-rw-r--r-- | libtxt/Makefile | 2 | ||||
-rw-r--r-- | libtxt/html.c | 18 | ||||
-rw-r--r-- | libtxt/inc/html.h | 9 | ||||
-rw-r--r-- | libtxt/xml.c | 8 |
4 files changed, 33 insertions, 4 deletions
```diff
diff --git a/libtxt/Makefile b/libtxt/Makefile
index 543a90d..3536250 100644
--- a/libtxt/Makefile
+++ b/libtxt/Makefile
@@ -1,6 +1,6 @@
 # MIT License, Copyright (c) 2020 Marvin Borner
 
-COBJS = keymap.o xml.o
+COBJS = keymap.o xml.o html.o
 CC = ccache ../cross/opt/bin/i686-elf-gcc
 LD = ccache ../cross/opt/bin/i686-elf-ld
 AR = ccache ../cross/opt/bin/i686-elf-ar
diff --git a/libtxt/html.c b/libtxt/html.c
new file mode 100644
index 0000000..0c07323
--- /dev/null
+++ b/libtxt/html.c
@@ -0,0 +1,18 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#include <print.h>
+#include <str.h>
+
+int html_self_closing(const char *tag)
+{
+	// TODO: Add 'meta'?
+	const char *void_elements[] = { "area", "base", "br", "col", "embed", "hr", "img",
+					"input", "link", "param", "source", "track", "wbr" };
+
+	for (u32 i = 0; i < sizeof(void_elements) / sizeof(void_elements[0]); ++i) {
+		if (!strcmp(void_elements[i], tag))
+			return 1;
+	}
+	return 0;
+}
diff --git a/libtxt/inc/html.h b/libtxt/inc/html.h
new file mode 100644
index 0000000..dd2b59f
--- /dev/null
+++ b/libtxt/inc/html.h
@@ -0,0 +1,9 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#ifndef HTML_H
+#define HTML_H
+
+int html_self_closing(const char *tag);
+
+#endif
diff --git a/libtxt/xml.c b/libtxt/xml.c
index 90fd553..b92181b 100644
--- a/libtxt/xml.c
+++ b/libtxt/xml.c
@@ -250,7 +250,7 @@ static enum xml_error parse_attributes(struct xml *state, struct xml_args *args)
 }
 
 #define TAG_LEN(str) (sizeof(str) - 1)
-#define TAG_MINSIZE 3
+#define TAG_MINSIZE 1
 
 static enum xml_error parse_comment(struct xml *state, struct xml_args *args)
 {
@@ -319,7 +319,6 @@ static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
 	const char *bracket;
 	const char *start = buffer_from_offset(args, state->buffer_pos);
 	const char *end = buffer_getend(args);
-	print("GOT HERE\n");
 
 	if (end - start < (int)TAG_LEN(START_TAG))
 		return XML_ERROR_BUFFERDRY;
@@ -330,7 +329,6 @@ static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
 	bracket = str_findstr(start, end, END_TAG);
 	if (bracket == end)
 		return XML_ERROR_BUFFERDRY;
-	print("GOT HERE!!!!\n");
 
 	state_push_token(state, args, XML_DOCTYPE, start, bracket);
 	return state_set_pos(state, args, bracket + TAG_LEN(END_TAG));
@@ -487,6 +485,10 @@ enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_lengt
 			state_commit(state, &temp);
 		}
 
+		// TODO: Only for self-closing tags
+		if (end - lt == 0)
+			break;
+
 		if (end - lt < TAG_MINSIZE)
 			return XML_ERROR_BUFFERDRY;
 
```