aboutsummaryrefslogtreecommitdiff
path: root/libtxt
diff options
context:
space:
mode:
author Marvin Borner 2020-11-23 22:19:54 +0100
committer Marvin Borner 2020-11-23 22:19:54 +0100
commit 8babf8b26e23ffdd8094c810295061effde153dd (patch)
tree 98aebc32ae3f698b1381d507e595c788e34ada2a /libtxt
parent f32a888e6fdcb13802f289ba1aa58b70fa9466f3 (diff)
Fixed some XML/HTML parsing bugs
Diffstat (limited to 'libtxt')
-rw-r--r-- libtxt/Makefile 2
-rw-r--r-- libtxt/html.c 18
-rw-r--r-- libtxt/inc/html.h 9
-rw-r--r-- libtxt/xml.c 8
4 files changed, 33 insertions, 4 deletions
diff --git a/libtxt/Makefile b/libtxt/Makefile
index 543a90d..3536250 100644
--- a/libtxt/Makefile
+++ b/libtxt/Makefile
@@ -1,6 +1,6 @@
# MIT License, Copyright (c) 2020 Marvin Borner
-COBJS = keymap.o xml.o
+COBJS = keymap.o xml.o html.o
CC = ccache ../cross/opt/bin/i686-elf-gcc
LD = ccache ../cross/opt/bin/i686-elf-ld
AR = ccache ../cross/opt/bin/i686-elf-ar
diff --git a/libtxt/html.c b/libtxt/html.c
new file mode 100644
index 0000000..0c07323
--- /dev/null
+++ b/libtxt/html.c
@@ -0,0 +1,18 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#include <print.h>
+#include <str.h>
+
+int html_self_closing(const char *tag) // Returns 1 if 'tag' matches an entry of the table below, 0 otherwise
+{
+ // TODO: Add 'meta'? NOTE(review): the HTML standard lists 'meta' as a void element; it is absent here, so "meta" yields 0
+ const char *void_elements[] = { "area", "base", "br", "col", "embed", "hr", "img",
+ "input", "link", "param", "source", "track", "wbr" };
+
+ for (u32 i = 0; i < sizeof(void_elements) / sizeof(void_elements[0]); ++i) { // visit every table entry in order
+ if (!strcmp(void_elements[i], tag)) // hit when strcmp() yields 0; presumably exact, case-sensitive equality (strcmp comes from <str.h>) - confirm
+ return 1;
+ }
+ return 0; // no table entry matched
+}
diff --git a/libtxt/inc/html.h b/libtxt/inc/html.h
new file mode 100644
index 0000000..dd2b59f
--- /dev/null
+++ b/libtxt/inc/html.h
@@ -0,0 +1,9 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#ifndef HTML_H
+#define HTML_H
+
+int html_self_closing(const char *tag); // 1 if 'tag' is in the void-element table defined in html.c, 0 otherwise
+
+#endif // HTML_H
diff --git a/libtxt/xml.c b/libtxt/xml.c
index 90fd553..b92181b 100644
--- a/libtxt/xml.c
+++ b/libtxt/xml.c
@@ -250,7 +250,7 @@ static enum xml_error parse_attributes(struct xml *state, struct xml_args *args)
}
#define TAG_LEN(str) (sizeof(str) - 1)
-#define TAG_MINSIZE 3
+#define TAG_MINSIZE 1
static enum xml_error parse_comment(struct xml *state, struct xml_args *args)
{
@@ -319,7 +319,6 @@ static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
const char *bracket;
const char *start = buffer_from_offset(args, state->buffer_pos);
const char *end = buffer_getend(args);
- print("GOT HERE\n");
if (end - start < (int)TAG_LEN(START_TAG))
return XML_ERROR_BUFFERDRY;
@@ -330,7 +329,6 @@ static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
bracket = str_findstr(start, end, END_TAG);
if (bracket == end)
return XML_ERROR_BUFFERDRY;
- print("GOT HERE!!!!\n");
state_push_token(state, args, XML_DOCTYPE, start, bracket);
return state_set_pos(state, args, bracket + TAG_LEN(END_TAG));
@@ -487,6 +485,10 @@ enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_lengt
state_commit(state, &temp);
}
+ // TODO: Only for self-closing tags
+ if (end - lt == 0)
+ break;
+
if (end - lt < TAG_MINSIZE)
return XML_ERROR_BUFFERDRY;