aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marvin Borner 2020-11-23 22:19:54 +0100
committer Marvin Borner 2020-11-23 22:19:54 +0100
commit 8babf8b26e23ffdd8094c810295061effde153dd (patch)
tree 98aebc32ae3f698b1381d507e595c788e34ada2a
parent f32a888e6fdcb13802f289ba1aa58b70fa9466f3 (diff)
Fixed some XML/HTML parsing bugs
-rw-r--r-- apps/browser.c 22
-rw-r--r-- libgui/gfx.c 3
-rw-r--r-- libtxt/Makefile 2
-rw-r--r-- libtxt/html.c 18
-rw-r--r-- libtxt/inc/html.h 9
-rw-r--r-- libtxt/xml.c 8
6 files changed, 47 insertions, 15 deletions
diff --git a/apps/browser.c b/apps/browser.c
index 0b53797..7f4fc6e 100644
--- a/apps/browser.c
+++ b/apps/browser.c
@@ -4,6 +4,7 @@
#include <def.h>
#include <gfx.h>
#include <gui.h>
+#include <html.h>
#include <input.h>
#include <mem.h>
#include <net.h>
@@ -63,7 +64,7 @@ void parse(void *data, u32 len, char *out)
enum xml_error err = xml_parse(&parser, buffer, len, tokens, 128);
if (err != XML_SUCCESS) {
- printf("XML parse error: %d\n", err);
+ printf("\nXML parse error: %d\n", err);
return;
}
@@ -74,10 +75,13 @@ void parse(void *data, u32 len, char *out)
name[0] = '\0';
switch (token->type) {
case XML_START_TAG:
- print_indent(out, indent++);
memcpy(&name, (u8 *)buffer + token->start_pos,
token->end_pos - token->start_pos);
name[token->end_pos - token->start_pos] = '\0';
+ if (html_self_closing(name))
+ print_indent(out, indent);
+ else
+ print_indent(out, indent++);
strcat(out, name);
strcat(out, "\n");
break;
@@ -108,12 +112,12 @@ void parse(void *data, u32 len, char *out)
break;
}
}
- if (strlen(clean_name)) {
- print_indent(out, indent++);
- strcat(out, clean_name);
- strcat(out, "\n");
- indent--;
- }
+ if (!strlen(clean_name))
+ break;
+ print_indent(out, indent++);
+ strcat(out, clean_name);
+ strcat(out, "\n");
+ indent--;
break;
default:
break;
@@ -144,7 +148,7 @@ void on_submit(void *event, struct element *box)
struct element_label *c = code_label->data;
struct socket *socket = net_open(S_TCP);
- if (socket && net_connect(socket, ip, 8000)) {
+ if (socket && net_connect(socket, ip, 80)) {
net_send(socket, query, strlen(query));
char buf[4096] = { 0 };
char parsed[4096] = { 0 };
diff --git a/libgui/gfx.c b/libgui/gfx.c
index 6d54550..0efedf9 100644
--- a/libgui/gfx.c
+++ b/libgui/gfx.c
@@ -121,10 +121,9 @@ void gfx_write(struct context *ctx, int x, int y, enum font_type font_type, u32
cnt = 0;
} else if (text[i] == '\n') {
cnt = 0;
- x = 0;
y += font->height;
} else if (text[i] == '\t') {
- x += 4 * font->width;
+ cnt += 4;
} else {
// TODO: Overflow on single line input
if ((cnt + 1) * font->width > ctx->width) {
diff --git a/libtxt/Makefile b/libtxt/Makefile
index 543a90d..3536250 100644
--- a/libtxt/Makefile
+++ b/libtxt/Makefile
@@ -1,6 +1,6 @@
# MIT License, Copyright (c) 2020 Marvin Borner
-COBJS = keymap.o xml.o
+COBJS = keymap.o xml.o html.o
CC = ccache ../cross/opt/bin/i686-elf-gcc
LD = ccache ../cross/opt/bin/i686-elf-ld
AR = ccache ../cross/opt/bin/i686-elf-ar
diff --git a/libtxt/html.c b/libtxt/html.c
new file mode 100644
index 0000000..0c07323
--- /dev/null
+++ b/libtxt/html.c
@@ -0,0 +1,18 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#include <print.h>
+#include <str.h>
+
+// Returns 1 if 'tag' exactly (case-sensitively) names an HTML void element, i.e. one without an end tag; else 0
+int html_self_closing(const char *tag)
+{
+	// Void elements per the HTML standard (incl. 'meta'), plus the obsolete but still void 'param'
+	static const char *const void_elements[] = { "area", "base", "br", "col", "embed", "hr", "img",
+						     "input", "link", "meta", "param", "source", "track", "wbr" };
+	for (u32 i = 0; i < sizeof(void_elements) / sizeof(void_elements[0]); ++i) {
+		if (!strcmp(void_elements[i], tag))
+			return 1;
+	}
+	return 0;
+}
diff --git a/libtxt/inc/html.h b/libtxt/inc/html.h
new file mode 100644
index 0000000..dd2b59f
--- /dev/null
+++ b/libtxt/inc/html.h
@@ -0,0 +1,9 @@
+// MIT License, Copyright (c) 2020 Marvin Borner
+// HTML parsing is mainly based on the XML parser
+
+#ifndef HTML_H
+#define HTML_H
+
+int html_self_closing(const char *tag);
+
+#endif
diff --git a/libtxt/xml.c b/libtxt/xml.c
index 90fd553..b92181b 100644
--- a/libtxt/xml.c
+++ b/libtxt/xml.c
@@ -250,7 +250,7 @@ static enum xml_error parse_attributes(struct xml *state, struct xml_args *args)
}
#define TAG_LEN(str) (sizeof(str) - 1)
-#define TAG_MINSIZE 3
+#define TAG_MINSIZE 1
static enum xml_error parse_comment(struct xml *state, struct xml_args *args)
{
@@ -319,7 +319,6 @@ static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
const char *bracket;
const char *start = buffer_from_offset(args, state->buffer_pos);
const char *end = buffer_getend(args);
- print("GOT HERE\n");
if (end - start < (int)TAG_LEN(START_TAG))
return XML_ERROR_BUFFERDRY;
@@ -330,7 +329,6 @@ static enum xml_error parse_doctype(struct xml *state, struct xml_args *args)
bracket = str_findstr(start, end, END_TAG);
if (bracket == end)
return XML_ERROR_BUFFERDRY;
- print("GOT HERE!!!!\n");
state_push_token(state, args, XML_DOCTYPE, start, bracket);
return state_set_pos(state, args, bracket + TAG_LEN(END_TAG));
@@ -487,6 +485,10 @@ enum xml_error xml_parse(struct xml *state, const char *buffer, u32 buffer_lengt
state_commit(state, &temp);
}
+ // TODO: Only for self-closing tags
+ if (end - lt == 0)
+ break;
+
if (end - lt < TAG_MINSIZE)
return XML_ERROR_BUFFERDRY;