diff options
author | Marvin Borner | 2023-05-20 14:13:45 +0200 |
---|---|---|
committer | Marvin Borner | 2023-05-20 14:13:45 +0200 |
commit | 8499010b91a2c7496d6af74cce35a6b4e0378633 (patch) | |
tree | c57a6a6fb12f4653928662e93d4470453068e361 | |
parent | f640ceee89836b56ac95c4eb1b0a43d1171c3354 (diff) |
Added testing flag
still not able to find the 8cc bug but idc because I tested it
with many programs and it probably won't be an issue for now
-rw-r--r-- | inc/build.h | 6 | ||||
-rw-r--r-- | inc/parse.h | 1 | ||||
-rw-r--r-- | inc/term.h | 1 | ||||
-rw-r--r-- | options.ggo | 1 | ||||
-rw-r--r-- | readme.md | 18 | ||||
-rw-r--r-- | src/build.c | 19 | ||||
-rw-r--r-- | src/main.c | 130 | ||||
-rw-r--r-- | src/parse.c | 32 | ||||
-rw-r--r-- | src/term.c | 29 |
9 files changed, 154 insertions, 83 deletions
diff --git a/inc/build.h b/inc/build.h index 2d99f9d..0b76768 100644 --- a/inc/build.h +++ b/inc/build.h @@ -4,11 +4,13 @@ #ifndef BLOC_BUILD_H #define BLOC_BUILD_H +#include <stdio.h> + #include <spec.h> #include <tree.h> #include <parse.h> -void write_bloc(struct list *table, const char *path); -void write_blc(struct bloc_parsed *bloc, const char *path); +void write_bloc(struct list *table, FILE *file); +void write_blc(struct bloc_parsed *bloc, FILE *file); #endif diff --git a/inc/parse.h b/inc/parse.h index 205f204..8eba970 100644 --- a/inc/parse.h +++ b/inc/parse.h @@ -17,6 +17,5 @@ struct bloc_parsed { struct term *parse_blc(const char *term); struct bloc_parsed *parse_bloc(const void *bloc); void free_bloc(struct bloc_parsed *bloc); -struct term *from_bloc(struct bloc_parsed *bloc); #endif @@ -29,5 +29,6 @@ struct term { struct term *new_term(term_type type); void free_term(struct term *term); +void diff_term(struct term *a, struct term *b); #endif diff --git a/options.ggo b/options.ggo index 11626b3..7cf6b77 100644 --- a/options.ggo +++ b/options.ggo @@ -8,3 +8,4 @@ option "verbose" v "enable debug logging output" flag off option "from-blc" b "convert from BLC to BLoC" flag off option "from-bloc" B "convert from BLoC to BLC" flag off option "dump" d "dump bloc file" dependon="from-bloc" flag off +option "test" t "compare BLC with generated BLoC" dependon="from-blc" flag off @@ -71,7 +71,7 @@ A possible encoding in BLoC: | 0x04 | 0x06 | number of entries: 3 | | 0x06 | 0x17 | encoded `M`: gets a bit longer due to different encoding | | 0x17 | 0x28 | encoded `N`: gets a bit longer due to different encoding | -| 0x28 | 0x34 | `00010010010011<M>00011<N>011<M>011<N>`, where `<M>=00{1}` and `<N>=00{1}` are indices with length of 1.25 byte | +| 0x28 | 0x34 | `00010010010011<M>00011<N>011<M>011<N>`, where `<M>=00{1}` and `<N>=00{0}` are indices with length of 1.25 byte | Even in this small example BLoC uses less space than BLC (0x34 vs. 0x42 bytes). Depending on the content of `M` and `N`, this could have @@ -95,12 +95,18 @@ shortest one (as fully reduced expressions aren’t necessarily shorter). ## Improvements -The current optimizer does not always make the best deduplication +There seem to be problems with *very* big files: +[8cc](https://github.com/woodrush/lambda-8cc) does not pass the bloc-blc +comparison test. I’ve not been able to reproduce this bug with any other +file and 8cc itself is too huge to comfortably debug the issue. If +you’re reading this: Please help me :( + +Also the current optimizer does not always make the best deduplication choices. It seems like finding the optimal deduplications requires quite complex algorithms which would probably be rather inefficient. For example, as of right now the length of an expression as seen by the -deduplicator doesn’t consider the change of length when sub-expressions -get replaced by a reference (of unknown bit length!) to another -expression. This results in entries like `(0 <1234>)` that would not -have needed to be deduplicated. +deduplicator doesn’t consider the change of occurrence count when +sub-expressions get replaced by a reference to another expression. This +results in entries like `(0 <1234>)` that would not have needed to be +deduplicated. diff --git a/src/build.c b/src/build.c index fff5651..9fdcad4 100644 --- a/src/build.c +++ b/src/build.c @@ -67,12 +67,9 @@ static void write_bblc(struct tree *tree, FILE *file) fwrite(&byte, 1, 1, file); } -void write_bloc(struct list *table, const char *path) +static void write_bloc_file(struct list *table, FILE *file) { short length = table->val; - debug("writing bloc with %ld elements to %s\n", length, path); - - FILE *file = fopen(path, "wb"); fwrite(BLOC_IDENTIFIER, BLOC_IDENTIFIER_LENGTH, 1, file); fwrite(&length, 2, 1, file); @@ -81,8 +78,14 @@ void write_bloc(struct list *table, const char *path) write_bblc(iterator->data, file); iterator = iterator->next; } +} - fclose(file); +void write_bloc(struct list *table, FILE *file) +{ + short length = table->val; + debug("writing bloc with %ld elements\n", length); + + write_bloc_file(table, file); } static void fprint_bloc_blc(struct term *term, struct bloc_parsed *bloc, @@ -115,12 +118,8 @@ static void fprint_bloc_blc(struct term *term, struct bloc_parsed *bloc, } } -void write_blc(struct bloc_parsed *bloc, const char *path) +void write_blc(struct bloc_parsed *bloc, FILE *file) { - FILE *file = fopen(path, "wb"); - fprint_bloc_blc(bloc->entries[bloc->length - 1], bloc, file); fprintf(file, "\n"); - - fclose(file); } @@ -44,13 +44,8 @@ static char *read_stdin(void) return string; } -static char *read_file(const char *path) +static char *read_file(FILE *f) { - debug("reading from %s\n", path); - FILE *f = fopen(path, "rb"); - if (!f) - fatal("can't open file %s: %s\n", path, strerror(errno)); - fseek(f, 0, SEEK_END); long fsize = ftell(f); fseek(f, 0, SEEK_SET); @@ -59,17 +54,104 @@ static char *read_file(const char *path) if (!string) fatal("out of memory!\n"); int ret = fread(string, fsize, 1, f); - fclose(f); if (ret != 1) { free(string); - fatal("can't read file %s: %s\n", path, strerror(errno)); + fatal("can't read file: %s\n", strerror(errno)); } string[fsize] = 0; return string; } +static char *read_path(const char *path) +{ + debug("reading from %s\n", path); + FILE *f = fopen(path, "rb"); + if (!f) + fatal("can't open file %s: %s\n", path, strerror(errno)); + char *string = read_file(f); + fclose(f); + return string; +} + +static void test(char *input) +{ + debug("parsing as blc\n"); + + struct term *parsed_1 = parse_blc(input); + free(input); + debug("parsed original blc\n"); + + debug("merging duplicates\n"); + struct list *table = tree_merge_duplicates(parsed_1); + + FILE *temp_bloc = tmpfile(); + write_bloc(table, temp_bloc); + tree_destroy(table); + + debug("parsing as bloc\n"); + char *temp = read_file(temp_bloc); + struct bloc_parsed *bloc = parse_bloc(temp); + fseek(temp_bloc, 0, SEEK_END); + fprintf(stderr, "size bloc: %lu\n", ftell(temp_bloc)); + fclose(temp_bloc); + + FILE *temp_blc = tmpfile(); + write_blc(bloc, temp_blc); + char *input_2 = read_file(temp_blc); + struct term *parsed_2 = parse_blc(input_2); + fseek(temp_blc, 0, SEEK_END); + fprintf(stderr, "size blc: ~%lu\n", ftell(temp_blc) / 8); + fclose(temp_blc); + free(input_2); + debug("parsed reconstructed blc\n"); + + diff_term(parsed_1, parsed_2); + debug("diffed two terms\n"); + + free_term(parsed_1); + free_term(parsed_2); + debug("done!\n"); +} + +static void from_blc(char *input, char *output_path) +{ + debug("parsing as blc\n"); + + struct term *parsed = parse_blc(input); + debug("parsed blc\n"); + + debug("merging duplicates\n"); + struct list *table = tree_merge_duplicates(parsed); + + FILE *file = fopen(output_path, "wb"); + write_bloc(table, file); + fclose(file); + + tree_destroy(table); + free_term(parsed); + free(input); + + debug("done!\n"); +} + +static void from_bloc(char *input, char *output_path, int dump) +{ + debug("parsing as bloc\n"); + + struct bloc_parsed *bloc = parse_bloc(input); + if (dump) + print_bloc(bloc); + + FILE *file = fopen(output_path, "wb"); + write_blc(bloc, file); + fclose(file); + + free(input); + free_bloc(bloc); +} + int main(int argc, char **argv) { struct gengetopt_args_info args; @@ -82,40 +164,24 @@ int main(int argc, char **argv) if (args.input_arg[0] == '-') { input = read_stdin(); } else { - input = read_file(args.input_arg); + input = read_path(args.input_arg); } if (!input) return 1; - if (args.from_blc_flag && !args.from_bloc_flag) { - debug("parsing as blc\n"); - - struct term *parsed = parse_blc(input); - debug("parsed blc\n"); - - debug("merging duplicates\n"); - struct list *table = tree_merge_duplicates(parsed); - - write_bloc(table, args.output_arg); - - tree_destroy(table); - free_term(parsed); - free(input); + if (args.test_flag && args.from_blc_flag && !args.from_bloc_flag) { + test(input); + return 0; + } - debug("done!\n"); + if (args.from_blc_flag && !args.from_bloc_flag) { + from_blc(input, args.output_arg); return 0; } if (args.from_bloc_flag && !args.from_blc_flag) { - debug("parsing as bloc\n"); - - struct bloc_parsed *bloc = parse_bloc(input); - if (args.dump_flag) - print_bloc(bloc); - write_blc(bloc, args.output_arg); - free(input); - free_bloc(bloc); + from_bloc(input, args.output_arg, args.dump_flag); return 0; } diff --git a/src/parse.c b/src/parse.c index 9a8af45..f6e047d 100644 --- a/src/parse.c +++ b/src/parse.c @@ -120,35 +120,3 @@ void free_bloc(struct bloc_parsed *bloc) free(bloc->entries); free(bloc); } - -static struct term *rec_bloc(struct term *term, struct bloc_parsed *bloc) -{ - switch (term->type) { - case ABS: - rec_bloc(term->u.abs.term, bloc); - break; - case APP: - rec_bloc(term->u.app.lhs, bloc); - rec_bloc(term->u.app.rhs, bloc); - break; - case VAR: - break; - case REF: - if (term->u.ref.index >= bloc->length) - fatal("invalid entry reference\n"); - memcpy(term, - bloc->entries[bloc->length - term->u.ref.index - 1], - sizeof(*term)); - break; - default: - fatal("invalid type %d\n", term->type); - return 0; - } - return term; -} - -struct term *from_bloc(struct bloc_parsed *bloc) -{ - struct term *last = bloc->entries[bloc->length - 1]; - return rec_bloc(last, bloc); -} @@ -6,6 +6,7 @@ #include <string.h> #include <term.h> +#include <print.h> #include <log.h> struct term *new_term(term_type type) @@ -39,3 +40,31 @@ void free_term(struct term *term) fatal("invalid type %d\n", term->type); } } + +void diff_term(struct term *a, struct term *b) +{ + if (a->type != b->type) { + fprintf(stderr, "Term a: "); + print_bruijn(a); + fprintf(stderr, "\nTerm b: "); + print_bruijn(b); + fatal("\ntype mismatch %d %d\n", a->type, b->type); + } + + switch (a->type) { + case ABS: + diff_term(a->u.abs.term, b->u.abs.term); + break; + case APP: + diff_term(a->u.app.lhs, b->u.app.lhs); + diff_term(a->u.app.rhs, b->u.app.rhs); + break; + case VAR: + if (a->u.var.index != b->u.var.index) + fatal("var mismatch %d=%d\n", a->u.var.index, + b->u.var.index); + break; + default: + fatal("invalid type %d\n", a->type); + } +} |