aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarvin Borner2024-01-17 19:35:53 +0100
committerMarvin Borner2024-01-17 19:35:53 +0100
commit7bfd0540670cdf20d827dace7d720247ae3025d4 (patch)
tree26438a643b8d3418077306d103362d7de16b6dba
parent73deba308cd3874e574f6be27e03c4b100783ecd (diff)
Almost working shared blc
-rw-r--r--inc/term.h1
-rw-r--r--readme.md7
-rw-r--r--src/targets/blc.c149
-rw-r--r--src/term.c1
4 files changed, 121 insertions, 37 deletions
diff --git a/inc/term.h b/inc/term.h
index 9823fc1..4cbc0b2 100644
--- a/inc/term.h
+++ b/inc/term.h
@@ -25,6 +25,7 @@ struct term {
size_t index;
} ref;
} u;
+ unsigned int meta; // arbitrary field for targets
};
struct term *new_term(term_type type);
diff --git a/readme.md b/readme.md
index fe9f111..514efde 100644
--- a/readme.md
+++ b/readme.md
@@ -12,13 +12,12 @@ benchmarking, or general term optimization.
- BLC (sharing by abstraction): Terms having BLoC entry indices get
abstracted and applied to the respective term. The indices get
converted to De Bruijn indices. The dependency graph is resolved by
- a topological sort so De Bruijn indices won't get unnecessarily
- large. Flag `bblc` (bits) and `blc` (ASCII 0/1).
+ a topological sort. Flag `bblc` (bits) and `blc` (ASCII 0/1).
- BLC (unshared): Every BLoC entry gets reinserted into the original
term. Do not use this if you want efficiency or small files. Flag
`unbblc` (bits) and `unblc` (ASCII 0/1).
-- [Effekt](https://effekt-lang.org): Because, why not? Flag `effekt`.
-- Planned: Scala, HVM, C, LLVM, JS, Haskell
+- Planned: [Effekt](https://effekt-lang.org), Scala, HVM, C, LLVM, JS,
+ Haskell
## Benchmarks
diff --git a/src/targets/blc.c b/src/targets/blc.c
index 73096db..a1cbbf3 100644
--- a/src/targets/blc.c
+++ b/src/targets/blc.c
@@ -1,6 +1,10 @@
// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
// SPDX-License-Identifier: MIT
+// TODO: Instead of unsharing open entries, they could be abstracted into closed terms
+// and then resubstituted using the Jedi spaceship invasion technique.
+// This would only help in very rare edge cases though
+
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -9,17 +13,25 @@
#include <parse.h>
#include <log.h>
-static void fprint_blc(struct term *term, struct bloc_parsed *bloc, FILE *file)
+#define META_CLOSED 0x1
+#define META_OPEN 0x2
+
+static void fprint_blc_substituted(struct term *term, struct bloc_parsed *bloc,
+ size_t *positions_inv, void **closed,
+ int depth, FILE *file)
{
switch (term->type) {
case ABS:
fprintf(file, "00");
- fprint_blc(term->u.abs.term, bloc, file);
+ fprint_blc_substituted(term->u.abs.term, bloc, positions_inv,
+ closed, depth + 1, file);
break;
case APP:
fprintf(file, "01");
- fprint_blc(term->u.app.lhs, bloc, file);
- fprint_blc(term->u.app.rhs, bloc, file);
+ fprint_blc_substituted(term->u.app.lhs, bloc, positions_inv,
+ closed, depth, file);
+ fprint_blc_substituted(term->u.app.rhs, bloc, positions_inv,
+ closed, depth, file);
break;
case VAR:
for (int i = 0; i <= term->u.var.index; i++)
@@ -29,14 +41,56 @@ static void fprint_blc(struct term *term, struct bloc_parsed *bloc, FILE *file)
case REF:
if (term->u.ref.index + 1 >= bloc->length)
fatal("invalid ref index %ld\n", term->u.ref.index);
- fprint_blc(bloc->entries[bloc->length - term->u.ref.index - 2],
- bloc, file);
+
+ int reffed = bloc->length - term->u.ref.index - 2;
+ if (closed[reffed]) {
+ debug("closed ref %d\n", reffed);
+ int index = depth + positions_inv[reffed];
+ for (int i = 0; i <= index; i++)
+ fprintf(file, "1");
+ fprintf(file, "0");
+ } else {
+ fprint_blc_substituted(bloc->entries[reffed], bloc,
+ positions_inv, closed, depth,
+ file);
+ }
break;
default:
fatal("invalid type %d\n", term->type);
}
}
+static void fprint_blc(size_t *positions, void *closed,
+ struct bloc_parsed *bloc, FILE *file)
+{
+ size_t *positions_inv =
+ calloc(bloc->length * sizeof(*positions_inv), 1);
+
+ size_t end = 0;
+ for (size_t i = 0; i < bloc->length; i++) {
+ if (!positions[i]) {
+ end = i;
+ break;
+ }
+ positions_inv[bloc->length - positions[i]] = i;
+ fprintf(file, "0100"); // ([
+ /* fprintf(file, "(["); */
+ }
+
+ /* fprintf(file, "0"); */
+
+ for (size_t i = end; i > 0; i--) {
+ /* fprintf(file, "]"); // implicit */
+ fprint_blc_substituted(
+ bloc->entries[bloc->length - positions[i - 1]], bloc,
+ positions_inv, closed, 0, file);
+ /* fprintf(file, "<%ld>", bloc->length - positions[i - 1]); */
+ /* fprintf(file, ")"); // implicit */
+ }
+
+ free(positions_inv);
+}
+
static void bitmap_deps(struct bloc_parsed *bloc, char *bitmap,
struct term *term)
{
@@ -54,29 +108,50 @@ static void bitmap_deps(struct bloc_parsed *bloc, char *bitmap,
if (term->u.ref.index + 1 >= bloc->length)
fatal("invalid ref index %ld\n", term->u.ref.index);
bitmap[bloc->length - term->u.ref.index - 2] = 1;
- // for recursive bitmapping, not needed for DFS
- /* bitmap_deps( */
- /* bloc, bitmap, */
- /* bloc->entries[bloc->length - term->u.ref.index - 2]); */
break;
default:
fatal("invalid type %d\n", term->type);
}
}
-static void print_deps(size_t length, char *bitmap)
+// TODO: memoize META_CLOSED with term->meta (more complex than you'd think!)
+static unsigned int annotate(struct bloc_parsed *bloc, struct term *term,
+ int depth)
{
- for (size_t i = 0; i < length; i++) {
- if (bitmap[i])
- printf("%ld,", i);
+ switch (term->type) {
+ case ABS:
+ return annotate(bloc, term->u.abs.term, depth + 1);
+ case APP:;
+ unsigned int left = annotate(bloc, term->u.app.lhs, depth);
+ unsigned int right = annotate(bloc, term->u.app.rhs, depth);
+ if ((left & META_OPEN) || (right & META_OPEN))
+ return META_OPEN;
+ return META_CLOSED;
+ case VAR:
+ if (term->u.var.index >= depth)
+ return META_OPEN;
+ return META_CLOSED;
+ case REF:
+ if (term->u.ref.index + 1 >= bloc->length)
+ fatal("invalid ref index %ld\n", term->u.ref.index);
+ struct term *sub =
+ bloc->entries[bloc->length - term->u.ref.index - 2];
+ return annotate(bloc, sub, depth);
+ default:
+ fatal("invalid type %d\n", term->type);
}
- printf("\n");
}
#define PERMANENT_MARK 0x1
#define TEMPORARY_MARK 0x2
-static void visit(char **bitmaps, char *marks, size_t length, size_t n)
+static void visit(char **bitmaps, char *marks, size_t *positions,
+ size_t *position, size_t length, size_t n)
{
+ if (!bitmaps[n]) {
+ marks[n] |= PERMANENT_MARK;
+ return;
+ }
+
if (marks[n] & PERMANENT_MARK)
return;
if (marks[n] & TEMPORARY_MARK)
@@ -85,57 +160,65 @@ static void visit(char **bitmaps, char *marks, size_t length, size_t n)
marks[n] |= TEMPORARY_MARK;
for (size_t i = 0; i < length; i++) {
if (bitmaps[n][i]) {
- visit(bitmaps, marks, length, i);
+ visit(bitmaps, marks, positions, position, length, i);
}
}
marks[n] &= ~TEMPORARY_MARK;
-
marks[n] |= PERMANENT_MARK;
- printf("PUSH %ld\n", n);
+
+ positions[*position] = length - n; // deliberate offset of +2
+ (*position)++;
}
-static void topological_sort(char **bitmaps, size_t length)
+static size_t *topological_sort(char **bitmaps, size_t length)
{
char *marks = calloc(length, 1);
+ size_t *positions = calloc(length * sizeof(*positions), 1);
+
+ size_t position = 0;
int marked = 1;
- while (marked) {
+ while (marked) { // TODO: outer loop can be removed?
marked = 0;
for (size_t i = 0; i < length; i++) {
if ((marks[i] & PERMANENT_MARK) == 0)
marked = 1; // still has nodes without permanent
- if (marks[i])
+ if (marks[i] & 0x3)
continue;
- visit(bitmaps, marks, length, i);
+ visit(bitmaps, marks, positions, &position, length, i);
}
}
free(marks);
+ return positions;
}
static void write_blc(struct bloc_parsed *bloc, FILE *file)
{
- /* fprint_blc(bloc->entries[bloc->length - 1], bloc, file); */
- /* fprintf(file, "\n"); */
-
char **bitmaps = malloc(bloc->length * sizeof(*bitmaps));
for (size_t i = 0; i < bloc->length; i++) {
+ unsigned int meta = annotate(bloc, bloc->entries[i], 0);
+ if (!(meta & META_CLOSED)) {
+ bitmaps[i] = 0;
+ continue;
+ }
+
char *bitmap = calloc(bloc->length, 1);
bitmap_deps(bloc, bitmap, bloc->entries[i]);
-
- /* printf("entry %ld\n", bloc->length - i - 2); */
- /* printf("entry %ld\n", i); */
- /* print_deps(bloc->length, bitmap); */
bitmaps[i] = bitmap;
}
- /* printf("\n"); */
- topological_sort(bitmaps, bloc->length);
+ size_t *positions = topological_sort(bitmaps, bloc->length);
+ fprint_blc(positions, bitmaps, bloc, file);
+ /* fprintf(file, "\n"); */
for (size_t i = 0; i < bloc->length; i++) {
- free(bitmaps[i]);
+ /* printf("%ld: %ld, ", i, positions[i]); */
+ if (bitmaps[i])
+ free(bitmaps[i]);
}
free(bitmaps);
+ free(positions);
}
struct target_spec target_blc = {
diff --git a/src/term.c b/src/term.c
index f93a50c..c7bf6d6 100644
--- a/src/term.c
+++ b/src/term.c
@@ -14,6 +14,7 @@ struct term *new_term(term_type type)
if (!term)
fatal("out of memory!\n");
term->type = type;
+ term->meta = 0;
return term;
}