aboutsummaryrefslogtreecommitdiff
path: root/src/gc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gc.c')
-rw-r--r--src/gc.c650
1 files changed, 650 insertions, 0 deletions
diff --git a/src/gc.c b/src/gc.c
new file mode 100644
index 0000000..688478e
--- /dev/null
+++ b/src/gc.c
@@ -0,0 +1,650 @@
+#include <gc.h>
+
+#include <errno.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+//#include "primes.h"
+
+const char *log_level_strings[] = { "CRIT", "WARN", "INFO", "DEBG", "NONE" };
+
+/*
+ * Set log level for this compilation unit. If set to LOGLEVEL_DEBUG,
+ * the garbage collector will be very chatty.
+ */
+#undef LOGLEVEL
+#define LOGLEVEL LOGLEVEL_INFO
+
+/*
+ * The size of a pointer.
+ */
+#define PTRSIZE sizeof(char *)
+
+/*
+ * Allocations can temporarily be tagged as "marked" an part of the
+ * mark-and-sweep implementation or can be tagged as "roots" which are
+ * not automatically garbage collected. The latter allows the implementation
+ * of global variables.
+ */
+#define GC_TAG_NONE 0x0
+#define GC_TAG_ROOT 0x1
+#define GC_TAG_MARK 0x2
+
+/*
+ * Support for windows c compiler is added by adding this macro.
+ * Tested on: Microsoft (R) C/C++ Optimizing Compiler Version 19.24.28314 for x86
+ */
+#if defined(_MSC_VER)
+#define __builtin_frame_address(x) ((void)(x), _AddressOfReturnAddress())
+#endif
+
+/*
+ * Define a globally available GC object; this allows all code that
+ * includes the gc.h header to access a global static garbage collector.
+ * Convenient for single-threaded code, insufficient for multi-threaded
+ * use cases. Use the GC_NO_GLOBAL_GC flag to toggle.
+ */
+#ifndef GC_NO_GLOBAL_GC
+GarbageCollector gc; // global GC object
+#endif
+
+static bool is_prime(size_t n)
+{
+ /* https://stackoverflow.com/questions/1538644/c-determine-if-a-number-is-prime */
+ if (n <= 3)
+ return n > 1; // as 2 and 3 are prime
+ else if (n % 2 == 0 || n % 3 == 0)
+ return false; // check if n is divisible by 2 or 3
+ else {
+ for (size_t i = 5; i * i <= n; i += 6) {
+ if (n % i == 0 || n % (i + 2) == 0)
+ return false;
+ }
+ return true;
+ }
+}
+
+static size_t next_prime(size_t n)
+{
+ while (!is_prime(n))
+ ++n;
+ return n;
+}
+
+/**
+ * The allocation object.
+ *
+ * The allocation object holds all metadata for a memory location
+ * in one place.
+ */
+typedef struct Allocation {
+ void *ptr; // mem pointer
+ size_t size; // allocated size in bytes
+ char tag; // the tag for mark-and-sweep
+ void (*dtor)(void *); // destructor
+ struct Allocation *next; // separate chaining
+} Allocation;
+
+/**
+ * Create a new allocation object.
+ *
+ * Creates a new allocation object using the system `malloc`.
+ *
+ * @param[in] ptr The pointer to the memory to manage.
+ * @param[in] size The size of the memory range pointed to by `ptr`.
+ * @param[in] dtor A pointer to a destructor function that should be called
+ * before freeing the memory pointed to by `ptr`.
+ * @returns Pointer to the new allocation instance.
+ */
+static Allocation *gc_allocation_new(void *ptr, size_t size,
+ void (*dtor)(void *))
+{
+ Allocation *a = (Allocation *)malloc(sizeof(Allocation));
+ a->ptr = ptr;
+ a->size = size;
+ a->tag = GC_TAG_NONE;
+ a->dtor = dtor;
+ a->next = NULL;
+ return a;
+}
+
+/**
+ * Delete an allocation object.
+ *
+ * Deletes the allocation object pointed to by `a`, but does *not*
+ * free the memory pointed to by `a->ptr`.
+ *
+ * @param a The allocation object to delete.
+ */
+static void gc_allocation_delete(Allocation *a)
+{
+ free(a);
+}
+
+/**
+ * The allocation hash map.
+ *
+ * The core data structure is a hash map that holds the allocation
+ * objects and allows O(1) retrieval given the memory location. Collision
+ * resolution is implemented using separate chaining.
+ */
+typedef struct AllocationMap {
+ size_t capacity;
+ size_t min_capacity;
+ double downsize_factor;
+ double upsize_factor;
+ double sweep_factor;
+ size_t sweep_limit;
+ size_t size;
+ Allocation **allocs;
+} AllocationMap;
+
+/**
+ * Determine the current load factor of an `AllocationMap`.
+ *
+ * Calculates the load factor of the hash map as the quotient of the size and
+ * the capacity of the hash map.
+ *
+ * @param am The allocationo map to calculate the load factor for.
+ * @returns The load factor of the allocation map `am`.
+ */
+static double gc_allocation_map_load_factor(AllocationMap *am)
+{
+ return (double)am->size / (double)am->capacity;
+}
+
+static AllocationMap *
+gc_allocation_map_new(size_t min_capacity, size_t capacity, double sweep_factor,
+ double downsize_factor, double upsize_factor)
+{
+ AllocationMap *am = (AllocationMap *)malloc(sizeof(AllocationMap));
+ am->min_capacity = next_prime(min_capacity);
+ am->capacity = next_prime(capacity);
+ if (am->capacity < am->min_capacity)
+ am->capacity = am->min_capacity;
+ am->sweep_factor = sweep_factor;
+ am->sweep_limit = (int)(sweep_factor * am->capacity);
+ am->downsize_factor = downsize_factor;
+ am->upsize_factor = upsize_factor;
+ am->allocs = (Allocation **)calloc(am->capacity, sizeof(Allocation *));
+ am->size = 0;
+ LOG_DEBUG("Created allocation map (cap=%ld, siz=%ld)", am->capacity,
+ am->size);
+ return am;
+}
+
+static void gc_allocation_map_delete(AllocationMap *am)
+{
+ // Iterate over the map
+ LOG_DEBUG("Deleting allocation map (cap=%ld, siz=%ld)", am->capacity,
+ am->size);
+ Allocation *alloc, *tmp;
+ for (size_t i = 0; i < am->capacity; ++i) {
+ if ((alloc = am->allocs[i])) {
+ // Make sure to follow the chain inside a bucket
+ while (alloc) {
+ tmp = alloc;
+ alloc = alloc->next;
+ // free the management structure
+ gc_allocation_delete(tmp);
+ }
+ }
+ }
+ free(am->allocs);
+ free(am);
+}
+
+static size_t gc_hash(void *ptr)
+{
+ return ((uintptr_t)ptr) >> 3;
+}
+
+static void gc_allocation_map_resize(AllocationMap *am, size_t new_capacity)
+{
+ if (new_capacity <= am->min_capacity) {
+ return;
+ }
+ // Replaces the existing items array in the hash table
+ // with a resized one and pushes items into the new, correct buckets
+ LOG_DEBUG("Resizing allocation map (cap=%ld, siz=%ld) -> (cap=%ld)",
+ am->capacity, am->size, new_capacity);
+ Allocation **resized_allocs =
+ calloc(new_capacity, sizeof(Allocation *));
+
+ for (size_t i = 0; i < am->capacity; ++i) {
+ Allocation *alloc = am->allocs[i];
+ while (alloc) {
+ Allocation *next_alloc = alloc->next;
+ size_t new_index = gc_hash(alloc->ptr) % new_capacity;
+ alloc->next = resized_allocs[new_index];
+ resized_allocs[new_index] = alloc;
+ alloc = next_alloc;
+ }
+ }
+ free(am->allocs);
+ am->capacity = new_capacity;
+ am->allocs = resized_allocs;
+ am->sweep_limit =
+ am->size + am->sweep_factor * (am->capacity - am->size);
+}
+
+static bool gc_allocation_map_resize_to_fit(AllocationMap *am)
+{
+ double load_factor = gc_allocation_map_load_factor(am);
+ if (load_factor > am->upsize_factor) {
+ LOG_DEBUG("Load factor %0.3g > %0.3g. Triggering upsize.",
+ load_factor, am->upsize_factor);
+ gc_allocation_map_resize(am, next_prime(am->capacity * 2));
+ return true;
+ }
+ if (load_factor < am->downsize_factor) {
+ LOG_DEBUG("Load factor %0.3g < %0.3g. Triggering downsize.",
+ load_factor, am->downsize_factor);
+ gc_allocation_map_resize(am, next_prime(am->capacity / 2));
+ return true;
+ }
+ return false;
+}
+
+static Allocation *gc_allocation_map_get(AllocationMap *am, void *ptr)
+{
+ size_t index = gc_hash(ptr) % am->capacity;
+ Allocation *cur = am->allocs[index];
+ while (cur) {
+ if (cur->ptr == ptr) {
+ return cur;
+ }
+ cur = cur->next;
+ }
+ return NULL;
+}
+
+static Allocation *gc_allocation_map_put(AllocationMap *am, void *ptr,
+ size_t size, void (*dtor)(void *))
+{
+ size_t index = gc_hash(ptr) % am->capacity;
+ LOG_DEBUG("PUT request for allocation ix=%ld", index);
+ Allocation *alloc = gc_allocation_new(ptr, size, dtor);
+ Allocation *cur = am->allocs[index];
+ Allocation *prev = NULL;
+ /* Upsert if ptr is already known (e.g. dtor update). */
+ while (cur != NULL) {
+ if (cur->ptr == ptr) {
+ // found it
+ alloc->next = cur->next;
+ if (!prev) {
+ // position 0
+ am->allocs[index] = alloc;
+ } else {
+ // in the list
+ prev->next = alloc;
+ }
+ gc_allocation_delete(cur);
+ LOG_DEBUG("AllocationMap Upsert at ix=%ld", index);
+ return alloc;
+ }
+ prev = cur;
+ cur = cur->next;
+ }
+ /* Insert at the front of the separate chaining list */
+ cur = am->allocs[index];
+ alloc->next = cur;
+ am->allocs[index] = alloc;
+ am->size++;
+ LOG_DEBUG("AllocationMap insert at ix=%ld", index);
+ void *p = alloc->ptr;
+ if (gc_allocation_map_resize_to_fit(am)) {
+ alloc = gc_allocation_map_get(am, p);
+ }
+ return alloc;
+}
+
+static void gc_allocation_map_remove(AllocationMap *am, void *ptr,
+ bool allow_resize)
+{
+ // ignores unknown keys
+ size_t index = gc_hash(ptr) % am->capacity;
+ Allocation *cur = am->allocs[index];
+ Allocation *prev = NULL;
+ Allocation *next;
+ while (cur != NULL) {
+ next = cur->next;
+ if (cur->ptr == ptr) {
+ // found it
+ if (!prev) {
+ // first item in list
+ am->allocs[index] = cur->next;
+ } else {
+ // not the first item in the list
+ prev->next = cur->next;
+ }
+ gc_allocation_delete(cur);
+ am->size--;
+ } else {
+ // move on
+ prev = cur;
+ }
+ cur = next;
+ }
+ if (allow_resize) {
+ gc_allocation_map_resize_to_fit(am);
+ }
+}
+
+static void *gc_mcalloc(size_t count, size_t size)
+{
+ if (!count)
+ return malloc(size);
+ return calloc(count, size);
+}
+
+static bool gc_needs_sweep(GarbageCollector *gc)
+{
+ return gc->allocs->size > gc->allocs->sweep_limit;
+}
+
+static void *gc_allocate(GarbageCollector *gc, size_t count, size_t size,
+ void (*dtor)(void *))
+{
+ /* Allocation logic that generalizes over malloc/calloc. */
+
+ /* Check if we reached the high-water mark and need to clean up */
+ if (gc_needs_sweep(gc) && !gc->paused) {
+ size_t freed_mem = gc_run(gc);
+ LOG_DEBUG("Garbage collection cleaned up %lu bytes.",
+ freed_mem);
+ }
+ /* With cleanup out of the way, attempt to allocate memory */
+ void *ptr = gc_mcalloc(count, size);
+ size_t alloc_size = count ? count * size : size;
+ /* If allocation fails, force an out-of-policy run to free some memory and try again. */
+ if (!ptr && !gc->paused && (errno == EAGAIN || errno == ENOMEM)) {
+ gc_run(gc);
+ ptr = gc_mcalloc(count, size);
+ }
+ /* Start managing the memory we received from the system */
+ if (ptr) {
+ LOG_DEBUG("Allocated %zu bytes at %p", alloc_size, (void *)ptr);
+ Allocation *alloc = gc_allocation_map_put(gc->allocs, ptr,
+ alloc_size, dtor);
+ /* Deal with metadata allocation failure */
+ if (alloc) {
+ LOG_DEBUG("Managing %zu bytes at %p", alloc_size,
+ (void *)alloc->ptr);
+ ptr = alloc->ptr;
+ } else {
+ /* We failed to allocate the metadata, fail cleanly. */
+ free(ptr);
+ ptr = NULL;
+ }
+ }
+ return ptr;
+}
+
+static void gc_make_root(GarbageCollector *gc, void *ptr)
+{
+ Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr);
+ if (alloc) {
+ alloc->tag |= GC_TAG_ROOT;
+ }
+}
+
+void *gc_malloc(GarbageCollector *gc, size_t size)
+{
+ return gc_malloc_ext(gc, size, NULL);
+}
+
+void *gc_malloc_static(GarbageCollector *gc, size_t size, void (*dtor)(void *))
+{
+ void *ptr = gc_malloc_ext(gc, size, dtor);
+ gc_make_root(gc, ptr);
+ return ptr;
+}
+
+void *gc_make_static(GarbageCollector *gc, void *ptr)
+{
+ gc_make_root(gc, ptr);
+ return ptr;
+}
+
+void *gc_malloc_ext(GarbageCollector *gc, size_t size, void (*dtor)(void *))
+{
+ return gc_allocate(gc, 0, size, dtor);
+}
+
+void *gc_calloc(GarbageCollector *gc, size_t count, size_t size)
+{
+ return gc_calloc_ext(gc, count, size, NULL);
+}
+
+void *gc_calloc_ext(GarbageCollector *gc, size_t count, size_t size,
+ void (*dtor)(void *))
+{
+ return gc_allocate(gc, count, size, dtor);
+}
+
+void *gc_realloc(GarbageCollector *gc, void *p, size_t size)
+{
+ Allocation *alloc = gc_allocation_map_get(gc->allocs, p);
+ if (p && !alloc) {
+ // the user passed an unknown pointer
+ errno = EINVAL;
+ return NULL;
+ }
+ void *q = realloc(p, size);
+ if (!q) {
+ // realloc failed but p is still valid
+ return NULL;
+ }
+ if (!p) {
+ // allocation, not reallocation
+ Allocation *alloc =
+ gc_allocation_map_put(gc->allocs, q, size, NULL);
+ return alloc->ptr;
+ }
+ if (p == q) {
+ // successful reallocation w/o copy
+ alloc->size = size;
+ } else {
+ // successful reallocation w/ copy
+ void (*dtor)(void *) = alloc->dtor;
+ gc_allocation_map_remove(gc->allocs, p, true);
+ gc_allocation_map_put(gc->allocs, q, size, dtor);
+ }
+ return q;
+}
+
+void gc_free(GarbageCollector *gc, void *ptr)
+{
+ Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr);
+ if (alloc) {
+ if (alloc->dtor) {
+ alloc->dtor(ptr);
+ }
+ free(ptr);
+ gc_allocation_map_remove(gc->allocs, ptr, true);
+ } else {
+ LOG_WARNING("Ignoring request to free unknown pointer %p",
+ (void *)ptr);
+ }
+}
+
+void gc_start(GarbageCollector *gc, void *bos)
+{
+ gc_start_ext(gc, bos, 1024, 1024, 0.2, 0.8, 0.5);
+}
+
+void gc_start_ext(GarbageCollector *gc, void *bos, size_t initial_capacity,
+ size_t min_capacity, double downsize_load_factor,
+ double upsize_load_factor, double sweep_factor)
+{
+ double downsize_limit =
+ downsize_load_factor > 0.0 ? downsize_load_factor : 0.2;
+ double upsize_limit =
+ upsize_load_factor > 0.0 ? upsize_load_factor : 0.8;
+ sweep_factor = sweep_factor > 0.0 ? sweep_factor : 0.5;
+ gc->paused = false;
+ gc->bos = bos;
+ initial_capacity = initial_capacity < min_capacity ? min_capacity :
+ initial_capacity;
+ gc->allocs = gc_allocation_map_new(min_capacity, initial_capacity,
+ sweep_factor, downsize_limit,
+ upsize_limit);
+ LOG_DEBUG("Created new garbage collector (cap=%ld, siz=%ld).",
+ gc->allocs->capacity, gc->allocs->size);
+}
+
+void gc_pause(GarbageCollector *gc)
+{
+ gc->paused = true;
+}
+
+void gc_resume(GarbageCollector *gc)
+{
+ gc->paused = false;
+}
+
+void gc_mark_alloc(GarbageCollector *gc, void *ptr)
+{
+ Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr);
+ /* Mark if alloc exists and is not tagged already, otherwise skip */
+ if (alloc && !(alloc->tag & GC_TAG_MARK)) {
+ LOG_DEBUG("Marking allocation (ptr=%p)", ptr);
+ alloc->tag |= GC_TAG_MARK;
+ /* Iterate over allocation contents and mark them as well */
+ LOG_DEBUG("Checking allocation (ptr=%p, size=%lu) contents",
+ ptr, alloc->size);
+ for (char *p = (char *)alloc->ptr;
+ p <= (char *)alloc->ptr + alloc->size - PTRSIZE; ++p) {
+ LOG_DEBUG(
+ "Checking allocation (ptr=%p) @%lu with value %p",
+ ptr, p - ((char *)alloc->ptr), *(void **)p);
+ gc_mark_alloc(gc, *(void **)p);
+ }
+ }
+}
+
+void gc_mark_stack(GarbageCollector *gc)
+{
+ LOG_DEBUG("Marking the stack (gc@%p) in increments of %ld", (void *)gc,
+ sizeof(char));
+ void *tos = __builtin_frame_address(0);
+ void *bos = gc->bos;
+ /* The stack grows towards smaller memory addresses, hence we scan tos->bos.
+ * Stop scanning once the distance between tos & bos is too small to hold a valid pointer */
+ for (char *p = (char *)tos; p <= (char *)bos - PTRSIZE; ++p) {
+ gc_mark_alloc(gc, *(void **)p);
+ }
+}
+
+void gc_mark_roots(GarbageCollector *gc)
+{
+ LOG_DEBUG("Marking roots%s", "");
+ for (size_t i = 0; i < gc->allocs->capacity; ++i) {
+ Allocation *chunk = gc->allocs->allocs[i];
+ while (chunk) {
+ if (chunk->tag & GC_TAG_ROOT) {
+ LOG_DEBUG("Marking root @ %p", chunk->ptr);
+ gc_mark_alloc(gc, chunk->ptr);
+ }
+ chunk = chunk->next;
+ }
+ }
+}
+
+void gc_mark(GarbageCollector *gc)
+{
+ /* Note: We only look at the stack and the heap, and ignore BSS. */
+ LOG_DEBUG("Initiating GC mark (gc@%p)", (void *)gc);
+ /* Scan the heap for roots */
+ gc_mark_roots(gc);
+ /* Dump registers onto stack and scan the stack */
+ void (*volatile _mark_stack)(GarbageCollector *) = gc_mark_stack;
+ jmp_buf ctx;
+ memset(&ctx, 0, sizeof(jmp_buf));
+ setjmp(ctx);
+ _mark_stack(gc);
+}
+
+size_t gc_sweep(GarbageCollector *gc)
+{
+ LOG_DEBUG("Initiating GC sweep (gc@%p)", (void *)gc);
+ size_t total = 0;
+ for (size_t i = 0; i < gc->allocs->capacity; ++i) {
+ Allocation *chunk = gc->allocs->allocs[i];
+ Allocation *next = NULL;
+ /* Iterate over separate chaining */
+ while (chunk) {
+ if (chunk->tag & GC_TAG_MARK) {
+ LOG_DEBUG("Found used allocation %p (ptr=%p)",
+ (void *)chunk, (void *)chunk->ptr);
+ /* unmark */
+ chunk->tag &= ~GC_TAG_MARK;
+ chunk = chunk->next;
+ } else {
+ LOG_DEBUG(
+ "Found unused allocation %p (%lu bytes @ ptr=%p)",
+ (void *)chunk, chunk->size,
+ (void *)chunk->ptr);
+ /* no reference to this chunk, hence delete it */
+ total += chunk->size;
+ if (chunk->dtor) {
+ chunk->dtor(chunk->ptr);
+ }
+ free(chunk->ptr);
+ /* and remove it from the bookkeeping */
+ next = chunk->next;
+ gc_allocation_map_remove(gc->allocs, chunk->ptr,
+ false);
+ chunk = next;
+ }
+ }
+ }
+ gc_allocation_map_resize_to_fit(gc->allocs);
+ return total;
+}
+
+/**
+ * Unset the ROOT tag on all roots on the heap.
+ *
+ * @param gc A pointer to a garbage collector instance.
+ */
+void gc_unroot_roots(GarbageCollector *gc)
+{
+ LOG_DEBUG("Unmarking roots%s", "");
+ for (size_t i = 0; i < gc->allocs->capacity; ++i) {
+ Allocation *chunk = gc->allocs->allocs[i];
+ while (chunk) {
+ if (chunk->tag & GC_TAG_ROOT) {
+ chunk->tag &= ~GC_TAG_ROOT;
+ }
+ chunk = chunk->next;
+ }
+ }
+}
+
+size_t gc_stop(GarbageCollector *gc)
+{
+ gc_unroot_roots(gc);
+ size_t collected = gc_sweep(gc);
+ gc_allocation_map_delete(gc->allocs);
+ return collected;
+}
+
+size_t gc_run(GarbageCollector *gc)
+{
+ LOG_DEBUG("Initiating GC run (gc@%p)", (void *)gc);
+ gc_mark(gc);
+ return gc_sweep(gc);
+}
+
+char *gc_strdup(GarbageCollector *gc, const char *s)
+{
+ size_t len = strlen(s) + 1;
+ void *new = gc_malloc(gc, len);
+
+ if (new == NULL) {
+ return NULL;
+ }
+ return (char *)memcpy(new, s, len);
+}