diff options
author | Marvin Borner | 2023-02-20 16:43:54 +0100 |
---|---|---|
committer | Marvin Borner | 2023-02-20 16:44:34 +0100 |
commit | 71c96b0ecd2f515fc5cfe545f6f7ed5ea40d9469 (patch) | |
tree | 71a7ad3d2b10d126afabefac183e48ae081e12f8 /src/gc.c | |
parent | a162fdc74abf0686ec06e65e06d67a8ce5c13b30 (diff) |
Seems to work
WHY WAS THIS SO EASY?! I spent basically the entire last week trying to
build a reference based garbage collector wtf fuck that
Diffstat (limited to 'src/gc.c')
-rw-r--r-- | src/gc.c | 650 |
1 files changed, 0 insertions, 650 deletions
diff --git a/src/gc.c b/src/gc.c deleted file mode 100644 index 688478e..0000000 --- a/src/gc.c +++ /dev/null @@ -1,650 +0,0 @@ -#include <gc.h> - -#include <errno.h> -#include <setjmp.h> -#include <stdlib.h> -#include <string.h> -//#include "primes.h" - -const char *log_level_strings[] = { "CRIT", "WARN", "INFO", "DEBG", "NONE" }; - -/* - * Set log level for this compilation unit. If set to LOGLEVEL_DEBUG, - * the garbage collector will be very chatty. - */ -#undef LOGLEVEL -#define LOGLEVEL LOGLEVEL_INFO - -/* - * The size of a pointer. - */ -#define PTRSIZE sizeof(char *) - -/* - * Allocations can temporarily be tagged as "marked" an part of the - * mark-and-sweep implementation or can be tagged as "roots" which are - * not automatically garbage collected. The latter allows the implementation - * of global variables. - */ -#define GC_TAG_NONE 0x0 -#define GC_TAG_ROOT 0x1 -#define GC_TAG_MARK 0x2 - -/* - * Support for windows c compiler is added by adding this macro. - * Tested on: Microsoft (R) C/C++ Optimizing Compiler Version 19.24.28314 for x86 - */ -#if defined(_MSC_VER) -#define __builtin_frame_address(x) ((void)(x), _AddressOfReturnAddress()) -#endif - -/* - * Define a globally available GC object; this allows all code that - * includes the gc.h header to access a global static garbage collector. - * Convenient for single-threaded code, insufficient for multi-threaded - * use cases. Use the GC_NO_GLOBAL_GC flag to toggle. - */ -#ifndef GC_NO_GLOBAL_GC -GarbageCollector gc; // global GC object -#endif - -static bool is_prime(size_t n) -{ - /* https://stackoverflow.com/questions/1538644/c-determine-if-a-number-is-prime */ - if (n <= 3) - return n > 1; // as 2 and 3 are prime - else if (n % 2 == 0 || n % 3 == 0) - return false; // check if n is divisible by 2 or 3 - else { - for (size_t i = 5; i * i <= n; i += 6) { - if (n % i == 0 || n % (i + 2) == 0) - return false; - } - return true; - } -} - -static size_t next_prime(size_t n) -{ - while (!is_prime(n)) - ++n; - return n; -} - -/** - * The allocation object. - * - * The allocation object holds all metadata for a memory location - * in one place. - */ -typedef struct Allocation { - void *ptr; // mem pointer - size_t size; // allocated size in bytes - char tag; // the tag for mark-and-sweep - void (*dtor)(void *); // destructor - struct Allocation *next; // separate chaining -} Allocation; - -/** - * Create a new allocation object. - * - * Creates a new allocation object using the system `malloc`. - * - * @param[in] ptr The pointer to the memory to manage. - * @param[in] size The size of the memory range pointed to by `ptr`. - * @param[in] dtor A pointer to a destructor function that should be called - * before freeing the memory pointed to by `ptr`. - * @returns Pointer to the new allocation instance. - */ -static Allocation *gc_allocation_new(void *ptr, size_t size, - void (*dtor)(void *)) -{ - Allocation *a = (Allocation *)malloc(sizeof(Allocation)); - a->ptr = ptr; - a->size = size; - a->tag = GC_TAG_NONE; - a->dtor = dtor; - a->next = NULL; - return a; -} - -/** - * Delete an allocation object. - * - * Deletes the allocation object pointed to by `a`, but does *not* - * free the memory pointed to by `a->ptr`. - * - * @param a The allocation object to delete. - */ -static void gc_allocation_delete(Allocation *a) -{ - free(a); -} - -/** - * The allocation hash map. - * - * The core data structure is a hash map that holds the allocation - * objects and allows O(1) retrieval given the memory location. Collision - * resolution is implemented using separate chaining. - */ -typedef struct AllocationMap { - size_t capacity; - size_t min_capacity; - double downsize_factor; - double upsize_factor; - double sweep_factor; - size_t sweep_limit; - size_t size; - Allocation **allocs; -} AllocationMap; - -/** - * Determine the current load factor of an `AllocationMap`. - * - * Calculates the load factor of the hash map as the quotient of the size and - * the capacity of the hash map. - * - * @param am The allocationo map to calculate the load factor for. - * @returns The load factor of the allocation map `am`. - */ -static double gc_allocation_map_load_factor(AllocationMap *am) -{ - return (double)am->size / (double)am->capacity; -} - -static AllocationMap * -gc_allocation_map_new(size_t min_capacity, size_t capacity, double sweep_factor, - double downsize_factor, double upsize_factor) -{ - AllocationMap *am = (AllocationMap *)malloc(sizeof(AllocationMap)); - am->min_capacity = next_prime(min_capacity); - am->capacity = next_prime(capacity); - if (am->capacity < am->min_capacity) - am->capacity = am->min_capacity; - am->sweep_factor = sweep_factor; - am->sweep_limit = (int)(sweep_factor * am->capacity); - am->downsize_factor = downsize_factor; - am->upsize_factor = upsize_factor; - am->allocs = (Allocation **)calloc(am->capacity, sizeof(Allocation *)); - am->size = 0; - LOG_DEBUG("Created allocation map (cap=%ld, siz=%ld)", am->capacity, - am->size); - return am; -} - -static void gc_allocation_map_delete(AllocationMap *am) -{ - // Iterate over the map - LOG_DEBUG("Deleting allocation map (cap=%ld, siz=%ld)", am->capacity, - am->size); - Allocation *alloc, *tmp; - for (size_t i = 0; i < am->capacity; ++i) { - if ((alloc = am->allocs[i])) { - // Make sure to follow the chain inside a bucket - while (alloc) { - tmp = alloc; - alloc = alloc->next; - // free the management structure - gc_allocation_delete(tmp); - } - } - } - free(am->allocs); - free(am); -} - -static size_t gc_hash(void *ptr) -{ - return ((uintptr_t)ptr) >> 3; -} - -static void gc_allocation_map_resize(AllocationMap *am, size_t new_capacity) -{ - if (new_capacity <= am->min_capacity) { - return; - } - // Replaces the existing items array in the hash table - // with a resized one and pushes items into the new, correct buckets - LOG_DEBUG("Resizing allocation map (cap=%ld, siz=%ld) -> (cap=%ld)", - am->capacity, am->size, new_capacity); - Allocation **resized_allocs = - calloc(new_capacity, sizeof(Allocation *)); - - for (size_t i = 0; i < am->capacity; ++i) { - Allocation *alloc = am->allocs[i]; - while (alloc) { - Allocation *next_alloc = alloc->next; - size_t new_index = gc_hash(alloc->ptr) % new_capacity; - alloc->next = resized_allocs[new_index]; - resized_allocs[new_index] = alloc; - alloc = next_alloc; - } - } - free(am->allocs); - am->capacity = new_capacity; - am->allocs = resized_allocs; - am->sweep_limit = - am->size + am->sweep_factor * (am->capacity - am->size); -} - -static bool gc_allocation_map_resize_to_fit(AllocationMap *am) -{ - double load_factor = gc_allocation_map_load_factor(am); - if (load_factor > am->upsize_factor) { - LOG_DEBUG("Load factor %0.3g > %0.3g. Triggering upsize.", - load_factor, am->upsize_factor); - gc_allocation_map_resize(am, next_prime(am->capacity * 2)); - return true; - } - if (load_factor < am->downsize_factor) { - LOG_DEBUG("Load factor %0.3g < %0.3g. Triggering downsize.", - load_factor, am->downsize_factor); - gc_allocation_map_resize(am, next_prime(am->capacity / 2)); - return true; - } - return false; -} - -static Allocation *gc_allocation_map_get(AllocationMap *am, void *ptr) -{ - size_t index = gc_hash(ptr) % am->capacity; - Allocation *cur = am->allocs[index]; - while (cur) { - if (cur->ptr == ptr) { - return cur; - } - cur = cur->next; - } - return NULL; -} - -static Allocation *gc_allocation_map_put(AllocationMap *am, void *ptr, - size_t size, void (*dtor)(void *)) -{ - size_t index = gc_hash(ptr) % am->capacity; - LOG_DEBUG("PUT request for allocation ix=%ld", index); - Allocation *alloc = gc_allocation_new(ptr, size, dtor); - Allocation *cur = am->allocs[index]; - Allocation *prev = NULL; - /* Upsert if ptr is already known (e.g. dtor update). */ - while (cur != NULL) { - if (cur->ptr == ptr) { - // found it - alloc->next = cur->next; - if (!prev) { - // position 0 - am->allocs[index] = alloc; - } else { - // in the list - prev->next = alloc; - } - gc_allocation_delete(cur); - LOG_DEBUG("AllocationMap Upsert at ix=%ld", index); - return alloc; - } - prev = cur; - cur = cur->next; - } - /* Insert at the front of the separate chaining list */ - cur = am->allocs[index]; - alloc->next = cur; - am->allocs[index] = alloc; - am->size++; - LOG_DEBUG("AllocationMap insert at ix=%ld", index); - void *p = alloc->ptr; - if (gc_allocation_map_resize_to_fit(am)) { - alloc = gc_allocation_map_get(am, p); - } - return alloc; -} - -static void gc_allocation_map_remove(AllocationMap *am, void *ptr, - bool allow_resize) -{ - // ignores unknown keys - size_t index = gc_hash(ptr) % am->capacity; - Allocation *cur = am->allocs[index]; - Allocation *prev = NULL; - Allocation *next; - while (cur != NULL) { - next = cur->next; - if (cur->ptr == ptr) { - // found it - if (!prev) { - // first item in list - am->allocs[index] = cur->next; - } else { - // not the first item in the list - prev->next = cur->next; - } - gc_allocation_delete(cur); - am->size--; - } else { - // move on - prev = cur; - } - cur = next; - } - if (allow_resize) { - gc_allocation_map_resize_to_fit(am); - } -} - -static void *gc_mcalloc(size_t count, size_t size) -{ - if (!count) - return malloc(size); - return calloc(count, size); -} - -static bool gc_needs_sweep(GarbageCollector *gc) -{ - return gc->allocs->size > gc->allocs->sweep_limit; -} - -static void *gc_allocate(GarbageCollector *gc, size_t count, size_t size, - void (*dtor)(void *)) -{ - /* Allocation logic that generalizes over malloc/calloc. */ - - /* Check if we reached the high-water mark and need to clean up */ - if (gc_needs_sweep(gc) && !gc->paused) { - size_t freed_mem = gc_run(gc); - LOG_DEBUG("Garbage collection cleaned up %lu bytes.", - freed_mem); - } - /* With cleanup out of the way, attempt to allocate memory */ - void *ptr = gc_mcalloc(count, size); - size_t alloc_size = count ? count * size : size; - /* If allocation fails, force an out-of-policy run to free some memory and try again. */ - if (!ptr && !gc->paused && (errno == EAGAIN || errno == ENOMEM)) { - gc_run(gc); - ptr = gc_mcalloc(count, size); - } - /* Start managing the memory we received from the system */ - if (ptr) { - LOG_DEBUG("Allocated %zu bytes at %p", alloc_size, (void *)ptr); - Allocation *alloc = gc_allocation_map_put(gc->allocs, ptr, - alloc_size, dtor); - /* Deal with metadata allocation failure */ - if (alloc) { - LOG_DEBUG("Managing %zu bytes at %p", alloc_size, - (void *)alloc->ptr); - ptr = alloc->ptr; - } else { - /* We failed to allocate the metadata, fail cleanly. */ - free(ptr); - ptr = NULL; - } - } - return ptr; -} - -static void gc_make_root(GarbageCollector *gc, void *ptr) -{ - Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr); - if (alloc) { - alloc->tag |= GC_TAG_ROOT; - } -} - -void *gc_malloc(GarbageCollector *gc, size_t size) -{ - return gc_malloc_ext(gc, size, NULL); -} - -void *gc_malloc_static(GarbageCollector *gc, size_t size, void (*dtor)(void *)) -{ - void *ptr = gc_malloc_ext(gc, size, dtor); - gc_make_root(gc, ptr); - return ptr; -} - -void *gc_make_static(GarbageCollector *gc, void *ptr) -{ - gc_make_root(gc, ptr); - return ptr; -} - -void *gc_malloc_ext(GarbageCollector *gc, size_t size, void (*dtor)(void *)) -{ - return gc_allocate(gc, 0, size, dtor); -} - -void *gc_calloc(GarbageCollector *gc, size_t count, size_t size) -{ - return gc_calloc_ext(gc, count, size, NULL); -} - -void *gc_calloc_ext(GarbageCollector *gc, size_t count, size_t size, - void (*dtor)(void *)) -{ - return gc_allocate(gc, count, size, dtor); -} - -void *gc_realloc(GarbageCollector *gc, void *p, size_t size) -{ - Allocation *alloc = gc_allocation_map_get(gc->allocs, p); - if (p && !alloc) { - // the user passed an unknown pointer - errno = EINVAL; - return NULL; - } - void *q = realloc(p, size); - if (!q) { - // realloc failed but p is still valid - return NULL; - } - if (!p) { - // allocation, not reallocation - Allocation *alloc = - gc_allocation_map_put(gc->allocs, q, size, NULL); - return alloc->ptr; - } - if (p == q) { - // successful reallocation w/o copy - alloc->size = size; - } else { - // successful reallocation w/ copy - void (*dtor)(void *) = alloc->dtor; - gc_allocation_map_remove(gc->allocs, p, true); - gc_allocation_map_put(gc->allocs, q, size, dtor); - } - return q; -} - -void gc_free(GarbageCollector *gc, void *ptr) -{ - Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr); - if (alloc) { - if (alloc->dtor) { - alloc->dtor(ptr); - } - free(ptr); - gc_allocation_map_remove(gc->allocs, ptr, true); - } else { - LOG_WARNING("Ignoring request to free unknown pointer %p", - (void *)ptr); - } -} - -void gc_start(GarbageCollector *gc, void *bos) -{ - gc_start_ext(gc, bos, 1024, 1024, 0.2, 0.8, 0.5); -} - -void gc_start_ext(GarbageCollector *gc, void *bos, size_t initial_capacity, - size_t min_capacity, double downsize_load_factor, - double upsize_load_factor, double sweep_factor) -{ - double downsize_limit = - downsize_load_factor > 0.0 ? downsize_load_factor : 0.2; - double upsize_limit = - upsize_load_factor > 0.0 ? upsize_load_factor : 0.8; - sweep_factor = sweep_factor > 0.0 ? sweep_factor : 0.5; - gc->paused = false; - gc->bos = bos; - initial_capacity = initial_capacity < min_capacity ? min_capacity : - initial_capacity; - gc->allocs = gc_allocation_map_new(min_capacity, initial_capacity, - sweep_factor, downsize_limit, - upsize_limit); - LOG_DEBUG("Created new garbage collector (cap=%ld, siz=%ld).", - gc->allocs->capacity, gc->allocs->size); -} - -void gc_pause(GarbageCollector *gc) -{ - gc->paused = true; -} - -void gc_resume(GarbageCollector *gc) -{ - gc->paused = false; -} - -void gc_mark_alloc(GarbageCollector *gc, void *ptr) -{ - Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr); - /* Mark if alloc exists and is not tagged already, otherwise skip */ - if (alloc && !(alloc->tag & GC_TAG_MARK)) { - LOG_DEBUG("Marking allocation (ptr=%p)", ptr); - alloc->tag |= GC_TAG_MARK; - /* Iterate over allocation contents and mark them as well */ - LOG_DEBUG("Checking allocation (ptr=%p, size=%lu) contents", - ptr, alloc->size); - for (char *p = (char *)alloc->ptr; - p <= (char *)alloc->ptr + alloc->size - PTRSIZE; ++p) { - LOG_DEBUG( - "Checking allocation (ptr=%p) @%lu with value %p", - ptr, p - ((char *)alloc->ptr), *(void **)p); - gc_mark_alloc(gc, *(void **)p); - } - } -} - -void gc_mark_stack(GarbageCollector *gc) -{ - LOG_DEBUG("Marking the stack (gc@%p) in increments of %ld", (void *)gc, - sizeof(char)); - void *tos = __builtin_frame_address(0); - void *bos = gc->bos; - /* The stack grows towards smaller memory addresses, hence we scan tos->bos. - * Stop scanning once the distance between tos & bos is too small to hold a valid pointer */ - for (char *p = (char *)tos; p <= (char *)bos - PTRSIZE; ++p) { - gc_mark_alloc(gc, *(void **)p); - } -} - -void gc_mark_roots(GarbageCollector *gc) -{ - LOG_DEBUG("Marking roots%s", ""); - for (size_t i = 0; i < gc->allocs->capacity; ++i) { - Allocation *chunk = gc->allocs->allocs[i]; - while (chunk) { - if (chunk->tag & GC_TAG_ROOT) { - LOG_DEBUG("Marking root @ %p", chunk->ptr); - gc_mark_alloc(gc, chunk->ptr); - } - chunk = chunk->next; - } - } -} - -void gc_mark(GarbageCollector *gc) -{ - /* Note: We only look at the stack and the heap, and ignore BSS. */ - LOG_DEBUG("Initiating GC mark (gc@%p)", (void *)gc); - /* Scan the heap for roots */ - gc_mark_roots(gc); - /* Dump registers onto stack and scan the stack */ - void (*volatile _mark_stack)(GarbageCollector *) = gc_mark_stack; - jmp_buf ctx; - memset(&ctx, 0, sizeof(jmp_buf)); - setjmp(ctx); - _mark_stack(gc); -} - -size_t gc_sweep(GarbageCollector *gc) -{ - LOG_DEBUG("Initiating GC sweep (gc@%p)", (void *)gc); - size_t total = 0; - for (size_t i = 0; i < gc->allocs->capacity; ++i) { - Allocation *chunk = gc->allocs->allocs[i]; - Allocation *next = NULL; - /* Iterate over separate chaining */ - while (chunk) { - if (chunk->tag & GC_TAG_MARK) { - LOG_DEBUG("Found used allocation %p (ptr=%p)", - (void *)chunk, (void *)chunk->ptr); - /* unmark */ - chunk->tag &= ~GC_TAG_MARK; - chunk = chunk->next; - } else { - LOG_DEBUG( - "Found unused allocation %p (%lu bytes @ ptr=%p)", - (void *)chunk, chunk->size, - (void *)chunk->ptr); - /* no reference to this chunk, hence delete it */ - total += chunk->size; - if (chunk->dtor) { - chunk->dtor(chunk->ptr); - } - free(chunk->ptr); - /* and remove it from the bookkeeping */ - next = chunk->next; - gc_allocation_map_remove(gc->allocs, chunk->ptr, - false); - chunk = next; - } - } - } - gc_allocation_map_resize_to_fit(gc->allocs); - return total; -} - -/** - * Unset the ROOT tag on all roots on the heap. - * - * @param gc A pointer to a garbage collector instance. - */ -void gc_unroot_roots(GarbageCollector *gc) -{ - LOG_DEBUG("Unmarking roots%s", ""); - for (size_t i = 0; i < gc->allocs->capacity; ++i) { - Allocation *chunk = gc->allocs->allocs[i]; - while (chunk) { - if (chunk->tag & GC_TAG_ROOT) { - chunk->tag &= ~GC_TAG_ROOT; - } - chunk = chunk->next; - } - } -} - -size_t gc_stop(GarbageCollector *gc) -{ - gc_unroot_roots(gc); - size_t collected = gc_sweep(gc); - gc_allocation_map_delete(gc->allocs); - return collected; -} - -size_t gc_run(GarbageCollector *gc) -{ - LOG_DEBUG("Initiating GC run (gc@%p)", (void *)gc); - gc_mark(gc); - return gc_sweep(gc); -} - -char *gc_strdup(GarbageCollector *gc, const char *s) -{ - size_t len = strlen(s) + 1; - void *new = gc_malloc(gc, len); - - if (new == NULL) { - return NULL; - } - return (char *)memcpy(new, s, len); -} |