diff options
Diffstat (limited to 'src/gc.c')
-rw-r--r-- | src/gc.c | 650 |
1 files changed, 650 insertions, 0 deletions
diff --git a/src/gc.c b/src/gc.c new file mode 100644 index 0000000..688478e --- /dev/null +++ b/src/gc.c @@ -0,0 +1,650 @@ +#include <gc.h> + +#include <errno.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +//#include "primes.h" + +const char *log_level_strings[] = { "CRIT", "WARN", "INFO", "DEBG", "NONE" }; + +/* + * Set log level for this compilation unit. If set to LOGLEVEL_DEBUG, + * the garbage collector will be very chatty. + */ +#undef LOGLEVEL +#define LOGLEVEL LOGLEVEL_INFO + +/* + * The size of a pointer. + */ +#define PTRSIZE sizeof(char *) + +/* + * Allocations can temporarily be tagged as "marked" an part of the + * mark-and-sweep implementation or can be tagged as "roots" which are + * not automatically garbage collected. The latter allows the implementation + * of global variables. + */ +#define GC_TAG_NONE 0x0 +#define GC_TAG_ROOT 0x1 +#define GC_TAG_MARK 0x2 + +/* + * Support for windows c compiler is added by adding this macro. + * Tested on: Microsoft (R) C/C++ Optimizing Compiler Version 19.24.28314 for x86 + */ +#if defined(_MSC_VER) +#define __builtin_frame_address(x) ((void)(x), _AddressOfReturnAddress()) +#endif + +/* + * Define a globally available GC object; this allows all code that + * includes the gc.h header to access a global static garbage collector. + * Convenient for single-threaded code, insufficient for multi-threaded + * use cases. Use the GC_NO_GLOBAL_GC flag to toggle. + */ +#ifndef GC_NO_GLOBAL_GC +GarbageCollector gc; // global GC object +#endif + +static bool is_prime(size_t n) +{ + /* https://stackoverflow.com/questions/1538644/c-determine-if-a-number-is-prime */ + if (n <= 3) + return n > 1; // as 2 and 3 are prime + else if (n % 2 == 0 || n % 3 == 0) + return false; // check if n is divisible by 2 or 3 + else { + for (size_t i = 5; i * i <= n; i += 6) { + if (n % i == 0 || n % (i + 2) == 0) + return false; + } + return true; + } +} + +static size_t next_prime(size_t n) +{ + while (!is_prime(n)) + ++n; + return n; +} + +/** + * The allocation object. + * + * The allocation object holds all metadata for a memory location + * in one place. + */ +typedef struct Allocation { + void *ptr; // mem pointer + size_t size; // allocated size in bytes + char tag; // the tag for mark-and-sweep + void (*dtor)(void *); // destructor + struct Allocation *next; // separate chaining +} Allocation; + +/** + * Create a new allocation object. + * + * Creates a new allocation object using the system `malloc`. + * + * @param[in] ptr The pointer to the memory to manage. + * @param[in] size The size of the memory range pointed to by `ptr`. + * @param[in] dtor A pointer to a destructor function that should be called + * before freeing the memory pointed to by `ptr`. + * @returns Pointer to the new allocation instance. + */ +static Allocation *gc_allocation_new(void *ptr, size_t size, + void (*dtor)(void *)) +{ + Allocation *a = (Allocation *)malloc(sizeof(Allocation)); + a->ptr = ptr; + a->size = size; + a->tag = GC_TAG_NONE; + a->dtor = dtor; + a->next = NULL; + return a; +} + +/** + * Delete an allocation object. + * + * Deletes the allocation object pointed to by `a`, but does *not* + * free the memory pointed to by `a->ptr`. + * + * @param a The allocation object to delete. + */ +static void gc_allocation_delete(Allocation *a) +{ + free(a); +} + +/** + * The allocation hash map. + * + * The core data structure is a hash map that holds the allocation + * objects and allows O(1) retrieval given the memory location. Collision + * resolution is implemented using separate chaining. + */ +typedef struct AllocationMap { + size_t capacity; + size_t min_capacity; + double downsize_factor; + double upsize_factor; + double sweep_factor; + size_t sweep_limit; + size_t size; + Allocation **allocs; +} AllocationMap; + +/** + * Determine the current load factor of an `AllocationMap`. + * + * Calculates the load factor of the hash map as the quotient of the size and + * the capacity of the hash map. + * + * @param am The allocationo map to calculate the load factor for. + * @returns The load factor of the allocation map `am`. + */ +static double gc_allocation_map_load_factor(AllocationMap *am) +{ + return (double)am->size / (double)am->capacity; +} + +static AllocationMap * +gc_allocation_map_new(size_t min_capacity, size_t capacity, double sweep_factor, + double downsize_factor, double upsize_factor) +{ + AllocationMap *am = (AllocationMap *)malloc(sizeof(AllocationMap)); + am->min_capacity = next_prime(min_capacity); + am->capacity = next_prime(capacity); + if (am->capacity < am->min_capacity) + am->capacity = am->min_capacity; + am->sweep_factor = sweep_factor; + am->sweep_limit = (int)(sweep_factor * am->capacity); + am->downsize_factor = downsize_factor; + am->upsize_factor = upsize_factor; + am->allocs = (Allocation **)calloc(am->capacity, sizeof(Allocation *)); + am->size = 0; + LOG_DEBUG("Created allocation map (cap=%ld, siz=%ld)", am->capacity, + am->size); + return am; +} + +static void gc_allocation_map_delete(AllocationMap *am) +{ + // Iterate over the map + LOG_DEBUG("Deleting allocation map (cap=%ld, siz=%ld)", am->capacity, + am->size); + Allocation *alloc, *tmp; + for (size_t i = 0; i < am->capacity; ++i) { + if ((alloc = am->allocs[i])) { + // Make sure to follow the chain inside a bucket + while (alloc) { + tmp = alloc; + alloc = alloc->next; + // free the management structure + gc_allocation_delete(tmp); + } + } + } + free(am->allocs); + free(am); +} + +static size_t gc_hash(void *ptr) +{ + return ((uintptr_t)ptr) >> 3; +} + +static void gc_allocation_map_resize(AllocationMap *am, size_t new_capacity) +{ + if (new_capacity <= am->min_capacity) { + return; + } + // Replaces the existing items array in the hash table + // with a resized one and pushes items into the new, correct buckets + LOG_DEBUG("Resizing allocation map (cap=%ld, siz=%ld) -> (cap=%ld)", + am->capacity, am->size, new_capacity); + Allocation **resized_allocs = + calloc(new_capacity, sizeof(Allocation *)); + + for (size_t i = 0; i < am->capacity; ++i) { + Allocation *alloc = am->allocs[i]; + while (alloc) { + Allocation *next_alloc = alloc->next; + size_t new_index = gc_hash(alloc->ptr) % new_capacity; + alloc->next = resized_allocs[new_index]; + resized_allocs[new_index] = alloc; + alloc = next_alloc; + } + } + free(am->allocs); + am->capacity = new_capacity; + am->allocs = resized_allocs; + am->sweep_limit = + am->size + am->sweep_factor * (am->capacity - am->size); +} + +static bool gc_allocation_map_resize_to_fit(AllocationMap *am) +{ + double load_factor = gc_allocation_map_load_factor(am); + if (load_factor > am->upsize_factor) { + LOG_DEBUG("Load factor %0.3g > %0.3g. Triggering upsize.", + load_factor, am->upsize_factor); + gc_allocation_map_resize(am, next_prime(am->capacity * 2)); + return true; + } + if (load_factor < am->downsize_factor) { + LOG_DEBUG("Load factor %0.3g < %0.3g. Triggering downsize.", + load_factor, am->downsize_factor); + gc_allocation_map_resize(am, next_prime(am->capacity / 2)); + return true; + } + return false; +} + +static Allocation *gc_allocation_map_get(AllocationMap *am, void *ptr) +{ + size_t index = gc_hash(ptr) % am->capacity; + Allocation *cur = am->allocs[index]; + while (cur) { + if (cur->ptr == ptr) { + return cur; + } + cur = cur->next; + } + return NULL; +} + +static Allocation *gc_allocation_map_put(AllocationMap *am, void *ptr, + size_t size, void (*dtor)(void *)) +{ + size_t index = gc_hash(ptr) % am->capacity; + LOG_DEBUG("PUT request for allocation ix=%ld", index); + Allocation *alloc = gc_allocation_new(ptr, size, dtor); + Allocation *cur = am->allocs[index]; + Allocation *prev = NULL; + /* Upsert if ptr is already known (e.g. dtor update). */ + while (cur != NULL) { + if (cur->ptr == ptr) { + // found it + alloc->next = cur->next; + if (!prev) { + // position 0 + am->allocs[index] = alloc; + } else { + // in the list + prev->next = alloc; + } + gc_allocation_delete(cur); + LOG_DEBUG("AllocationMap Upsert at ix=%ld", index); + return alloc; + } + prev = cur; + cur = cur->next; + } + /* Insert at the front of the separate chaining list */ + cur = am->allocs[index]; + alloc->next = cur; + am->allocs[index] = alloc; + am->size++; + LOG_DEBUG("AllocationMap insert at ix=%ld", index); + void *p = alloc->ptr; + if (gc_allocation_map_resize_to_fit(am)) { + alloc = gc_allocation_map_get(am, p); + } + return alloc; +} + +static void gc_allocation_map_remove(AllocationMap *am, void *ptr, + bool allow_resize) +{ + // ignores unknown keys + size_t index = gc_hash(ptr) % am->capacity; + Allocation *cur = am->allocs[index]; + Allocation *prev = NULL; + Allocation *next; + while (cur != NULL) { + next = cur->next; + if (cur->ptr == ptr) { + // found it + if (!prev) { + // first item in list + am->allocs[index] = cur->next; + } else { + // not the first item in the list + prev->next = cur->next; + } + gc_allocation_delete(cur); + am->size--; + } else { + // move on + prev = cur; + } + cur = next; + } + if (allow_resize) { + gc_allocation_map_resize_to_fit(am); + } +} + +static void *gc_mcalloc(size_t count, size_t size) +{ + if (!count) + return malloc(size); + return calloc(count, size); +} + +static bool gc_needs_sweep(GarbageCollector *gc) +{ + return gc->allocs->size > gc->allocs->sweep_limit; +} + +static void *gc_allocate(GarbageCollector *gc, size_t count, size_t size, + void (*dtor)(void *)) +{ + /* Allocation logic that generalizes over malloc/calloc. */ + + /* Check if we reached the high-water mark and need to clean up */ + if (gc_needs_sweep(gc) && !gc->paused) { + size_t freed_mem = gc_run(gc); + LOG_DEBUG("Garbage collection cleaned up %lu bytes.", + freed_mem); + } + /* With cleanup out of the way, attempt to allocate memory */ + void *ptr = gc_mcalloc(count, size); + size_t alloc_size = count ? count * size : size; + /* If allocation fails, force an out-of-policy run to free some memory and try again. */ + if (!ptr && !gc->paused && (errno == EAGAIN || errno == ENOMEM)) { + gc_run(gc); + ptr = gc_mcalloc(count, size); + } + /* Start managing the memory we received from the system */ + if (ptr) { + LOG_DEBUG("Allocated %zu bytes at %p", alloc_size, (void *)ptr); + Allocation *alloc = gc_allocation_map_put(gc->allocs, ptr, + alloc_size, dtor); + /* Deal with metadata allocation failure */ + if (alloc) { + LOG_DEBUG("Managing %zu bytes at %p", alloc_size, + (void *)alloc->ptr); + ptr = alloc->ptr; + } else { + /* We failed to allocate the metadata, fail cleanly. */ + free(ptr); + ptr = NULL; + } + } + return ptr; +} + +static void gc_make_root(GarbageCollector *gc, void *ptr) +{ + Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr); + if (alloc) { + alloc->tag |= GC_TAG_ROOT; + } +} + +void *gc_malloc(GarbageCollector *gc, size_t size) +{ + return gc_malloc_ext(gc, size, NULL); +} + +void *gc_malloc_static(GarbageCollector *gc, size_t size, void (*dtor)(void *)) +{ + void *ptr = gc_malloc_ext(gc, size, dtor); + gc_make_root(gc, ptr); + return ptr; +} + +void *gc_make_static(GarbageCollector *gc, void *ptr) +{ + gc_make_root(gc, ptr); + return ptr; +} + +void *gc_malloc_ext(GarbageCollector *gc, size_t size, void (*dtor)(void *)) +{ + return gc_allocate(gc, 0, size, dtor); +} + +void *gc_calloc(GarbageCollector *gc, size_t count, size_t size) +{ + return gc_calloc_ext(gc, count, size, NULL); +} + +void *gc_calloc_ext(GarbageCollector *gc, size_t count, size_t size, + void (*dtor)(void *)) +{ + return gc_allocate(gc, count, size, dtor); +} + +void *gc_realloc(GarbageCollector *gc, void *p, size_t size) +{ + Allocation *alloc = gc_allocation_map_get(gc->allocs, p); + if (p && !alloc) { + // the user passed an unknown pointer + errno = EINVAL; + return NULL; + } + void *q = realloc(p, size); + if (!q) { + // realloc failed but p is still valid + return NULL; + } + if (!p) { + // allocation, not reallocation + Allocation *alloc = + gc_allocation_map_put(gc->allocs, q, size, NULL); + return alloc->ptr; + } + if (p == q) { + // successful reallocation w/o copy + alloc->size = size; + } else { + // successful reallocation w/ copy + void (*dtor)(void *) = alloc->dtor; + gc_allocation_map_remove(gc->allocs, p, true); + gc_allocation_map_put(gc->allocs, q, size, dtor); + } + return q; +} + +void gc_free(GarbageCollector *gc, void *ptr) +{ + Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr); + if (alloc) { + if (alloc->dtor) { + alloc->dtor(ptr); + } + free(ptr); + gc_allocation_map_remove(gc->allocs, ptr, true); + } else { + LOG_WARNING("Ignoring request to free unknown pointer %p", + (void *)ptr); + } +} + +void gc_start(GarbageCollector *gc, void *bos) +{ + gc_start_ext(gc, bos, 1024, 1024, 0.2, 0.8, 0.5); +} + +void gc_start_ext(GarbageCollector *gc, void *bos, size_t initial_capacity, + size_t min_capacity, double downsize_load_factor, + double upsize_load_factor, double sweep_factor) +{ + double downsize_limit = + downsize_load_factor > 0.0 ? downsize_load_factor : 0.2; + double upsize_limit = + upsize_load_factor > 0.0 ? upsize_load_factor : 0.8; + sweep_factor = sweep_factor > 0.0 ? sweep_factor : 0.5; + gc->paused = false; + gc->bos = bos; + initial_capacity = initial_capacity < min_capacity ? min_capacity : + initial_capacity; + gc->allocs = gc_allocation_map_new(min_capacity, initial_capacity, + sweep_factor, downsize_limit, + upsize_limit); + LOG_DEBUG("Created new garbage collector (cap=%ld, siz=%ld).", + gc->allocs->capacity, gc->allocs->size); +} + +void gc_pause(GarbageCollector *gc) +{ + gc->paused = true; +} + +void gc_resume(GarbageCollector *gc) +{ + gc->paused = false; +} + +void gc_mark_alloc(GarbageCollector *gc, void *ptr) +{ + Allocation *alloc = gc_allocation_map_get(gc->allocs, ptr); + /* Mark if alloc exists and is not tagged already, otherwise skip */ + if (alloc && !(alloc->tag & GC_TAG_MARK)) { + LOG_DEBUG("Marking allocation (ptr=%p)", ptr); + alloc->tag |= GC_TAG_MARK; + /* Iterate over allocation contents and mark them as well */ + LOG_DEBUG("Checking allocation (ptr=%p, size=%lu) contents", + ptr, alloc->size); + for (char *p = (char *)alloc->ptr; + p <= (char *)alloc->ptr + alloc->size - PTRSIZE; ++p) { + LOG_DEBUG( + "Checking allocation (ptr=%p) @%lu with value %p", + ptr, p - ((char *)alloc->ptr), *(void **)p); + gc_mark_alloc(gc, *(void **)p); + } + } +} + +void gc_mark_stack(GarbageCollector *gc) +{ + LOG_DEBUG("Marking the stack (gc@%p) in increments of %ld", (void *)gc, + sizeof(char)); + void *tos = __builtin_frame_address(0); + void *bos = gc->bos; + /* The stack grows towards smaller memory addresses, hence we scan tos->bos. + * Stop scanning once the distance between tos & bos is too small to hold a valid pointer */ + for (char *p = (char *)tos; p <= (char *)bos - PTRSIZE; ++p) { + gc_mark_alloc(gc, *(void **)p); + } +} + +void gc_mark_roots(GarbageCollector *gc) +{ + LOG_DEBUG("Marking roots%s", ""); + for (size_t i = 0; i < gc->allocs->capacity; ++i) { + Allocation *chunk = gc->allocs->allocs[i]; + while (chunk) { + if (chunk->tag & GC_TAG_ROOT) { + LOG_DEBUG("Marking root @ %p", chunk->ptr); + gc_mark_alloc(gc, chunk->ptr); + } + chunk = chunk->next; + } + } +} + +void gc_mark(GarbageCollector *gc) +{ + /* Note: We only look at the stack and the heap, and ignore BSS. */ + LOG_DEBUG("Initiating GC mark (gc@%p)", (void *)gc); + /* Scan the heap for roots */ + gc_mark_roots(gc); + /* Dump registers onto stack and scan the stack */ + void (*volatile _mark_stack)(GarbageCollector *) = gc_mark_stack; + jmp_buf ctx; + memset(&ctx, 0, sizeof(jmp_buf)); + setjmp(ctx); + _mark_stack(gc); +} + +size_t gc_sweep(GarbageCollector *gc) +{ + LOG_DEBUG("Initiating GC sweep (gc@%p)", (void *)gc); + size_t total = 0; + for (size_t i = 0; i < gc->allocs->capacity; ++i) { + Allocation *chunk = gc->allocs->allocs[i]; + Allocation *next = NULL; + /* Iterate over separate chaining */ + while (chunk) { + if (chunk->tag & GC_TAG_MARK) { + LOG_DEBUG("Found used allocation %p (ptr=%p)", + (void *)chunk, (void *)chunk->ptr); + /* unmark */ + chunk->tag &= ~GC_TAG_MARK; + chunk = chunk->next; + } else { + LOG_DEBUG( + "Found unused allocation %p (%lu bytes @ ptr=%p)", + (void *)chunk, chunk->size, + (void *)chunk->ptr); + /* no reference to this chunk, hence delete it */ + total += chunk->size; + if (chunk->dtor) { + chunk->dtor(chunk->ptr); + } + free(chunk->ptr); + /* and remove it from the bookkeeping */ + next = chunk->next; + gc_allocation_map_remove(gc->allocs, chunk->ptr, + false); + chunk = next; + } + } + } + gc_allocation_map_resize_to_fit(gc->allocs); + return total; +} + +/** + * Unset the ROOT tag on all roots on the heap. + * + * @param gc A pointer to a garbage collector instance. + */ +void gc_unroot_roots(GarbageCollector *gc) +{ + LOG_DEBUG("Unmarking roots%s", ""); + for (size_t i = 0; i < gc->allocs->capacity; ++i) { + Allocation *chunk = gc->allocs->allocs[i]; + while (chunk) { + if (chunk->tag & GC_TAG_ROOT) { + chunk->tag &= ~GC_TAG_ROOT; + } + chunk = chunk->next; + } + } +} + +size_t gc_stop(GarbageCollector *gc) +{ + gc_unroot_roots(gc); + size_t collected = gc_sweep(gc); + gc_allocation_map_delete(gc->allocs); + return collected; +} + +size_t gc_run(GarbageCollector *gc) +{ + LOG_DEBUG("Initiating GC run (gc@%p)", (void *)gc); + gc_mark(gc); + return gc_sweep(gc); +} + +char *gc_strdup(GarbageCollector *gc, const char *s) +{ + size_t len = strlen(s) + 1; + void *new = gc_malloc(gc, len); + + if (new == NULL) { + return NULL; + } + return (char *)memcpy(new, s, len); +} |