From 2477e4e3b84b23b091befa1869a1fc6186fe74dc Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Tue, 27 Feb 2024 19:42:41 +0100 Subject: [PATCH] libexpr: Use GC_set_sp_corrector instead of patch Manually tested by printing to stderr in both branches (sp in os stack, or not), and triggering a GC in a filterSource function, e.g.: let generateTree = n: if n == 0 then "ha" else { left = generateTree (n - 1); right = generateTree (n - 1); }; in builtins.deepSeq (generateTree 18) ... Note that the darwin still uses the strategy of disabling GC, despite having an implementation that compiles. The proper solution will be enabled and tested later. --- configure.ac | 2 +- .../boehmgc-coroutine-sp-fallback.diff | 99 ------------------- flake.nix | 2 - src/libexpr/eval.cc | 66 +++++++++++-- src/libexpr/local.mk | 4 +- 5 files changed, 62 insertions(+), 111 deletions(-) delete mode 100644 dep-patches/boehmgc-coroutine-sp-fallback.diff diff --git a/configure.ac b/configure.ac index 90a6d45d5..0a015fe48 100644 --- a/configure.ac +++ b/configure.ac @@ -366,7 +366,7 @@ fi AC_ARG_ENABLE(gc, AS_HELP_STRING([--enable-gc],[enable garbage collection in the Nix expression evaluator (requires Boehm GC) [default=yes]]), gc=$enableval, gc=yes) if test "$gc" = yes; then - PKG_CHECK_MODULES([BDW_GC], [bdw-gc]) + PKG_CHECK_MODULES([BDW_GC], [bdw-gc >= 8.2.4]) CXXFLAGS="$BDW_GC_CFLAGS $CXXFLAGS" AC_DEFINE(HAVE_BOEHMGC, 1, [Whether to use the Boehm garbage collector.]) fi diff --git a/dep-patches/boehmgc-coroutine-sp-fallback.diff b/dep-patches/boehmgc-coroutine-sp-fallback.diff deleted file mode 100644 index 2afbe9671..000000000 --- a/dep-patches/boehmgc-coroutine-sp-fallback.diff +++ /dev/null @@ -1,99 +0,0 @@ -diff --git a/darwin_stop_world.c b/darwin_stop_world.c -index 0468aaec..b348d869 100644 ---- a/darwin_stop_world.c -+++ b/darwin_stop_world.c -@@ -356,6 +356,7 @@ GC_INNER void GC_push_all_stacks(void) - int nthreads = 0; - word total_size = 0; - mach_msg_type_number_t listcount = (mach_msg_type_number_t)THREAD_TABLE_SZ; -+ size_t stack_limit; - if (!EXPECT(GC_thr_initialized, TRUE)) - GC_thr_init(); - -@@ -411,6 +412,19 @@ GC_INNER void GC_push_all_stacks(void) - GC_push_all_stack_sections(lo, hi, p->traced_stack_sect); - } - if (altstack_lo) { -+ // When a thread goes into a coroutine, we lose its original sp until -+ // control flow returns to the thread. -+ // While in the coroutine, the sp points outside the thread stack, -+ // so we can detect this and push the entire thread stack instead, -+ // as an approximation. -+ // We assume that the coroutine has similarly added its entire stack. -+ // This could be made accurate by cooperating with the application -+ // via new functions and/or callbacks. -+ stack_limit = pthread_get_stacksize_np(p->id); -+ if (altstack_lo >= altstack_hi || altstack_lo < altstack_hi - stack_limit) { // sp outside stack -+ altstack_lo = altstack_hi - stack_limit; -+ } -+ - total_size += altstack_hi - altstack_lo; - GC_push_all_stack(altstack_lo, altstack_hi); - } -diff --git a/include/gc.h b/include/gc.h -index edab6c22..f2c61282 100644 ---- a/include/gc.h -+++ b/include/gc.h -@@ -2172,6 +2172,11 @@ GC_API void GC_CALL GC_win32_free_heap(void); - (*GC_amiga_allocwrapper_do)(a,GC_malloc_atomic_ignore_off_page) - #endif /* _AMIGA && !GC_AMIGA_MAKINGLIB */ - -+#if !__APPLE__ -+/* Patch doesn't work on apple */ -+#define NIX_BOEHM_PATCH_VERSION 1 -+#endif -+ - #ifdef __cplusplus - } /* extern "C" */ - #endif -diff --git a/pthread_stop_world.c b/pthread_stop_world.c -index b5d71e62..aed7b0bf 100644 ---- a/pthread_stop_world.c -+++ b/pthread_stop_world.c -@@ -768,6 +768,8 @@ STATIC void GC_restart_handler(int sig) - /* world is stopped. Should not fail if it isn't. */ - GC_INNER void GC_push_all_stacks(void) - { -+ size_t stack_limit; -+ pthread_attr_t pattr; - GC_bool found_me = FALSE; - size_t nthreads = 0; - int i; -@@ -851,6 +853,37 @@ GC_INNER void GC_push_all_stacks(void) - hi = p->altstack + p->altstack_size; - /* FIXME: Need to scan the normal stack too, but how ? */ - /* FIXME: Assume stack grows down */ -+ } else { -+#ifdef HAVE_PTHREAD_ATTR_GET_NP -+ if (!pthread_attr_init(&pattr) -+ || !pthread_attr_get_np(p->id, &pattr)) -+#else /* HAVE_PTHREAD_GETATTR_NP */ -+ if (pthread_getattr_np(p->id, &pattr)) -+#endif -+ { -+ ABORT("GC_push_all_stacks: pthread_getattr_np failed!"); -+ } -+ if (pthread_attr_getstacksize(&pattr, &stack_limit)) { -+ ABORT("GC_push_all_stacks: pthread_attr_getstacksize failed!"); -+ } -+ if (pthread_attr_destroy(&pattr)) { -+ ABORT("GC_push_all_stacks: pthread_attr_destroy failed!"); -+ } -+ // When a thread goes into a coroutine, we lose its original sp until -+ // control flow returns to the thread. -+ // While in the coroutine, the sp points outside the thread stack, -+ // so we can detect this and push the entire thread stack instead, -+ // as an approximation. -+ // We assume that the coroutine has similarly added its entire stack. -+ // This could be made accurate by cooperating with the application -+ // via new functions and/or callbacks. -+ #ifndef STACK_GROWS_UP -+ if (lo >= hi || lo < hi - stack_limit) { // sp outside stack -+ lo = hi - stack_limit; -+ } -+ #else -+ #error "STACK_GROWS_UP not supported in boost_coroutine2 (as of june 2021), so we don't support it in Nix." -+ #endif - } - GC_push_all_stack_sections(lo, hi, traced_stack_sect); - # ifdef STACK_GROWS_UP diff --git a/flake.nix b/flake.nix index a7bedd819..7a07a70ba 100644 --- a/flake.nix +++ b/flake.nix @@ -150,8 +150,6 @@ enableLargeConfig = true; }).overrideAttrs(o: { patches = (o.patches or []) ++ [ - ./dep-patches/boehmgc-coroutine-sp-fallback.diff - # https://github.com/ivmai/bdwgc/pull/586 ./dep-patches/boehmgc-traceable_allocator-public.diff ]; diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index c1dadeee0..30521b072 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -250,6 +251,50 @@ class BoehmGCStackAllocator : public StackAllocator { static BoehmGCStackAllocator boehmGCStackAllocator; +/** + * When a thread goes into a coroutine, we lose its original sp until + * control flow returns to the thread. + * While in the coroutine, the sp points outside the thread stack, + * so we can detect this and push the entire thread stack instead, + * as an approximation. + * The coroutine's stack is covered by `BoehmGCStackAllocator`. + * This is not an optimal solution, because the garbage is scanned when a + * coroutine is active, for both the coroutine and the original thread stack. + * However, the implementation is quite lean, and usually we don't have active + * coroutines during evaluation, so this is acceptable. + */ +void fixupBoehmStackPointer(void ** sp_ptr, void * pthread_id) { + void *& sp = *sp_ptr; + pthread_attr_t pattr; + size_t osStackSize; + void * osStackLow; + void * osStackBase; + + #ifdef __APPLE__ + osStackSize = pthread_get_stacksize_np((pthread_t)pthread_id); + osStackLow = pthread_get_stackaddr_np((pthread_t)pthread_id); + #else + if (pthread_attr_init(&pattr)) { + throw Error("fixupBoehmStackPointer: pthread_attr_init failed"); + } + if (pthread_getattr_np((pthread_t)pthread_id, &pattr)) { + throw Error("fixupBoehmStackPointer: pthread_getattr_np failed"); + } + if (pthread_attr_getstack(&pattr, &osStackLow, &osStackSize)) { + throw Error("fixupBoehmStackPointer: pthread_attr_getstack failed"); + } + if (pthread_attr_destroy(&pattr)) { + throw Error("fixupBoehmStackPointer: pthread_attr_destroy failed"); + } + #endif + osStackBase = (char *)osStackLow + osStackSize; + // NOTE: We assume the stack grows down, as it does on all architectures we support. + // Architectures that grow the stack up are rare. + if (sp >= osStackBase || sp < osStackLow) { // lo is outside the os stack + sp = osStackBase; + } +} + #endif @@ -303,16 +348,21 @@ void initGC() GC_set_oom_fn(oomHandler); + // TODO: Comment suggests an implementation that works on darwin and windows + // https://github.com/ivmai/bdwgc/issues/362#issuecomment-1936672196 + #ifndef __APPLE__ + GC_set_sp_corrector(&fixupBoehmStackPointer); + #endif + StackAllocator::defaultAllocator = &boehmGCStackAllocator; - -#if NIX_BOEHM_PATCH_VERSION != 1 - printTalkative("Unpatched BoehmGC, disabling GC inside coroutines"); - /* Used to disable GC when entering coroutines on macOS */ - create_coro_gc_hook = []() -> std::shared_ptr { - return std::make_shared(); - }; -#endif + if (!GC_get_sp_corrector()) { + printTalkative("BoehmGC on this platform does not support sp_corrector; will disable GC inside coroutines"); + /* Used to disable GC when entering coroutines on macOS */ + create_coro_gc_hook = []() -> std::shared_ptr { + return std::make_shared(); + }; + } /* Set the initial heap size to something fairly big (25% of physical RAM, up to a maximum of 384 MiB) so that in most cases diff --git a/src/libexpr/local.mk b/src/libexpr/local.mk index ecadc5e5d..d128064a5 100644 --- a/src/libexpr/local.mk +++ b/src/libexpr/local.mk @@ -15,7 +15,9 @@ libexpr_SOURCES := \ INCLUDE_libexpr := -I $(d) -libexpr_CXXFLAGS += $(INCLUDE_libutil) $(INCLUDE_libstore) $(INCLUDE_libfetchers) $(INCLUDE_libmain) $(INCLUDE_libexpr) +libexpr_CXXFLAGS += \ + $(INCLUDE_libutil) $(INCLUDE_libstore) $(INCLUDE_libfetchers) $(INCLUDE_libmain) $(INCLUDE_libexpr) \ + -DGC_THREADS libexpr_LIBS = libutil libstore libfetchers