From f279ba84ee04323a69d82d09fb2b71b7c4790b03 Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Wed, 8 Mar 2023 10:57:22 +0800 Subject: [PATCH] Fix multi-threading issues (#2013) - Implement atomic.fence to ensure a proper memory synchronization order - Destroy exec_env_singleton first in wasm/aot deinstantiation - Change terminate other threads to wait for other threads in wasm_exec_env_destroy - Fix detach thread in thread_manager_start_routine - Fix duplicated lock cluster->lock in wasm_cluster_cancel_thread - Add lib-pthread and lib-wasi-threads compilation to Windows CI --- .github/workflows/compilation_on_windows.yml | 14 +++++++ core/iwasm/aot/aot_runtime.c | 12 ++++-- core/iwasm/common/wasm_application.c | 24 +----------- core/iwasm/common/wasm_exec_env.c | 6 +-- core/iwasm/compilation/aot_compiler.c | 2 + core/iwasm/compilation/aot_emit_memory.c | 9 +++++ core/iwasm/compilation/aot_emit_memory.h | 4 ++ core/iwasm/interpreter/wasm_interp_classic.c | 4 +- core/iwasm/interpreter/wasm_interp_fast.c | 5 +++ core/iwasm/interpreter/wasm_runtime.c | 13 +++++-- .../libraries/thread-mgr/thread_manager.c | 39 +++++++++++++++---- .../platform/include/platform_api_extension.h | 35 +++++++++++++++++ .../platform/linux-sgx/platform_internal.h | 3 ++ .../platform/windows/platform_internal.h | 14 +++++++ .../platform/windows/shared_platform.cmake | 3 +- core/shared/platform/windows/win_atomic.cpp | 22 +++++++++++ 16 files changed, 167 insertions(+), 42 deletions(-) create mode 100644 core/shared/platform/windows/win_atomic.cpp diff --git a/.github/workflows/compilation_on_windows.yml b/.github/workflows/compilation_on_windows.yml index 56e913e3..073f206e 100644 --- a/.github/workflows/compilation_on_windows.yml +++ b/.github/workflows/compilation_on_windows.yml @@ -118,3 +118,17 @@ jobs: cmake .. -DWAMR_BUILD_DEBUG_INTERP=1 cmake --build . --config Release --parallel 4 cd .. && rm -force -r build + - name: Build iwasm [lib pthread] + run: | + cd product-mini/platforms/windows + mkdir build && cd build + cmake .. -DWAMR_BUILD_LIB_PTHREAD=1 + cmake --build . --config Release --parallel 4 + cd .. && rm -force -r build + - name: Build iwasm [lib wasi-thread] + run: | + cd product-mini/platforms/windows + mkdir build && cd build + cmake .. -DWAMR_BUILD_LIB_WASI_THREADS=1 + cmake --build . --config Release --parallel 4 + cd .. && rm -force -r build diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index 5c56bb55..ca2ad4f7 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -1226,6 +1226,15 @@ fail: void aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst) { + if (module_inst->exec_env_singleton) { + /* wasm_exec_env_destroy will call + wasm_cluster_wait_for_all_except_self to wait for other + threads, so as to destroy their exec_envs and module + instances first, and avoid accessing the shared resources + of current module instance after it is deinstantiated. */ + wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton); + } + #if WASM_ENABLE_LIBC_WASI != 0 /* Destroy wasi resource before freeing app heap, since some fields of wasi contex are allocated from app heap, and if app heap is freed, @@ -1264,9 +1273,6 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst) if (module_inst->func_type_indexes) wasm_runtime_free(module_inst->func_type_indexes); - if (module_inst->exec_env_singleton) - wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton); - if (((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports) wasm_runtime_free( ((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports); diff --git a/core/iwasm/common/wasm_application.c b/core/iwasm/common/wasm_application.c index 8445652f..1bb9f29b 100644 --- a/core/iwasm/common/wasm_application.c +++ b/core/iwasm/common/wasm_application.c @@ -203,22 +203,12 @@ wasm_application_execute_main(WASMModuleInstanceCommon *module_inst, int32 argc, char *argv[]) { bool ret; -#if WASM_ENABLE_THREAD_MGR != 0 - WASMCluster *cluster; -#endif -#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0 +#if WASM_ENABLE_MEMORY_PROFILING != 0 WASMExecEnv *exec_env; #endif ret = execute_main(module_inst, argc, argv); -#if WASM_ENABLE_THREAD_MGR != 0 - exec_env = wasm_runtime_get_exec_env_singleton(module_inst); - if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) { - wasm_cluster_wait_for_all_except_self(cluster, exec_env); - } -#endif - #if WASM_ENABLE_MEMORY_PROFILING != 0 exec_env = wasm_runtime_get_exec_env_singleton(module_inst); if (exec_env) { @@ -622,22 +612,12 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst, const char *name, int32 argc, char *argv[]) { bool ret; -#if WASM_ENABLE_THREAD_MGR != 0 - WASMCluster *cluster; -#endif -#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0 +#if WASM_ENABLE_MEMORY_PROFILING != 0 WASMExecEnv *exec_env; #endif ret = execute_func(module_inst, name, argc, argv); -#if WASM_ENABLE_THREAD_MGR != 0 - exec_env = wasm_runtime_get_exec_env_singleton(module_inst); - if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) { - wasm_cluster_wait_for_all_except_self(cluster, exec_env); - } -#endif - #if WASM_ENABLE_MEMORY_PROFILING != 0 exec_env = wasm_runtime_get_exec_env_singleton(module_inst); if (exec_env) { diff --git a/core/iwasm/common/wasm_exec_env.c b/core/iwasm/common/wasm_exec_env.c index 9cda929b..622bcd71 100644 --- a/core/iwasm/common/wasm_exec_env.c +++ b/core/iwasm/common/wasm_exec_env.c @@ -172,16 +172,16 @@ void wasm_exec_env_destroy(WASMExecEnv *exec_env) { #if WASM_ENABLE_THREAD_MGR != 0 - /* Terminate all sub-threads */ + /* Wait for all sub-threads */ WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env); if (cluster) { - wasm_cluster_terminate_all_except_self(cluster, exec_env); + wasm_cluster_wait_for_all_except_self(cluster, exec_env); #if WASM_ENABLE_DEBUG_INTERP != 0 /* Must fire exit event after other threads exits, otherwise the stopped thread will be overrided by other threads */ wasm_cluster_thread_exited(exec_env); #endif - /* We have terminated other threads, this is the only alive thread, so + /* We have waited for other threads, this is the only alive thread, so * we don't acquire cluster->lock because the cluster will be destroyed * inside this function */ wasm_cluster_del_exec_env(cluster, exec_env); diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c index 925371b1..06235fe3 100644 --- a/core/iwasm/compilation/aot_compiler.c +++ b/core/iwasm/compilation/aot_compiler.c @@ -1240,6 +1240,8 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index) case WASM_OP_ATOMIC_FENCE: /* Skip memory index */ frame_ip++; + if (!aot_compiler_op_atomic_fence(comp_ctx, func_ctx)) + return false; break; case WASM_OP_ATOMIC_I32_LOAD: bytes = 4; diff --git a/core/iwasm/compilation/aot_emit_memory.c b/core/iwasm/compilation/aot_emit_memory.c index 26d9f922..4da4cc80 100644 --- a/core/iwasm/compilation/aot_emit_memory.c +++ b/core/iwasm/compilation/aot_emit_memory.c @@ -1423,4 +1423,13 @@ fail: return false; } +bool +aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + return LLVMBuildFence(comp_ctx->builder, + LLVMAtomicOrderingSequentiallyConsistent, false, "") + ? true + : false; +} + #endif /* end of WASM_ENABLE_SHARED_MEMORY */ diff --git a/core/iwasm/compilation/aot_emit_memory.h b/core/iwasm/compilation/aot_emit_memory.h index 2c39f725..e49582e3 100644 --- a/core/iwasm/compilation/aot_emit_memory.h +++ b/core/iwasm/compilation/aot_emit_memory.h @@ -97,6 +97,10 @@ bool aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, uint32 offset, uint32 bytes); + +bool +aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx, + AOTFuncContext *func_ctx); #endif #ifdef __cplusplus diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c index 9927d1b2..dd45865b 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -3467,6 +3467,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, { /* Skip the memory index */ frame_ip++; + os_atomic_thread_fence(os_memory_order_release); break; } @@ -4006,7 +4007,8 @@ fast_jit_call_func_bytecode(WASMModuleInstance *module_inst, module_inst->fast_jit_func_ptrs[func_idx_non_import]); bh_assert(action == JIT_INTERP_ACTION_NORMAL || (action == JIT_INTERP_ACTION_THROWN - && wasm_runtime_copy_exception(exec_env->module_inst, NULL))); + && wasm_copy_exception( + (WASMModuleInstance *)exec_env->module_inst, NULL))); /* Get the return values form info.out.ret */ if (func_type->result_count) { diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index bd347fcd..628f4d06 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3305,6 +3305,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, PUSH_I32(ret); break; } + case WASM_OP_ATOMIC_FENCE: + { + os_atomic_thread_fence(os_memory_order_release); + break; + } case WASM_OP_ATOMIC_I32_LOAD: case WASM_OP_ATOMIC_I32_LOAD8_U: diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c index 6f55ff74..9f08a484 100644 --- a/core/iwasm/interpreter/wasm_runtime.c +++ b/core/iwasm/interpreter/wasm_runtime.c @@ -2073,6 +2073,15 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst) if (!module_inst) return; + if (module_inst->exec_env_singleton) { + /* wasm_exec_env_destroy will call + wasm_cluster_wait_for_all_except_self to wait for other + threads, so as to destroy their exec_envs and module + instances first, and avoid accessing the shared resources + of current module instance after it is deinstantiated. */ + wasm_exec_env_destroy(module_inst->exec_env_singleton); + } + #if WASM_ENABLE_DEBUG_INTERP != 0 \ || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \ && WASM_ENABLE_LAZY_JIT != 0) @@ -2155,9 +2164,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst) wasm_externref_cleanup((WASMModuleInstanceCommon *)module_inst); #endif - if (module_inst->exec_env_singleton) - wasm_exec_env_destroy(module_inst->exec_env_singleton); - #if WASM_ENABLE_DUMP_CALL_STACK != 0 if (module_inst->frames) { bh_vector_destroy(module_inst->frames); @@ -2241,7 +2247,6 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst, WASMRuntimeFrame *prev_frame = wasm_exec_env_get_cur_frame(exec_env); uint8 *prev_top = exec_env->wasm_stack.s.top; #ifdef BH_PLATFORM_WINDOWS - const char *exce; int result; bool has_exception; char exception[EXCEPTION_BUF_LEN]; diff --git a/core/iwasm/libraries/thread-mgr/thread_manager.c b/core/iwasm/libraries/thread-mgr/thread_manager.c index 19db949d..e61915d5 100644 --- a/core/iwasm/libraries/thread-mgr/thread_manager.c +++ b/core/iwasm/libraries/thread-mgr/thread_manager.c @@ -100,7 +100,8 @@ safe_traverse_exec_env_list(WASMCluster *cluster, list_visitor visitor, while (node) { bool already_processed = false; void *proc_node; - for (size_t i = 0; i < bh_vector_size(&proc_nodes); i++) { + uint32 i; + for (i = 0; i < (uint32)bh_vector_size(&proc_nodes); i++) { if (!bh_vector_get(&proc_nodes, i, &proc_node)) { ret = false; goto final; @@ -595,14 +596,24 @@ thread_manager_start_routine(void *arg) /* Routine exit */ - /* Detach the native thread here to ensure the resources are freed */ - wasm_cluster_detach_thread(exec_env); #if WASM_ENABLE_DEBUG_INTERP != 0 wasm_cluster_thread_exited(exec_env); #endif + os_mutex_lock(&cluster_list_lock); + os_mutex_lock(&cluster->lock); + /* Detach the native thread here to ensure the resources are freed */ + if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) { + /* Only detach current thread when there is no other thread + joining it, otherwise let the system resources for the + thread be released after joining */ + os_thread_detach(exec_env->handle); + /* No need to set exec_env->thread_is_detached to true here + since we will exit soon */ + } + /* Free aux stack space */ free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom); /* Remove exec_env */ @@ -614,6 +625,8 @@ thread_manager_start_routine(void *arg) os_mutex_unlock(&cluster->lock); + os_mutex_unlock(&cluster_list_lock); + os_thread_exit(ret); return ret; } @@ -936,12 +949,23 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval) wasm_cluster_clear_thread_signal(exec_env); wasm_cluster_thread_exited(exec_env); #endif + /* App exit the thread, free the resources before exit native thread */ - /* Detach the native thread here to ensure the resources are freed */ - wasm_cluster_detach_thread(exec_env); + + os_mutex_lock(&cluster_list_lock); os_mutex_lock(&cluster->lock); + /* Detach the native thread here to ensure the resources are freed */ + if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) { + /* Only detach current thread when there is no other thread + joining it, otherwise let the system resources for the + thread be released after joining */ + os_thread_detach(exec_env->handle); + /* No need to set exec_env->thread_is_detached to true here + since we will exit soon */ + } + module_inst = exec_env->module_inst; /* Free aux stack space */ @@ -955,6 +979,8 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval) os_mutex_unlock(&cluster->lock); + os_mutex_unlock(&cluster_list_lock); + os_thread_exit(retval); } @@ -981,8 +1007,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env) return 0; } - os_mutex_lock(&exec_env->cluster->lock); - if (!clusters_have_exec_env(exec_env)) { /* Invalid thread or the thread has exited */ goto final; @@ -991,7 +1015,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env) set_thread_cancel_flags(exec_env); final: - os_mutex_unlock(&exec_env->cluster->lock); os_mutex_unlock(&cluster_list_lock); return 0; diff --git a/core/shared/platform/include/platform_api_extension.h b/core/shared/platform/include/platform_api_extension.h index 42baad74..7640e12c 100644 --- a/core/shared/platform/include/platform_api_extension.h +++ b/core/shared/platform/include/platform_api_extension.h @@ -92,6 +92,41 @@ int os_thread_detach(korp_tid); void os_thread_exit(void *retval); +/* Try to define os_atomic_thread_fence if it isn't defined in + platform's platform_internal.h */ +#ifndef os_atomic_thread_fence + +#if !defined(__GNUC_PREREQ) && (defined(__GNUC__) || defined(__GNUG__)) \ + && !defined(__clang__) && defined(__GNUC_MINOR__) +#define __GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#endif + +/* Clang's __GNUC_PREREQ macro has a different meaning than GCC one, + so we have to handle this case specially */ +#if defined(__clang__) +/* Clang provides stdatomic.h since 3.6.0 + See https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html */ +#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6) +#define BH_HAS_STD_ATOMIC +#endif +#elif defined(__GNUC_PREREQ) +/* Even though older versions of GCC support C11, atomics were + not implemented until 4.9. See + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58016 */ +#if __GNUC_PREREQ(4, 9) +#define BH_HAS_STD_ATOMIC +#endif /* end of __GNUC_PREREQ(4, 9) */ +#endif /* end of defined(__GNUC_PREREQ) */ + +#if defined(BH_HAS_STD_ATOMIC) && !defined(__cplusplus) +#include +#define os_memory_order_release memory_order_release +#define os_atomic_thread_fence atomic_thread_fence +#endif + +#endif /* end of os_atomic_thread_fence */ + /** * Initialize current thread environment if current thread * is created by developer but not runtime diff --git a/core/shared/platform/linux-sgx/platform_internal.h b/core/shared/platform/linux-sgx/platform_internal.h index 966b8ffc..04babaf9 100644 --- a/core/shared/platform/linux-sgx/platform_internal.h +++ b/core/shared/platform/linux-sgx/platform_internal.h @@ -63,6 +63,9 @@ os_set_print_function(os_print_function_t pf); char * strcpy(char *dest, const char *src); +#define os_memory_order_release __ATOMIC_RELEASE +#define os_atomic_thread_fence __atomic_thread_fence + #ifdef __cplusplus } #endif diff --git a/core/shared/platform/windows/platform_internal.h b/core/shared/platform/windows/platform_internal.h index 1332afc2..8d5c1488 100644 --- a/core/shared/platform/windows/platform_internal.h +++ b/core/shared/platform/windows/platform_internal.h @@ -116,6 +116,20 @@ os_thread_signal_inited(); #endif /* end of BUILD_TARGET_X86_64/AMD_64 */ #endif /* end of WASM_DISABLE_HW_BOUND_CHECK */ +typedef enum os_memory_order { + os_memory_order_relaxed, + os_memory_order_consume, + os_memory_order_acquire, + os_memory_order_release, + os_memory_order_acq_rel, + os_memory_order_seq_cst, +} os_memory_order; + +void +bh_atomic_thread_fence(int mem_order); + +#define os_atomic_thread_fence bh_atomic_thread_fence + #ifdef __cplusplus } #endif diff --git a/core/shared/platform/windows/shared_platform.cmake b/core/shared/platform/windows/shared_platform.cmake index 6ab29c89..414c7b1a 100644 --- a/core/shared/platform/windows/shared_platform.cmake +++ b/core/shared/platform/windows/shared_platform.cmake @@ -10,7 +10,8 @@ add_definitions(-DHAVE_STRUCT_TIMESPEC) include_directories(${PLATFORM_SHARED_DIR}) include_directories(${PLATFORM_SHARED_DIR}/../include) -file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c) +file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c + ${PLATFORM_SHARED_DIR}/*.cpp) set (PLATFORM_SHARED_SOURCE ${source_all}) diff --git a/core/shared/platform/windows/win_atomic.cpp b/core/shared/platform/windows/win_atomic.cpp new file mode 100644 index 00000000..80e8ef51 --- /dev/null +++ b/core/shared/platform/windows/win_atomic.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2023 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "platform_api_vmcore.h" +#include "platform_api_extension.h" + +#if WASM_ENABLE_SHARED_MEMORY != 0 + +#include + +void +bh_atomic_thread_fence(int mem_order) +{ + std::memory_order order = + (std::memory_order)(std::memory_order::memory_order_relaxed + mem_order + - os_memory_order_relaxed); + std::atomic_thread_fence(order); +} + +#endif