Fix multi-threading issues (#2013)

- Implement atomic.fence to ensure a proper memory synchronization order
  (illustrated by the sketch after this list)
- Destroy exec_env_singleton first in wasm/aot deinstantiation
- Change wasm_exec_env_destroy to wait for other threads instead of
  terminating them
- Fix thread detach in thread_manager_start_routine
- Fix duplicated locking of cluster->lock in wasm_cluster_cancel_thread
- Add lib-pthread and lib-wasi-threads compilation to Windows CI
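
A minimal sketch (illustrative only, not part of the diff) of the ordering
guarantee atomic.fence provides, written with standard C11 atomics, which are
also what the new os_atomic_thread_fence fallback maps to when <stdatomic.h>
is available:

#include <stdatomic.h>
#include <stdbool.h>

static int payload;
static atomic_bool ready;

void
producer(void)
{
    payload = 42;
    /* release fence: the write to payload becomes visible before any thread
       can observe ready == true; the interpreter hunks below issue the same
       kind of fence via os_atomic_thread_fence(os_memory_order_release) */
    atomic_thread_fence(memory_order_release);
    atomic_store_explicit(&ready, true, memory_order_relaxed);
}

int
consumer(void)
{
    while (!atomic_load_explicit(&ready, memory_order_relaxed))
        ; /* spin until the flag is set */
    atomic_thread_fence(memory_order_acquire);
    return payload; /* reads 42 once ready has been observed as true */
}
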
Wenyong Huang 2023-03-08 10:57:22 +08:00 committed by GitHub
parent 7a3d2bfab6
commit f279ba84ee
16 changed files with 167 additions and 42 deletions


@@ -118,3 +118,17 @@ jobs:
cmake .. -DWAMR_BUILD_DEBUG_INTERP=1
cmake --build . --config Release --parallel 4
cd .. && rm -force -r build
- name: Build iwasm [lib pthread]
run: |
cd product-mini/platforms/windows
mkdir build && cd build
cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
cmake --build . --config Release --parallel 4
cd .. && rm -force -r build
- name: Build iwasm [lib wasi-threads]
run: |
cd product-mini/platforms/windows
mkdir build && cd build
cmake .. -DWAMR_BUILD_LIB_WASI_THREADS=1
cmake --build . --config Release --parallel 4
cd .. && rm -force -r build


@@ -1226,6 +1226,15 @@ fail:
void
aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
{
if (module_inst->exec_env_singleton) {
/* wasm_exec_env_destroy will call
wasm_cluster_wait_for_all_except_self to wait for other
threads, so as to destroy their exec_envs and module
instances first, and avoid accessing the shared resources
of the current module instance after it is deinstantiated. */
wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
}
#if WASM_ENABLE_LIBC_WASI != 0
/* Destroy wasi resource before freeing app heap, since some fields of
wasi context are allocated from app heap, and if app heap is freed,
@@ -1264,9 +1273,6 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
if (module_inst->func_type_indexes)
wasm_runtime_free(module_inst->func_type_indexes);
if (module_inst->exec_env_singleton)
wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
if (((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports)
wasm_runtime_free(
((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports);
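
The new comment in aot_deinstantiate above describes a wait-before-free
ordering. The same pattern as a standalone sketch (illustrative only, plain
pthreads rather than the WAMR thread manager): shared resources are released
only after every sub-thread that may touch them has finished.

#include <pthread.h>
#include <stdlib.h>

struct shared_state {
    int *heap;
};

static void *
worker(void *arg)
{
    struct shared_state *s = arg;
    s->heap[0] += 1; /* would be a use-after-free if the owner freed s->heap
                        before waiting for this thread */
    return NULL;
}

int
main(void)
{
    struct shared_state s = { .heap = calloc(16, sizeof(int)) };
    pthread_t t;

    if (!s.heap)
        return 1;
    pthread_create(&t, NULL, worker, &s);
    pthread_join(&t, NULL); /* wait for the sub-thread first ... */
    free(s.heap);           /* ... then release the shared resources */
    return 0;
}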


@@ -203,22 +203,12 @@ wasm_application_execute_main(WASMModuleInstanceCommon *module_inst, int32 argc,
char *argv[])
{
bool ret;
#if WASM_ENABLE_THREAD_MGR != 0
WASMCluster *cluster;
#endif
#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
#if WASM_ENABLE_MEMORY_PROFILING != 0
WASMExecEnv *exec_env;
#endif
ret = execute_main(module_inst, argc, argv);
#if WASM_ENABLE_THREAD_MGR != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
wasm_cluster_wait_for_all_except_self(cluster, exec_env);
}
#endif
#if WASM_ENABLE_MEMORY_PROFILING != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env) {
@@ -622,22 +612,12 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst,
const char *name, int32 argc, char *argv[])
{
bool ret;
#if WASM_ENABLE_THREAD_MGR != 0
WASMCluster *cluster;
#endif
#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
#if WASM_ENABLE_MEMORY_PROFILING != 0
WASMExecEnv *exec_env;
#endif
ret = execute_func(module_inst, name, argc, argv);
#if WASM_ENABLE_THREAD_MGR != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
wasm_cluster_wait_for_all_except_self(cluster, exec_env);
}
#endif
#if WASM_ENABLE_MEMORY_PROFILING != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env) {


@@ -172,16 +172,16 @@ void
wasm_exec_env_destroy(WASMExecEnv *exec_env)
{
#if WASM_ENABLE_THREAD_MGR != 0
/* Terminate all sub-threads */
/* Wait for all sub-threads */
WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
if (cluster) {
wasm_cluster_terminate_all_except_self(cluster, exec_env);
wasm_cluster_wait_for_all_except_self(cluster, exec_env);
#if WASM_ENABLE_DEBUG_INTERP != 0
/* Must fire exit event after other threads exit, otherwise
the stopped thread will be overridden by other threads */
wasm_cluster_thread_exited(exec_env);
#endif
/* We have terminated other threads, this is the only alive thread, so
/* We have waited for other threads, this is the only alive thread, so
* we don't acquire cluster->lock because the cluster will be destroyed
* inside this function */
wasm_cluster_del_exec_env(cluster, exec_env);


@@ -1240,6 +1240,8 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
case WASM_OP_ATOMIC_FENCE:
/* Skip memory index */
frame_ip++;
if (!aot_compiler_op_atomic_fence(comp_ctx, func_ctx))
return false;
break;
case WASM_OP_ATOMIC_I32_LOAD:
bytes = 4;


@@ -1423,4 +1423,13 @@ fail:
return false;
}
bool
aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
{
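    /* emit an LLVM fence instruction with sequentially-consistent ordering;
       the `false` argument requests a cross-thread (not single-thread)
       fence */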
return LLVMBuildFence(comp_ctx->builder,
LLVMAtomicOrderingSequentiallyConsistent, false, "")
? true
: false;
}
#endif /* end of WASM_ENABLE_SHARED_MEMORY */


@@ -97,6 +97,10 @@ bool
aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 bytes);
bool
aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx);
#endif
#ifdef __cplusplus


@@ -3467,6 +3467,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
{
/* Skip the memory index */
frame_ip++;
os_atomic_thread_fence(os_memory_order_release);
break;
}
@@ -4006,7 +4007,8 @@ fast_jit_call_func_bytecode(WASMModuleInstance *module_inst,
module_inst->fast_jit_func_ptrs[func_idx_non_import]);
bh_assert(action == JIT_INTERP_ACTION_NORMAL
|| (action == JIT_INTERP_ACTION_THROWN
&& wasm_runtime_copy_exception(exec_env->module_inst, NULL)));
&& wasm_copy_exception(
(WASMModuleInstance *)exec_env->module_inst, NULL)));
/* Get the return values from info.out.ret */
if (func_type->result_count) {


@@ -3305,6 +3305,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
PUSH_I32(ret);
break;
}
case WASM_OP_ATOMIC_FENCE:
{
os_atomic_thread_fence(os_memory_order_release);
break;
}
case WASM_OP_ATOMIC_I32_LOAD:
case WASM_OP_ATOMIC_I32_LOAD8_U:


@@ -2073,6 +2073,15 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
if (!module_inst)
return;
if (module_inst->exec_env_singleton) {
/* wasm_exec_env_destroy will call
wasm_cluster_wait_for_all_except_self to wait for other
threads, so as to destroy their exec_envs and module
instances first, and avoid accessing the shared resources
of the current module instance after it is deinstantiated. */
wasm_exec_env_destroy(module_inst->exec_env_singleton);
}
#if WASM_ENABLE_DEBUG_INTERP != 0 \
|| (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
&& WASM_ENABLE_LAZY_JIT != 0)
@@ -2155,9 +2164,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
wasm_externref_cleanup((WASMModuleInstanceCommon *)module_inst);
#endif
if (module_inst->exec_env_singleton)
wasm_exec_env_destroy(module_inst->exec_env_singleton);
#if WASM_ENABLE_DUMP_CALL_STACK != 0
if (module_inst->frames) {
bh_vector_destroy(module_inst->frames);
@@ -2241,7 +2247,6 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
WASMRuntimeFrame *prev_frame = wasm_exec_env_get_cur_frame(exec_env);
uint8 *prev_top = exec_env->wasm_stack.s.top;
#ifdef BH_PLATFORM_WINDOWS
const char *exce;
int result;
bool has_exception;
char exception[EXCEPTION_BUF_LEN];


@@ -100,7 +100,8 @@ safe_traverse_exec_env_list(WASMCluster *cluster, list_visitor visitor,
while (node) {
bool already_processed = false;
void *proc_node;
for (size_t i = 0; i < bh_vector_size(&proc_nodes); i++) {
uint32 i;
for (i = 0; i < (uint32)bh_vector_size(&proc_nodes); i++) {
if (!bh_vector_get(&proc_nodes, i, &proc_node)) {
ret = false;
goto final;
@@ -595,14 +596,24 @@ thread_manager_start_routine(void *arg)
/* Routine exit */
/* Detach the native thread here to ensure the resources are freed */
wasm_cluster_detach_thread(exec_env);
#if WASM_ENABLE_DEBUG_INTERP != 0
wasm_cluster_thread_exited(exec_env);
#endif
os_mutex_lock(&cluster_list_lock);
os_mutex_lock(&cluster->lock);
/* Detach the native thread here to ensure the resources are freed */
if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
/* Only detach current thread when there is no other thread
joining it, otherwise let the system resources for the
thread be released after joining */
os_thread_detach(exec_env->handle);
/* No need to set exec_env->thread_is_detached to true here
since we will exit soon */
}
/* Free aux stack space */
free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
/* Remove exec_env */
@@ -614,6 +625,8 @@ thread_manager_start_routine(void *arg)
os_mutex_unlock(&cluster->lock);
os_mutex_unlock(&cluster_list_lock);
os_thread_exit(ret);
return ret;
}
@@ -936,12 +949,23 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
wasm_cluster_clear_thread_signal(exec_env);
wasm_cluster_thread_exited(exec_env);
#endif
/* App exit the thread, free the resources before exit native thread */
/* Detach the native thread here to ensure the resources are freed */
wasm_cluster_detach_thread(exec_env);
os_mutex_lock(&cluster_list_lock);
os_mutex_lock(&cluster->lock);
/* Detach the native thread here to ensure the resources are freed */
if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
/* Only detach current thread when there is no other thread
joining it, otherwise let the system resources for the
thread be released after joining */
os_thread_detach(exec_env->handle);
/* No need to set exec_env->thread_is_detached to true here
since we will exit soon */
}
module_inst = exec_env->module_inst;
/* Free aux stack space */
@@ -955,6 +979,8 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
os_mutex_unlock(&cluster->lock);
os_mutex_unlock(&cluster_list_lock);
os_thread_exit(retval);
}
@@ -981,8 +1007,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
return 0;
}
os_mutex_lock(&exec_env->cluster->lock);
if (!clusters_have_exec_env(exec_env)) {
/* Invalid thread or the thread has exited */
goto final;
@@ -991,7 +1015,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
set_thread_cancel_flags(exec_env);
final:
os_mutex_unlock(&exec_env->cluster->lock);
os_mutex_unlock(&cluster_list_lock);
return 0;
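
The detach condition added above follows the usual rule that a native thread
must be either joined or detached, but never both. A standalone sketch
(illustrative only, plain pthreads rather than the WAMR API):

#include <pthread.h>

static void *
routine(void *arg)
{
    int will_be_joined = *(int *)arg;

    if (!will_be_joined) {
        /* nobody will join this thread: detach it so the OS reclaims its
           resources as soon as it exits */
        pthread_detach(pthread_self());
    }
    /* otherwise stay joinable and let pthread_join release the resources;
       joining a thread that has already been detached is undefined
       behavior */
    return NULL;
}

int
main(void)
{
    pthread_t t;
    int will_be_joined = 1;

    pthread_create(&t, NULL, routine, &will_be_joined);
    pthread_join(&t, NULL); /* matches will_be_joined == 1 above */
    return 0;
}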


@@ -92,6 +92,41 @@ int os_thread_detach(korp_tid);
void
os_thread_exit(void *retval);
/* Try to define os_atomic_thread_fence if it isn't defined in
platform's platform_internal.h */
#ifndef os_atomic_thread_fence
#if !defined(__GNUC_PREREQ) && (defined(__GNUC__) || defined(__GNUG__)) \
&& !defined(__clang__) && defined(__GNUC_MINOR__)
#define __GNUC_PREREQ(maj, min) \
((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#endif
/* Clang's __GNUC_PREREQ macro has a different meaning than GCC one,
so we have to handle this case specially */
#if defined(__clang__)
/* Clang provides stdatomic.h since 3.6.0
See https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html */
#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)
#define BH_HAS_STD_ATOMIC
#endif
#elif defined(__GNUC_PREREQ)
/* Even though older versions of GCC support C11, atomics were
not implemented until 4.9. See
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58016 */
#if __GNUC_PREREQ(4, 9)
#define BH_HAS_STD_ATOMIC
#endif /* end of __GNUC_PREREQ(4, 9) */
#endif /* end of defined(__GNUC_PREREQ) */
#if defined(BH_HAS_STD_ATOMIC) && !defined(__cplusplus)
#include <stdatomic.h>
#define os_memory_order_release memory_order_release
#define os_atomic_thread_fence atomic_thread_fence
#endif
#endif /* end of os_atomic_thread_fence */
/**
* Initialize current thread environment if current thread
* is created by developer but not runtime


@@ -63,6 +63,9 @@ os_set_print_function(os_print_function_t pf);
char *
strcpy(char *dest, const char *src);
#define os_memory_order_release __ATOMIC_RELEASE
#define os_atomic_thread_fence __atomic_thread_fence
#ifdef __cplusplus
}
#endif


@@ -116,6 +116,20 @@ os_thread_signal_inited();
#endif /* end of BUILD_TARGET_X86_64/AMD_64 */
#endif /* end of WASM_DISABLE_HW_BOUND_CHECK */
typedef enum os_memory_order {
os_memory_order_relaxed,
os_memory_order_consume,
os_memory_order_acquire,
os_memory_order_release,
os_memory_order_acq_rel,
os_memory_order_seq_cst,
} os_memory_order;
void
bh_atomic_thread_fence(int mem_order);
#define os_atomic_thread_fence bh_atomic_thread_fence
#ifdef __cplusplus
}
#endif


@@ -10,7 +10,8 @@ add_definitions(-DHAVE_STRUCT_TIMESPEC)
include_directories(${PLATFORM_SHARED_DIR})
include_directories(${PLATFORM_SHARED_DIR}/../include)
file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c)
file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c
${PLATFORM_SHARED_DIR}/*.cpp)
set (PLATFORM_SHARED_SOURCE ${source_all})


@@ -0,0 +1,22 @@
/*
* Copyright (C) 2023 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "platform_api_vmcore.h"
#include "platform_api_extension.h"
#if WASM_ENABLE_SHARED_MEMORY != 0
#include <atomic>
void
bh_atomic_thread_fence(int mem_order)
{
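    /* os_memory_order (declared in the Windows platform_internal.h above)
       lists the orderings in the same sequence as std::memory_order, so
       shifting by the offset from the relaxed member maps each os_* value
       onto its std:: counterpart */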
std::memory_order order =
(std::memory_order)(std::memory_order::memory_order_relaxed + mem_order
- os_memory_order_relaxed);
std::atomic_thread_fence(order);
}
#endif