Fix multi-threading issues (#2013)

- Implement atomic.fence to ensure a proper memory synchronization order
- Destroy exec_env_singleton first in wasm/aot deinstantiation
- Change terminate other threads to wait for other threads in
  wasm_exec_env_destroy
- Fix detach thread in thread_manager_start_routine
- Fix duplicated lock cluster->lock in wasm_cluster_cancel_thread
- Add lib-pthread and lib-wasi-threads compilation to Windows CI
This commit is contained in:
Wenyong Huang 2023-03-08 10:57:22 +08:00 committed by GitHub
parent 7a3d2bfab6
commit f279ba84ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 167 additions and 42 deletions

View File

@ -118,3 +118,17 @@ jobs:
cmake .. -DWAMR_BUILD_DEBUG_INTERP=1 cmake .. -DWAMR_BUILD_DEBUG_INTERP=1
cmake --build . --config Release --parallel 4 cmake --build . --config Release --parallel 4
cd .. && rm -force -r build cd .. && rm -force -r build
- name: Build iwasm [lib pthread]
run: |
cd product-mini/platforms/windows
mkdir build && cd build
cmake .. -DWAMR_BUILD_LIB_PTHREAD=1
cmake --build . --config Release --parallel 4
cd .. && rm -force -r build
- name: Build iwasm [lib wasi-thread]
run: |
cd product-mini/platforms/windows
mkdir build && cd build
cmake .. -DWAMR_BUILD_LIB_WASI_THREADS=1
cmake --build . --config Release --parallel 4
cd .. && rm -force -r build

View File

@ -1226,6 +1226,15 @@ fail:
void void
aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst) aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
{ {
if (module_inst->exec_env_singleton) {
/* wasm_exec_env_destroy will call
wasm_cluster_wait_for_all_except_self to wait for other
threads, so as to destroy their exec_envs and module
instances first, and avoid accessing the shared resources
of current module instance after it is deinstantiated. */
wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
}
#if WASM_ENABLE_LIBC_WASI != 0 #if WASM_ENABLE_LIBC_WASI != 0
/* Destroy wasi resource before freeing app heap, since some fields of /* Destroy wasi resource before freeing app heap, since some fields of
wasi contex are allocated from app heap, and if app heap is freed, wasi contex are allocated from app heap, and if app heap is freed,
@ -1264,9 +1273,6 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
if (module_inst->func_type_indexes) if (module_inst->func_type_indexes)
wasm_runtime_free(module_inst->func_type_indexes); wasm_runtime_free(module_inst->func_type_indexes);
if (module_inst->exec_env_singleton)
wasm_exec_env_destroy((WASMExecEnv *)module_inst->exec_env_singleton);
if (((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports) if (((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports)
wasm_runtime_free( wasm_runtime_free(
((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports); ((AOTModuleInstanceExtra *)module_inst->e)->c_api_func_imports);

View File

@ -203,22 +203,12 @@ wasm_application_execute_main(WASMModuleInstanceCommon *module_inst, int32 argc,
char *argv[]) char *argv[])
{ {
bool ret; bool ret;
#if WASM_ENABLE_THREAD_MGR != 0 #if WASM_ENABLE_MEMORY_PROFILING != 0
WASMCluster *cluster;
#endif
#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
WASMExecEnv *exec_env; WASMExecEnv *exec_env;
#endif #endif
ret = execute_main(module_inst, argc, argv); ret = execute_main(module_inst, argc, argv);
#if WASM_ENABLE_THREAD_MGR != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
wasm_cluster_wait_for_all_except_self(cluster, exec_env);
}
#endif
#if WASM_ENABLE_MEMORY_PROFILING != 0 #if WASM_ENABLE_MEMORY_PROFILING != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst); exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env) { if (exec_env) {
@ -622,22 +612,12 @@ wasm_application_execute_func(WASMModuleInstanceCommon *module_inst,
const char *name, int32 argc, char *argv[]) const char *name, int32 argc, char *argv[])
{ {
bool ret; bool ret;
#if WASM_ENABLE_THREAD_MGR != 0 #if WASM_ENABLE_MEMORY_PROFILING != 0
WASMCluster *cluster;
#endif
#if WASM_ENABLE_THREAD_MGR != 0 || WASM_ENABLE_MEMORY_PROFILING != 0
WASMExecEnv *exec_env; WASMExecEnv *exec_env;
#endif #endif
ret = execute_func(module_inst, name, argc, argv); ret = execute_func(module_inst, name, argc, argv);
#if WASM_ENABLE_THREAD_MGR != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env && (cluster = wasm_exec_env_get_cluster(exec_env))) {
wasm_cluster_wait_for_all_except_self(cluster, exec_env);
}
#endif
#if WASM_ENABLE_MEMORY_PROFILING != 0 #if WASM_ENABLE_MEMORY_PROFILING != 0
exec_env = wasm_runtime_get_exec_env_singleton(module_inst); exec_env = wasm_runtime_get_exec_env_singleton(module_inst);
if (exec_env) { if (exec_env) {

View File

@ -172,16 +172,16 @@ void
wasm_exec_env_destroy(WASMExecEnv *exec_env) wasm_exec_env_destroy(WASMExecEnv *exec_env)
{ {
#if WASM_ENABLE_THREAD_MGR != 0 #if WASM_ENABLE_THREAD_MGR != 0
/* Terminate all sub-threads */ /* Wait for all sub-threads */
WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env); WASMCluster *cluster = wasm_exec_env_get_cluster(exec_env);
if (cluster) { if (cluster) {
wasm_cluster_terminate_all_except_self(cluster, exec_env); wasm_cluster_wait_for_all_except_self(cluster, exec_env);
#if WASM_ENABLE_DEBUG_INTERP != 0 #if WASM_ENABLE_DEBUG_INTERP != 0
/* Must fire exit event after other threads exits, otherwise /* Must fire exit event after other threads exits, otherwise
the stopped thread will be overrided by other threads */ the stopped thread will be overrided by other threads */
wasm_cluster_thread_exited(exec_env); wasm_cluster_thread_exited(exec_env);
#endif #endif
/* We have terminated other threads, this is the only alive thread, so /* We have waited for other threads, this is the only alive thread, so
* we don't acquire cluster->lock because the cluster will be destroyed * we don't acquire cluster->lock because the cluster will be destroyed
* inside this function */ * inside this function */
wasm_cluster_del_exec_env(cluster, exec_env); wasm_cluster_del_exec_env(cluster, exec_env);

View File

@ -1240,6 +1240,8 @@ aot_compile_func(AOTCompContext *comp_ctx, uint32 func_index)
case WASM_OP_ATOMIC_FENCE: case WASM_OP_ATOMIC_FENCE:
/* Skip memory index */ /* Skip memory index */
frame_ip++; frame_ip++;
if (!aot_compiler_op_atomic_fence(comp_ctx, func_ctx))
return false;
break; break;
case WASM_OP_ATOMIC_I32_LOAD: case WASM_OP_ATOMIC_I32_LOAD:
bytes = 4; bytes = 4;

View File

@ -1423,4 +1423,13 @@ fail:
return false; return false;
} }
bool
aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
{
return LLVMBuildFence(comp_ctx->builder,
LLVMAtomicOrderingSequentiallyConsistent, false, "")
? true
: false;
}
#endif /* end of WASM_ENABLE_SHARED_MEMORY */ #endif /* end of WASM_ENABLE_SHARED_MEMORY */

View File

@ -97,6 +97,10 @@ bool
aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx, aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx, uint32 align, AOTFuncContext *func_ctx, uint32 align,
uint32 offset, uint32 bytes); uint32 offset, uint32 bytes);
bool
aot_compiler_op_atomic_fence(AOTCompContext *comp_ctx,
AOTFuncContext *func_ctx);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -3467,6 +3467,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
{ {
/* Skip the memory index */ /* Skip the memory index */
frame_ip++; frame_ip++;
os_atomic_thread_fence(os_memory_order_release);
break; break;
} }
@ -4006,7 +4007,8 @@ fast_jit_call_func_bytecode(WASMModuleInstance *module_inst,
module_inst->fast_jit_func_ptrs[func_idx_non_import]); module_inst->fast_jit_func_ptrs[func_idx_non_import]);
bh_assert(action == JIT_INTERP_ACTION_NORMAL bh_assert(action == JIT_INTERP_ACTION_NORMAL
|| (action == JIT_INTERP_ACTION_THROWN || (action == JIT_INTERP_ACTION_THROWN
&& wasm_runtime_copy_exception(exec_env->module_inst, NULL))); && wasm_copy_exception(
(WASMModuleInstance *)exec_env->module_inst, NULL)));
/* Get the return values form info.out.ret */ /* Get the return values form info.out.ret */
if (func_type->result_count) { if (func_type->result_count) {

View File

@ -3305,6 +3305,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
PUSH_I32(ret); PUSH_I32(ret);
break; break;
} }
case WASM_OP_ATOMIC_FENCE:
{
os_atomic_thread_fence(os_memory_order_release);
break;
}
case WASM_OP_ATOMIC_I32_LOAD: case WASM_OP_ATOMIC_I32_LOAD:
case WASM_OP_ATOMIC_I32_LOAD8_U: case WASM_OP_ATOMIC_I32_LOAD8_U:

View File

@ -2073,6 +2073,15 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
if (!module_inst) if (!module_inst)
return; return;
if (module_inst->exec_env_singleton) {
/* wasm_exec_env_destroy will call
wasm_cluster_wait_for_all_except_self to wait for other
threads, so as to destroy their exec_envs and module
instances first, and avoid accessing the shared resources
of current module instance after it is deinstantiated. */
wasm_exec_env_destroy(module_inst->exec_env_singleton);
}
#if WASM_ENABLE_DEBUG_INTERP != 0 \ #if WASM_ENABLE_DEBUG_INTERP != 0 \
|| (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \ || (WASM_ENABLE_FAST_JIT != 0 && WASM_ENABLE_JIT != 0 \
&& WASM_ENABLE_LAZY_JIT != 0) && WASM_ENABLE_LAZY_JIT != 0)
@ -2155,9 +2164,6 @@ wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst)
wasm_externref_cleanup((WASMModuleInstanceCommon *)module_inst); wasm_externref_cleanup((WASMModuleInstanceCommon *)module_inst);
#endif #endif
if (module_inst->exec_env_singleton)
wasm_exec_env_destroy(module_inst->exec_env_singleton);
#if WASM_ENABLE_DUMP_CALL_STACK != 0 #if WASM_ENABLE_DUMP_CALL_STACK != 0
if (module_inst->frames) { if (module_inst->frames) {
bh_vector_destroy(module_inst->frames); bh_vector_destroy(module_inst->frames);
@ -2241,7 +2247,6 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
WASMRuntimeFrame *prev_frame = wasm_exec_env_get_cur_frame(exec_env); WASMRuntimeFrame *prev_frame = wasm_exec_env_get_cur_frame(exec_env);
uint8 *prev_top = exec_env->wasm_stack.s.top; uint8 *prev_top = exec_env->wasm_stack.s.top;
#ifdef BH_PLATFORM_WINDOWS #ifdef BH_PLATFORM_WINDOWS
const char *exce;
int result; int result;
bool has_exception; bool has_exception;
char exception[EXCEPTION_BUF_LEN]; char exception[EXCEPTION_BUF_LEN];

View File

@ -100,7 +100,8 @@ safe_traverse_exec_env_list(WASMCluster *cluster, list_visitor visitor,
while (node) { while (node) {
bool already_processed = false; bool already_processed = false;
void *proc_node; void *proc_node;
for (size_t i = 0; i < bh_vector_size(&proc_nodes); i++) { uint32 i;
for (i = 0; i < (uint32)bh_vector_size(&proc_nodes); i++) {
if (!bh_vector_get(&proc_nodes, i, &proc_node)) { if (!bh_vector_get(&proc_nodes, i, &proc_node)) {
ret = false; ret = false;
goto final; goto final;
@ -595,14 +596,24 @@ thread_manager_start_routine(void *arg)
/* Routine exit */ /* Routine exit */
/* Detach the native thread here to ensure the resources are freed */
wasm_cluster_detach_thread(exec_env);
#if WASM_ENABLE_DEBUG_INTERP != 0 #if WASM_ENABLE_DEBUG_INTERP != 0
wasm_cluster_thread_exited(exec_env); wasm_cluster_thread_exited(exec_env);
#endif #endif
os_mutex_lock(&cluster_list_lock);
os_mutex_lock(&cluster->lock); os_mutex_lock(&cluster->lock);
/* Detach the native thread here to ensure the resources are freed */
if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
/* Only detach current thread when there is no other thread
joining it, otherwise let the system resources for the
thread be released after joining */
os_thread_detach(exec_env->handle);
/* No need to set exec_env->thread_is_detached to true here
since we will exit soon */
}
/* Free aux stack space */ /* Free aux stack space */
free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom); free_aux_stack(exec_env, exec_env->aux_stack_bottom.bottom);
/* Remove exec_env */ /* Remove exec_env */
@ -614,6 +625,8 @@ thread_manager_start_routine(void *arg)
os_mutex_unlock(&cluster->lock); os_mutex_unlock(&cluster->lock);
os_mutex_unlock(&cluster_list_lock);
os_thread_exit(ret); os_thread_exit(ret);
return ret; return ret;
} }
@ -936,12 +949,23 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
wasm_cluster_clear_thread_signal(exec_env); wasm_cluster_clear_thread_signal(exec_env);
wasm_cluster_thread_exited(exec_env); wasm_cluster_thread_exited(exec_env);
#endif #endif
/* App exit the thread, free the resources before exit native thread */ /* App exit the thread, free the resources before exit native thread */
/* Detach the native thread here to ensure the resources are freed */
wasm_cluster_detach_thread(exec_env); os_mutex_lock(&cluster_list_lock);
os_mutex_lock(&cluster->lock); os_mutex_lock(&cluster->lock);
/* Detach the native thread here to ensure the resources are freed */
if (exec_env->wait_count == 0 && !exec_env->thread_is_detached) {
/* Only detach current thread when there is no other thread
joining it, otherwise let the system resources for the
thread be released after joining */
os_thread_detach(exec_env->handle);
/* No need to set exec_env->thread_is_detached to true here
since we will exit soon */
}
module_inst = exec_env->module_inst; module_inst = exec_env->module_inst;
/* Free aux stack space */ /* Free aux stack space */
@ -955,6 +979,8 @@ wasm_cluster_exit_thread(WASMExecEnv *exec_env, void *retval)
os_mutex_unlock(&cluster->lock); os_mutex_unlock(&cluster->lock);
os_mutex_unlock(&cluster_list_lock);
os_thread_exit(retval); os_thread_exit(retval);
} }
@ -981,8 +1007,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
return 0; return 0;
} }
os_mutex_lock(&exec_env->cluster->lock);
if (!clusters_have_exec_env(exec_env)) { if (!clusters_have_exec_env(exec_env)) {
/* Invalid thread or the thread has exited */ /* Invalid thread or the thread has exited */
goto final; goto final;
@ -991,7 +1015,6 @@ wasm_cluster_cancel_thread(WASMExecEnv *exec_env)
set_thread_cancel_flags(exec_env); set_thread_cancel_flags(exec_env);
final: final:
os_mutex_unlock(&exec_env->cluster->lock);
os_mutex_unlock(&cluster_list_lock); os_mutex_unlock(&cluster_list_lock);
return 0; return 0;

View File

@ -92,6 +92,41 @@ int os_thread_detach(korp_tid);
void void
os_thread_exit(void *retval); os_thread_exit(void *retval);
/* Try to define os_atomic_thread_fence if it isn't defined in
platform's platform_internal.h */
#ifndef os_atomic_thread_fence
#if !defined(__GNUC_PREREQ) && (defined(__GNUC__) || defined(__GNUG__)) \
&& !defined(__clang__) && defined(__GNUC_MINOR__)
#define __GNUC_PREREQ(maj, min) \
((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#endif
/* Clang's __GNUC_PREREQ macro has a different meaning than GCC one,
so we have to handle this case specially */
#if defined(__clang__)
/* Clang provides stdatomic.h since 3.6.0
See https://releases.llvm.org/3.6.0/tools/clang/docs/ReleaseNotes.html */
#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)
#define BH_HAS_STD_ATOMIC
#endif
#elif defined(__GNUC_PREREQ)
/* Even though older versions of GCC support C11, atomics were
not implemented until 4.9. See
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58016 */
#if __GNUC_PREREQ(4, 9)
#define BH_HAS_STD_ATOMIC
#endif /* end of __GNUC_PREREQ(4, 9) */
#endif /* end of defined(__GNUC_PREREQ) */
#if defined(BH_HAS_STD_ATOMIC) && !defined(__cplusplus)
#include <stdatomic.h>
#define os_memory_order_release memory_order_release
#define os_atomic_thread_fence atomic_thread_fence
#endif
#endif /* end of os_atomic_thread_fence */
/** /**
* Initialize current thread environment if current thread * Initialize current thread environment if current thread
* is created by developer but not runtime * is created by developer but not runtime

View File

@ -63,6 +63,9 @@ os_set_print_function(os_print_function_t pf);
char * char *
strcpy(char *dest, const char *src); strcpy(char *dest, const char *src);
#define os_memory_order_release __ATOMIC_RELEASE
#define os_atomic_thread_fence __atomic_thread_fence
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -116,6 +116,20 @@ os_thread_signal_inited();
#endif /* end of BUILD_TARGET_X86_64/AMD_64 */ #endif /* end of BUILD_TARGET_X86_64/AMD_64 */
#endif /* end of WASM_DISABLE_HW_BOUND_CHECK */ #endif /* end of WASM_DISABLE_HW_BOUND_CHECK */
typedef enum os_memory_order {
os_memory_order_relaxed,
os_memory_order_consume,
os_memory_order_acquire,
os_memory_order_release,
os_memory_order_acq_rel,
os_memory_order_seq_cst,
} os_memory_order;
void
bh_atomic_thread_fence(int mem_order);
#define os_atomic_thread_fence bh_atomic_thread_fence
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -10,7 +10,8 @@ add_definitions(-DHAVE_STRUCT_TIMESPEC)
include_directories(${PLATFORM_SHARED_DIR}) include_directories(${PLATFORM_SHARED_DIR})
include_directories(${PLATFORM_SHARED_DIR}/../include) include_directories(${PLATFORM_SHARED_DIR}/../include)
file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c) file (GLOB_RECURSE source_all ${PLATFORM_SHARED_DIR}/*.c
${PLATFORM_SHARED_DIR}/*.cpp)
set (PLATFORM_SHARED_SOURCE ${source_all}) set (PLATFORM_SHARED_SOURCE ${source_all})

View File

@ -0,0 +1,22 @@
/*
* Copyright (C) 2023 Intel Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#include "platform_api_vmcore.h"
#include "platform_api_extension.h"
#if WASM_ENABLE_SHARED_MEMORY != 0
#include <atomic>
void
bh_atomic_thread_fence(int mem_order)
{
std::memory_order order =
(std::memory_order)(std::memory_order::memory_order_relaxed + mem_order
- os_memory_order_relaxed);
std::atomic_thread_fence(order);
}
#endif