From 240ca2ed46e1eb5d0f6901c2282f7cc9fdf4e331 Mon Sep 17 00:00:00 2001 From: Wenyong Huang Date: Sun, 17 Jan 2021 23:23:10 -0600 Subject: [PATCH] Implement performance profiler and call stack dump, and update toolchain document (#501) And remove redundant FAST_INTERP macros in wasm_interp_fast.c, and fix wamrc --help wrong line order issue. Signed-off-by: Wenyong Huang --- build-scripts/SConscript_config | 11 +- build-scripts/config_common.cmake | 8 + core/config.h | 10 + core/iwasm/aot/aot_loader.c | 3 + core/iwasm/aot/aot_reloc.h | 14 +- core/iwasm/aot/aot_runtime.c | 191 +++++++++++++++++- core/iwasm/aot/aot_runtime.h | 37 +++- core/iwasm/common/wasm_runtime_common.c | 36 ++++ core/iwasm/compilation/aot_emit_function.c | 160 ++++++++++++++- core/iwasm/compilation/aot_llvm.c | 3 + core/iwasm/compilation/aot_llvm.h | 4 + core/iwasm/include/aot_export.h | 1 + core/iwasm/include/wasm_export.h | 20 +- core/iwasm/interpreter/wasm_interp.h | 9 +- core/iwasm/interpreter/wasm_interp_classic.c | 15 +- core/iwasm/interpreter/wasm_interp_fast.c | 25 +-- core/iwasm/interpreter/wasm_mini_loader.c | 15 +- core/iwasm/interpreter/wasm_runtime.c | 64 +++++- core/iwasm/interpreter/wasm_runtime.h | 17 +- .../lib-pthread/lib_pthread_wrapper.c | 2 +- doc/build_wamr.md | 12 ++ doc/build_wasm_app.md | 150 ++++++++++++-- doc/other_wasm_compilers.md | 37 ---- doc/pthread_library.md | 4 +- wamr-compiler/CMakeLists.txt | 3 + wamr-compiler/main.c | 10 +- 26 files changed, 752 insertions(+), 109 deletions(-) diff --git a/build-scripts/SConscript_config b/build-scripts/SConscript_config index 37f59df9..f5fe373d 100644 --- a/build-scripts/SConscript_config +++ b/build-scripts/SConscript_config @@ -1,15 +1,14 @@ - - -import os -import re - -from building import * # # Copyright (c) 2021, RT-Thread Development Team # # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # +import os +import re + +from building import * + Import('rtconfig') src = Split(''' diff --git 
a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 822be12a..2ee2a964 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -173,6 +173,10 @@ if (WAMR_BUILD_MEMORY_PROFILING EQUAL 1) add_definitions (-DWASM_ENABLE_MEMORY_PROFILING=1) message (" Memory profiling enabled") endif () +if (WAMR_BUILD_PERF_PROFILING EQUAL 1) + add_definitions (-DWASM_ENABLE_PERF_PROFILING=1) + message (" Performance profiling enabled") +endif () if (DEFINED WAMR_APP_THREAD_STACK_SIZE_MAX) add_definitions (-DAPP_THREAD_STACK_SIZE_MAX=${WAMR_APP_THREAD_STACK_SIZE_MAX}) endif () @@ -180,6 +184,10 @@ if (WAMR_BUILD_CUSTOM_NAME_SECTION EQUAL 1) add_definitions (-DWASM_ENABLE_CUSTOM_NAME_SECTION=1) message (" Custom name section enabled") endif () +if (WAMR_BUILD_DUMP_CALL_STACK EQUAL 1) + add_definitions (-DWASM_ENABLE_DUMP_CALL_STACK=1) + message (" Dump call stack enabled") +endif () if (WAMR_BUILD_TAIL_CALL EQUAL 1) add_definitions (-DWASM_ENABLE_TAIL_CALL=1) message (" Tail call enabled") diff --git a/core/config.h b/core/config.h index 0e9c87ae..e9f396de 100644 --- a/core/config.h +++ b/core/config.h @@ -180,6 +180,16 @@ #define WASM_ENABLE_MEMORY_TRACING 0 #endif +/* Performance profiling */ +#ifndef WASM_ENABLE_PERF_PROFILING +#define WASM_ENABLE_PERF_PROFILING 0 +#endif + +/* Dump call stack */ +#ifndef WASM_ENABLE_DUMP_CALL_STACK +#define WASM_ENABLE_DUMP_CALL_STACK 0 +#endif + /* Heap verification */ #ifndef BH_ENABLE_GC_VERIFY #define BH_ENABLE_GC_VERIFY 0 diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index c322d407..eb9f198f 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -2174,6 +2174,9 @@ aot_convert_wasm_module(WASMModule *wasm_module, #endif #if WASM_ENABLE_SIMD != 0 option.enable_simd = true; +#endif +#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) + option.enable_aux_stack_frame = true; #endif comp_ctx = 
aot_create_comp_context(comp_data, &option); if (!comp_ctx) { diff --git a/core/iwasm/aot/aot_reloc.h b/core/iwasm/aot/aot_reloc.h index cbf2f9cd..cba15036 100644 --- a/core/iwasm/aot/aot_reloc.h +++ b/core/iwasm/aot/aot_reloc.h @@ -29,6 +29,14 @@ typedef struct { #define REG_ATOMIC_WAIT_SYM() #endif +#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) +#define REG_AOT_TRACE_SYM() \ + REG_SYM(aot_alloc_frame), \ + REG_SYM(aot_free_frame), +#else +#define REG_AOT_TRACE_SYM() +#endif + #if (defined(_WIN32) || defined(_WIN32_)) && defined(NDEBUG) #define REG_COMMON_SYMBOLS \ REG_SYM(aot_set_exception_with_id), \ @@ -39,7 +47,8 @@ typedef struct { REG_SYM(aot_memset), \ REG_SYM(aot_memmove), \ REG_BULK_MEMORY_SYM() \ - REG_ATOMIC_WAIT_SYM() + REG_ATOMIC_WAIT_SYM() \ + REG_AOT_TRACE_SYM() #else /* else of (defined(_WIN32) || defined(_WIN32_)) && defined(NDEBUG) */ #define REG_COMMON_SYMBOLS \ REG_SYM(aot_set_exception_with_id), \ @@ -62,7 +71,8 @@ typedef struct { REG_SYM(rint), \ REG_SYM(rintf), \ REG_BULK_MEMORY_SYM() \ - REG_ATOMIC_WAIT_SYM() + REG_ATOMIC_WAIT_SYM() \ + REG_AOT_TRACE_SYM() #endif /* end of (defined(_WIN32) || defined(_WIN32_)) && defined(NDEBUG) */ #define CHECK_RELOC_OFFSET(data_size) do { \ diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index d26f2161..18518e0b 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -814,6 +814,15 @@ aot_instantiate(AOTModule *module, bool is_sub_inst, #endif module_inst->default_wasm_stack_size = stack_size; +#if WASM_ENABLE_PERF_PROFILING != 0 + total_size = (uint64)sizeof(AOTFuncPerfProfInfo) * + (module->import_func_count + module->func_count); + if (!(module_inst->func_perf_profilings.ptr = + runtime_malloc(total_size, error_buf, error_buf_size))) { + goto fail; + } +#endif + /* Execute __post_instantiate function and start function*/ if (!execute_post_inst_function(module_inst) || !execute_start_function(module_inst)) { @@ -866,6 
+875,11 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst) wasm_runtime_destroy_wasi((WASMModuleInstanceCommon*)module_inst); #endif +#if WASM_ENABLE_PERF_PROFILING != 0 + if (module_inst->func_perf_profilings.ptr) + wasm_runtime_free(module_inst->func_perf_profilings.ptr); +#endif + if (module_inst->memories.ptr) memories_deinstantiate(module_inst); @@ -1128,16 +1142,38 @@ aot_call_function(WASMExecEnv *exec_env, cell_num += wasm_value_type_cell_num(ext_ret_types[i]); } +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) + if (!aot_alloc_frame(exec_env, function->func_index)) { + wasm_runtime_free(argv1); + return false; + } +#endif + ret = invoke_native_internal(exec_env, function->u.func.func_ptr, func_type, NULL, NULL, argv1, argc, argv); + if (!ret || aot_get_exception(module_inst)) { if (argv1 != argv1_buf) wasm_runtime_free(argv1); + if (clear_wasi_proc_exit_exception(module_inst)) - return true; - return false; + ret = true; + else + ret = false; } +#if WASM_ENABLE_DUMP_CALL_STACK != 0 + if (!ret) { + aot_dump_call_stack(exec_env); + } +#endif + +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) + aot_free_frame(exec_env); +#endif + if (!ret) + return ret; + /* Get extra result values */ switch (func_type->types[func_type->param_count]) { case VALUE_TYPE_I32: @@ -1161,10 +1197,28 @@ aot_call_function(WASMExecEnv *exec_env, return true; } else { +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) + if (!aot_alloc_frame(exec_env, function->func_index)) { + return false; + } +#endif + ret = invoke_native_internal(exec_env, function->u.func.func_ptr, func_type, NULL, NULL, argv, argc, argv); + if (clear_wasi_proc_exit_exception(module_inst)) - return true; + ret = true; + +#if WASM_ENABLE_DUMP_CALL_STACK != 0 + if (aot_get_exception(module_inst)) { + aot_dump_call_stack(exec_env); + } +#endif + +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 
0) + aot_free_frame(exec_env); +#endif + return ret && !aot_get_exception(module_inst) ? true : false; } } @@ -2224,3 +2278,134 @@ aot_get_module_inst_mem_consumption(const AOTModuleInstance *module_inst, } #endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0) || (WASM_ENABLE_MEMORY_TRACING != 0) */ + +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) +static const char * +get_func_name_from_index(const AOTModuleInstance *module_inst, + uint32 func_index) +{ + const char *func_name = NULL; + AOTModule *module = module_inst->aot_module.ptr; + + if (func_index < module->import_func_count) { + func_name = module->import_funcs[func_index].func_name; + } + else { + uint32 i; + + for (i = 0; i < module->export_count; i++) { + AOTExport export = module->exports[i]; + if (export.index == func_index + && export.kind == EXPORT_KIND_FUNC) { + func_name = export.name; + break; + } + } + } + + return func_name; +} + +bool +aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index) +{ + AOTFrame *frame = + wasm_exec_env_alloc_wasm_frame(exec_env, sizeof(AOTFrame)); +#if WASM_ENABLE_PERF_PROFILING != 0 + AOTModuleInstance *module_inst = + (AOTModuleInstance*)exec_env->module_inst; + AOTFuncPerfProfInfo *func_perf_prof = + (AOTFuncPerfProfInfo*)module_inst->func_perf_profilings.ptr + func_index; +#endif + + if (!frame) { + aot_set_exception((AOTModuleInstance*)exec_env->module_inst, + "auxiliary call stack overflow"); + return false; + } + +#if WASM_ENABLE_PERF_PROFILING != 0 + frame->time_started = os_time_get_boot_microsecond(); + frame->func_perf_prof_info = func_perf_prof; +#endif + + frame->prev_frame = (AOTFrame *)exec_env->cur_frame; + exec_env->cur_frame = (struct WASMInterpFrame *)frame; + + frame->func_index = func_index; + return true; +} + +void +aot_free_frame(WASMExecEnv *exec_env) +{ + AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame; + AOTFrame *prev_frame = cur_frame->prev_frame; + +#if WASM_ENABLE_PERF_PROFILING != 0 + 
cur_frame->func_perf_prof_info->total_exec_time += + os_time_get_boot_microsecond() - cur_frame->time_started; + cur_frame->func_perf_prof_info->total_exec_cnt++; +#endif + + wasm_exec_env_free_wasm_frame(exec_env, cur_frame); + exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame; +} +#endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) + || (WASM_ENABLE_PERF_PROFILING != 0) */ + +#if WASM_ENABLE_DUMP_CALL_STACK != 0 +void +aot_dump_call_stack(WASMExecEnv *exec_env) +{ + AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame; + AOTModuleInstance *module_inst = + (AOTModuleInstance *)exec_env->module_inst; + const char *func_name; + uint32 n = 0; + + os_printf("\n"); + while (cur_frame) { + func_name = + get_func_name_from_index(module_inst, cur_frame->func_index); + + /* function name not exported, print number instead */ + if (func_name == NULL) { + os_printf("#%02d $f%d \n", n, cur_frame->func_index); + } + else { + os_printf("#%02d %s \n", n, func_name); + } + + cur_frame = cur_frame->prev_frame; + n++; + } + os_printf("\n"); +} +#endif /* end of WASM_ENABLE_DUMP_CALL_STACK */ + +#if WASM_ENABLE_PERF_PROFILING != 0 +void +aot_dump_perf_profiling(const AOTModuleInstance *module_inst) +{ + AOTFuncPerfProfInfo *perf_prof = (AOTFuncPerfProfInfo *) + module_inst->func_perf_profilings.ptr; + AOTModule *module = (AOTModule *)module_inst->aot_module.ptr; + uint32 total_func_count = module->import_func_count + module->func_count, i; + const char *func_name; + + os_printf("Performance profiler data:\n"); + for (i = 0; i < total_func_count; i++, perf_prof++) { + func_name = get_func_name_from_index(module_inst, i); + + if (func_name) + os_printf(" func %s, execution time: %.3f ms, execution count: %d times\n", + func_name, perf_prof->total_exec_time / 1000.0f, + perf_prof->total_exec_cnt); + else + os_printf(" func %d, execution time: %.3f ms, execution count: %d times\n", + i, perf_prof->total_exec_time / 1000.0f, + perf_prof->total_exec_cnt); + } +} +#endif /* end 
of WASM_ENABLE_PERF_PROFILING */ diff --git a/core/iwasm/aot/aot_runtime.h b/core/iwasm/aot/aot_runtime.h index 4fab1903..b165b9f8 100644 --- a/core/iwasm/aot/aot_runtime.h +++ b/core/iwasm/aot/aot_runtime.h @@ -281,8 +281,13 @@ typedef struct AOTModuleInstance { uint32 llvm_stack; uint32 default_wasm_stack_size; + uint32 __padding; + + /* function performance profiling info list */ + AOTPointer func_perf_profilings; + /* reserved */ - uint32 reserved[11]; + uint32 reserved[8]; union { uint64 _make_it_8_byte_aligned_; @@ -311,6 +316,24 @@ typedef struct AOTTargetInfo { char arch[16]; } AOTTargetInfo; +typedef struct AOTFuncPerfProfInfo +{ + /* total execution time */ + uint64 total_exec_time; + /* total execution count */ + uint32 total_exec_cnt; +} AOTFuncPerfProfInfo; + +/* AOT auxiliary call stack */ +typedef struct AOTFrame { + struct AOTFrame *prev_frame; + uint32 func_index; +#if WASM_ENABLE_PERF_PROFILING != 0 + uint64 time_started; + AOTFuncPerfProfInfo *func_perf_prof_info; +#endif +} AOTFrame; + /** * Load a AOT module from aot file buffer * @param buf the byte buffer which contains the AOT file data @@ -568,6 +591,18 @@ void aot_get_module_inst_mem_consumption(const AOTModuleInstance *module_inst, WASMModuleInstMemConsumption *mem_conspn); +bool +aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index); + +void +aot_free_frame(WASMExecEnv *exec_env); + +void +aot_dump_call_stack(WASMExecEnv *exec_env); + +void +aot_dump_perf_profiling(const AOTModuleInstance *module_inst); + #ifdef __cplusplus } /* end of extern "C" */ #endif diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index 9c8b3a06..125dd103 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -960,6 +960,23 @@ wasm_runtime_dump_mem_consumption(WASMExecEnv *exec_env) #endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0) || (WASM_ENABLE_MEMORY_TRACING != 0) */ +#if WASM_ENABLE_PERF_PROFILING != 0 +void 
+wasm_runtime_dump_perf_profiling(WASMModuleInstanceCommon *module_inst) +{ +#if WASM_ENABLE_INTERP != 0 + if (module_inst->module_type == Wasm_Module_Bytecode) { + wasm_dump_perf_profiling((WASMModuleInstance*)module_inst); + } +#endif +#if WASM_ENABLE_AOT != 0 + if (module_inst->module_type == Wasm_Module_AoT) { + aot_dump_perf_profiling((AOTModuleInstance*)module_inst); + } +#endif +} +#endif + WASMModuleInstanceCommon * wasm_runtime_get_module_inst(WASMExecEnv *exec_env) { @@ -3453,3 +3470,22 @@ wasm_runtime_join_thread(wasm_thread_t tid, void **retval) } #endif + +#if WASM_ENABLE_DUMP_CALL_STACK != 0 +void +wasm_runtime_dump_call_stack(WASMExecEnv *exec_env) +{ + WASMModuleInstanceCommon *module_inst + = wasm_exec_env_get_module_inst(exec_env); +#if WASM_ENABLE_INTERP != 0 + if (module_inst->module_type == Wasm_Module_Bytecode) { + wasm_interp_dump_call_stack(exec_env); + } +#endif +#if WASM_ENABLE_AOT != 0 + if (module_inst->module_type == Wasm_Module_AoT) { + aot_dump_call_stack(exec_env); + } +#endif +} +#endif /* end of WASM_ENABLE_DUMP_CALL_STACK */ diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index 1aa9eb57..57cac163 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -8,6 +8,45 @@ #include "aot_emit_control.h" #include "../aot/aot_runtime.h" +#define GET_AOT_FUNCTION(name, argc) do { \ + if (!(func_type = LLVMFunctionType(ret_type, param_types, \ + argc, false))) { \ + aot_set_last_error("llvm add function type failed."); \ + return false; \ + } \ + if (comp_ctx->is_jit_mode) { \ + /* JIT mode, call the function directly */ \ + if (!(func_ptr_type = LLVMPointerType(func_type, 0))) { \ + aot_set_last_error("llvm add pointer type failed."); \ + return false; \ + } \ + if (!(value = I64_CONST((uint64)(uintptr_t)name)) \ + || !(func = LLVMConstIntToPtr(value, func_ptr_type))) { \ + aot_set_last_error("create LLVM value failed."); \ + return false; 
\ + } \ + } \ + else { \ + char *func_name = #name; \ + /* AOT mode, delcare the function */ \ + if (!(func = LLVMGetNamedFunction(comp_ctx->module, func_name)) \ + && !(func = LLVMAddFunction(comp_ctx->module, \ + func_name, func_type))) { \ + aot_set_last_error("llvm add function failed."); \ + return false; \ + } \ + } \ + } while (0) + +#define ADD_BASIC_BLOCK(block, name) do { \ + if (!(block = LLVMAppendBasicBlockInContext(comp_ctx->context, \ + func_ctx->func, \ + name))) { \ + aot_set_last_error("llvm add basic block failed."); \ + goto fail; \ + } \ + } while (0) + static bool create_func_return_block(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { @@ -239,6 +278,91 @@ call_aot_invoke_native_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return true; } +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) +static bool +call_aot_alloc_frame_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + LLVMValueRef func_idx) +{ + LLVMValueRef param_values[2], ret_value, value, func; + LLVMTypeRef param_types[2], ret_type, func_type, func_ptr_type; + LLVMBasicBlockRef block_curr = LLVMGetInsertBlock(comp_ctx->builder); + LLVMBasicBlockRef frame_alloc_fail, frame_alloc_success; + AOTFuncType *aot_func_type = func_ctx->aot_func->func_type; + + param_types[0] = comp_ctx->exec_env_type; + param_types[1] = I32_TYPE; + ret_type = INT8_TYPE; + + GET_AOT_FUNCTION(aot_alloc_frame, 2); + + param_values[0] = func_ctx->exec_env; + param_values[1] = func_idx; + + if (!(ret_value = LLVMBuildCall(comp_ctx->builder, func, + param_values, 2, + "call_aot_alloc_frame"))) { + aot_set_last_error("llvm build call failed."); + return false; + } + + if (!(ret_value = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGT, + ret_value, I8_ZERO, "frame_alloc_ret"))) { + aot_set_last_error("llvm build icmp failed."); + return false; + } + + ADD_BASIC_BLOCK(frame_alloc_fail, "frame_alloc_fail"); + ADD_BASIC_BLOCK(frame_alloc_success, "frame_alloc_success"); + + 
LLVMMoveBasicBlockAfter(frame_alloc_fail, block_curr); + LLVMMoveBasicBlockAfter(frame_alloc_success, block_curr); + + if (!LLVMBuildCondBr(comp_ctx->builder, ret_value, + frame_alloc_success, frame_alloc_fail)) { + aot_set_last_error("llvm build cond br failed."); + return false; + } + + /* If frame alloc failed, return this function + so the runtime can catch the exception */ + LLVMPositionBuilderAtEnd(comp_ctx->builder, frame_alloc_fail); + if (!aot_build_zero_function_ret(comp_ctx, aot_func_type)) { + return false; + } + + LLVMPositionBuilderAtEnd(comp_ctx->builder, frame_alloc_success); + + return true; + +fail: + return false; +} + +static bool +call_aot_free_frame_func(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef param_values[1], ret_value, value, func; + LLVMTypeRef param_types[1], ret_type, func_type, func_ptr_type; + + param_types[0] = comp_ctx->exec_env_type; + ret_type = INT8_TYPE; + + GET_AOT_FUNCTION(aot_free_frame, 1); + + param_values[0] = func_ctx->exec_env; + + if (!(ret_value = LLVMBuildCall(comp_ctx->builder, func, + param_values, 1, + "call_aot_free_frame"))) { + aot_set_last_error("llvm build call failed."); + return false; + } + + return true; +} +#endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) + || (WASM_ENABLE_PERF_PROFILING != 0) */ + static bool check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 callee_cell_num) @@ -334,6 +458,19 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Get param cell number */ param_cell_num = func_type->param_cell_num; +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) + if (comp_ctx->enable_aux_stack_frame) { + LLVMValueRef func_idx_const; + + if (!(func_idx_const = I32_CONST(func_idx))) { + aot_set_last_error("llvm build const failed."); + return false; + } + if (!call_aot_alloc_frame_func(comp_ctx, func_ctx, func_idx_const)) + return false; + } +#endif + /* Allocate memory for parameters. 
* Parameters layout: * - exec env @@ -485,13 +622,20 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } } +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) + if (comp_ctx->enable_aux_stack_frame) { + if (!call_aot_free_frame_func(comp_ctx, func_ctx)) + goto fail; + } +#endif + ret = true; fail: if (param_types) wasm_runtime_free(param_types); if (param_values) wasm_runtime_free(param_values); - return ret; + return ret; } static bool @@ -889,6 +1033,13 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, } #endif +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) + if (comp_ctx->enable_aux_stack_frame) { + if (!call_aot_alloc_frame_func(comp_ctx, func_ctx, func_idx)) + goto fail; + } +#endif + /* Add basic blocks */ block_call_import = LLVMAppendBasicBlockInContext(comp_ctx->context, func_ctx->func, @@ -1066,6 +1217,13 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, PUSH(result_phis[i], func_type->types[func_param_count + i]); } +#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0) + if (comp_ctx->enable_aux_stack_frame) { + if (!call_aot_free_frame_func(comp_ctx, func_ctx)) + goto fail; + } +#endif + ret = true; fail: diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 26338dc6..15ca7e56 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -1163,6 +1163,9 @@ aot_create_comp_context(AOTCompData *comp_data, if (option->enable_simd) comp_ctx->enable_simd = true; + if (option->enable_aux_stack_frame) + comp_ctx->enable_aux_stack_frame = true; + if (option->is_jit_mode) { char *triple_jit = NULL; diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 599e664d..02b24928 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -223,6 +223,9 @@ typedef struct 
AOTCompContext { /* 128-bit SIMD */ bool enable_simd; + /* generate auxiliary stack frame */ + bool enable_aux_stack_frame; + /* Thread Manager */ bool enable_thread_mgr; @@ -271,6 +274,7 @@ typedef struct AOTCompOption{ bool enable_thread_mgr; bool enable_tail_call; bool enable_simd; + bool enable_aux_stack_frame; bool is_sgx_platform; uint32 opt_level; uint32 size_level; diff --git a/core/iwasm/include/aot_export.h b/core/iwasm/include/aot_export.h index 78d2edc1..c1cac678 100644 --- a/core/iwasm/include/aot_export.h +++ b/core/iwasm/include/aot_export.h @@ -43,6 +43,7 @@ typedef struct AOTCompOption{ bool enable_thread_mgr; bool enable_tail_call; bool enable_simd; + bool enable_aux_stack_frame; bool is_sgx_platform; uint32_t opt_level; uint32_t size_level; diff --git a/core/iwasm/include/wasm_export.h b/core/iwasm/include/wasm_export.h index 44c32b6f..d8f585fc 100644 --- a/core/iwasm/include/wasm_export.h +++ b/core/iwasm/include/wasm_export.h @@ -227,7 +227,6 @@ wasm_runtime_free(void *ptr); WASM_RUNTIME_API_EXTERN package_type_t get_package_type(const uint8_t *buf, uint32_t size); -#if WASM_ENABLE_MULTI_MODULE != 0 /** * It is a callback for WAMR providing by embedding to load a module file * into a buffer @@ -275,7 +274,6 @@ wasm_runtime_register_module(const char *module_name, wasm_module_t module, */ WASM_RUNTIME_API_EXTERN wasm_module_t wasm_runtime_find_module_registered(const char *module_name); -#endif /* WASM_ENABLE_MULTI_MODULE */ /** * Load a WASM module from a specified byte buffer. 
The byte buffer can be @@ -787,7 +785,14 @@ wasm_runtime_get_user_data(wasm_exec_env_t exec_env); WASM_RUNTIME_API_EXTERN void wasm_runtime_dump_mem_consumption(wasm_exec_env_t exec_env); -#if WASM_ENABLE_THREAD_MGR != 0 +/** + * Dump runtime performance profiler data of each function + * + * @param module_inst the WASM module instance to profile + */ +WASM_RUNTIME_API_EXTERN void +wasm_runtime_dump_perf_profiling(wasm_module_inst_t module_inst); + /* wasm thread callback function type */ typedef void* (*wasm_thread_callback_t)(wasm_exec_env_t, void *); /* wasm thread type */ @@ -844,7 +849,14 @@ wasm_runtime_spawn_thread(wasm_exec_env_t exec_env, wasm_thread_t *tid, */ WASM_RUNTIME_API_EXTERN int32_t wasm_runtime_join_thread(wasm_thread_t tid, void **retval); -#endif + +/** + * dump the call stack + * + * @param exec_env the execution environment + */ +WASM_RUNTIME_API_EXTERN void +wasm_runtime_dump_call_stack(wasm_exec_env_t exec_env); #ifdef __cplusplus } diff --git a/core/iwasm/interpreter/wasm_interp.h b/core/iwasm/interpreter/wasm_interp.h index 5f867249..a0d56f32 100644 --- a/core/iwasm/interpreter/wasm_interp.h +++ b/core/iwasm/interpreter/wasm_interp.h @@ -26,6 +26,10 @@ typedef struct WASMInterpFrame { /* Instruction pointer of the bytecode array. */ uint8 *ip; +#if WASM_ENABLE_PERF_PROFILING != 0 + uint64 time_started; +#endif + #if WASM_ENABLE_FAST_INTERP != 0 /* return offset of the first return value of current frame. 
the callee will put return values here continuously */ @@ -74,11 +78,6 @@ wasm_interp_call_wasm(struct WASMModuleInstance *module_inst, struct WASMFunctionInstance *function, uint32 argc, uint32 argv[]); -#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0 -void -wasm_interp_dump_call_stack(struct WASMExecEnv *exec_env); -#endif - #ifdef __cplusplus } #endif diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c index 55c619c2..bf39335e 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -900,8 +900,12 @@ ALLOC_FRAME(WASMExecEnv *exec_env, uint32 size, WASMInterpFrame *prev_frame) { WASMInterpFrame *frame = wasm_exec_env_alloc_wasm_frame(exec_env, size); - if (frame) + if (frame) { frame->prev_frame = prev_frame; +#if WASM_ENABLE_PERF_PROFILING != 0 + frame->time_started = os_time_get_boot_microsecond(); +#endif + } else { wasm_set_exception((WASMModuleInstance*)exec_env->module_inst, "stack overflow"); @@ -913,6 +917,13 @@ ALLOC_FRAME(WASMExecEnv *exec_env, uint32 size, WASMInterpFrame *prev_frame) static inline void FREE_FRAME(WASMExecEnv *exec_env, WASMInterpFrame *frame) { +#if WASM_ENABLE_PERF_PROFILING != 0 + if (frame->function) { + frame->function->total_exec_time += os_time_get_boot_microsecond() + - frame->time_started; + frame->function->total_exec_cnt++; + } +#endif wasm_exec_env_free_wasm_frame(exec_env, frame); } @@ -3361,7 +3372,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, } } else { -#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0 +#if WASM_ENABLE_DUMP_CALL_STACK != 0 wasm_interp_dump_call_stack(exec_env); #endif LOG_DEBUG("meet an exception %s", wasm_get_exception(module_inst)); diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 53810bad..de157f8c 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -909,8 +909,12 @@ 
ALLOC_FRAME(WASMExecEnv *exec_env, uint32 size, WASMInterpFrame *prev_frame) { WASMInterpFrame *frame = wasm_exec_env_alloc_wasm_frame(exec_env, size); - if (frame) + if (frame) { frame->prev_frame = prev_frame; +#if WASM_ENABLE_PERF_PROFILING != 0 + frame->time_started = os_time_get_boot_microsecond(); +#endif + } else { wasm_set_exception((WASMModuleInstance*)exec_env->module_inst, "stack overflow"); @@ -922,6 +926,13 @@ ALLOC_FRAME(WASMExecEnv *exec_env, uint32 size, WASMInterpFrame *prev_frame) static inline void FREE_FRAME(WASMExecEnv *exec_env, WASMInterpFrame *frame) { +#if WASM_ENABLE_PERF_PROFILING != 0 + if (frame->function) { + frame->function->total_exec_time += os_time_get_boot_microsecond() + - frame->time_started; + frame->function->total_exec_cnt++; + } +#endif wasm_exec_env_free_wasm_frame(exec_env, frame); } @@ -1086,9 +1097,6 @@ wasm_interp_dump_op_count() #else #define HANDLE_OP(opcode) HANDLE_##opcode #endif -#if WASM_ENABLE_FAST_INTERP == 0 -#define FETCH_OPCODE_AND_DISPATCH() goto *handle_table[*frame_ip++] -#else #if WASM_ENABLE_ABS_LABEL_ADDR != 0 #define FETCH_OPCODE_AND_DISPATCH() do { \ const void *p_label_addr = *(void**)frame_ip; \ @@ -1103,7 +1111,6 @@ wasm_interp_dump_op_count() goto *p_label_addr; \ } while (0) #endif -#endif #define HANDLE_OP_END() FETCH_OPCODE_AND_DISPATCH() #else /* else of WASM_ENABLE_LABELS_AS_VALUES */ @@ -1113,9 +1120,7 @@ wasm_interp_dump_op_count() #endif /* end of WASM_ENABLE_LABELS_AS_VALUES */ -#if WASM_ENABLE_FAST_INTERP != 0 static void **global_handle_table; -#endif static void wasm_interp_call_func_bytecode(WASMModuleInstance *module, @@ -1150,13 +1155,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, #define HANDLE_OPCODE(op) &&HANDLE_##op DEFINE_GOTO_TABLE (const void*, handle_table); #undef HANDLE_OPCODE -#if WASM_ENABLE_FAST_INTERP != 0 if (exec_env == NULL) { global_handle_table = (void **)handle_table; return; } #endif -#endif #if WASM_ENABLE_LABELS_AS_VALUES == 0 while (frame_ip 
< frame_ip_end) { @@ -3330,7 +3333,6 @@ recover_br_info: #endif } -#if WASM_ENABLE_FAST_INTERP != 0 void ** wasm_interp_get_handle_table() { @@ -3339,7 +3341,6 @@ wasm_interp_get_handle_table() wasm_interp_call_func_bytecode(&module, NULL, NULL, NULL); return global_handle_table; } -#endif void wasm_interp_call_wasm(WASMModuleInstance *module_inst, @@ -3412,7 +3413,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, argv[i] = *(frame->lp + i); } else { -#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0 +#if WASM_ENABLE_DUMP_CALL_STACK != 0 wasm_interp_dump_call_stack(exec_env); #endif } diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index 2dc38768..f2c19bcd 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -1448,7 +1448,6 @@ load_user_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module, static bool wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, - BlockAddr *block_addr_cache, char *error_buf, uint32 error_buf_size); #if WASM_ENABLE_FAST_INTERP != 0 @@ -1472,9 +1471,7 @@ load_from_sections(WASMModule *module, WASMSection *sections, uint32 aux_stack_top = (uint32)-1, global_index, func_index, i; uint32 aux_data_end_global_index = (uint32)-1; uint32 aux_heap_base_global_index = (uint32)-1; - BlockAddr *block_addr_cache; WASMType *func_type; - uint64 total_size; /* Find code and function sections if have */ while (section) { @@ -1746,22 +1743,13 @@ load_from_sections(WASMModule *module, WASMSection *sections, handle_table = wasm_interp_get_handle_table(); #endif - total_size = sizeof(BlockAddr) * (uint64)BLOCK_ADDR_CACHE_SIZE * BLOCK_ADDR_CONFLICT_SIZE; - if (!(block_addr_cache = loader_malloc - (total_size, error_buf, error_buf_size))) { - return false; - } - for (i = 0; i < module->function_count; i++) { WASMFunction *func = module->functions[i]; - memset(block_addr_cache, 0, (uint32)total_size); - if 
(!wasm_loader_prepare_bytecode(module, func, block_addr_cache, + if (!wasm_loader_prepare_bytecode(module, func, error_buf, error_buf_size)) { - wasm_runtime_free(block_addr_cache); return false; } } - wasm_runtime_free(block_addr_cache); if (!module->possible_memory_grow) { WASMMemoryImport *memory_import; @@ -4251,7 +4239,6 @@ fail: static bool wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, - BlockAddr *block_addr_cache, char *error_buf, uint32 error_buf_size) { uint8 *p = func->code, *p_end = func->code + func->code_size, *p_org; diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c index 6bf75c51..f444bdc1 100644 --- a/core/iwasm/interpreter/wasm_runtime.c +++ b/core/iwasm/interpreter/wasm_runtime.c @@ -1657,6 +1657,49 @@ wasm_get_exception(WASMModuleInstance *module_inst) return module_inst->cur_exception; } +#if WASM_ENABLE_PERF_PROFILING != 0 +void +wasm_dump_perf_profiling(const WASMModuleInstance *module_inst) +{ + WASMExportFuncInstance *export_func; + WASMFunctionInstance *func_inst; + char *func_name; + uint32 i, j; + + os_printf("Performance profiler data:\n"); + for (i = 0; i < module_inst->function_count; i++) { + func_inst = module_inst->functions + i; + if (func_inst->is_import_func) { + func_name = func_inst->u.func_import->field_name; + } +#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0 + else if (func_inst->u.func->field_name) { + func_name = func_inst->u.func->field_name; + } +#endif + else { + func_name = NULL; + for (j = 0; j < module_inst->export_func_count; j++) { + export_func = module_inst->export_functions + j; + if (export_func->function == func_inst) { + func_name = export_func->name; + break; + } + } + } + + if (func_name) + os_printf(" func %s, execution time: %.3f ms, execution count: %d times\n", + func_name, module_inst->functions[i].total_exec_time / 1000.0f, + module_inst->functions[i].total_exec_cnt); + else + os_printf(" func %d, execution time: %.3f ms, execution count: %d 
times\n", + i, module_inst->functions[i].total_exec_time / 1000.0f, + module_inst->functions[i].total_exec_cnt); + } +} +#endif + uint32 wasm_module_malloc(WASMModuleInstance *module_inst, uint32 size, void **p_native_addr) @@ -2205,7 +2248,7 @@ wasm_get_module_inst_mem_consumption(const WASMModuleInstance *module_inst, #endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0) || (WASM_ENABLE_MEMORY_TRACING != 0) */ -#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0 +#if WASM_ENABLE_DUMP_CALL_STACK != 0 void wasm_interp_dump_call_stack(struct WASMExecEnv *exec_env) { @@ -2214,18 +2257,33 @@ wasm_interp_dump_call_stack(struct WASMExecEnv *exec_env) WASMInterpFrame *cur_frame = wasm_exec_env_get_cur_frame(exec_env); WASMFunctionInstance *func_inst; + WASMExportFuncInstance *export_func; const char *func_name = NULL; - uint32 n; + uint32 n, i; os_printf("\n"); for (n = 0; cur_frame && cur_frame->function; n++) { + func_name = NULL; func_inst = cur_frame->function; if (func_inst->is_import_func) { func_name = func_inst->u.func_import->field_name; } else { +#if WASM_ENABLE_CUSTOM_NAME_SECTION != 0 func_name = func_inst->u.func->field_name; +#endif + /* if custom name section is not generated, + search symbols from export table */ + if (!func_name) { + for (i = 0; i < module_inst->export_func_count; i++) { + export_func = module_inst->export_functions + i; + if (export_func->function == func_inst) { + func_name = export_func->name; + break; + } + } + } } /* function name not exported, print number instead */ @@ -2240,4 +2298,4 @@ wasm_interp_dump_call_stack(struct WASMExecEnv *exec_env) } os_printf("\n"); } -#endif /* end of WASM_ENABLE_CUSTOM_NAME_SECTION */ +#endif /* end of WASM_ENABLE_DUMP_CALL_STACK */ diff --git a/core/iwasm/interpreter/wasm_runtime.h b/core/iwasm/interpreter/wasm_runtime.h index 4f13b689..ccde47e4 100644 --- a/core/iwasm/interpreter/wasm_runtime.h +++ b/core/iwasm/interpreter/wasm_runtime.h @@ -120,6 +120,12 @@ struct WASMFunctionInstance { WASMModuleInstance 
*import_module_inst; WASMFunctionInstance *import_func_inst; #endif +#if WASM_ENABLE_PERF_PROFILING != 0 + /* total execution time */ + uint64 total_exec_time; + /* total execution count */ + uint32 total_exec_cnt; +#endif }; typedef struct WASMExportFuncInstance { @@ -281,12 +287,15 @@ wasm_instantiate(WASMModule *module, bool is_sub_inst, uint32 stack_size, uint32 heap_size, char *error_buf, uint32 error_buf_size); +void +wasm_dump_perf_profiling(const WASMModuleInstance *module_inst); + void wasm_deinstantiate(WASMModuleInstance *module_inst, bool is_sub_inst); WASMFunctionInstance * wasm_lookup_function(const WASMModuleInstance *module_inst, - const char *name, const char *signature); + const char *name, const char *signature); #if WASM_ENABLE_MULTI_MODULE != 0 WASMGlobalInstance * @@ -383,6 +392,12 @@ wasm_get_module_mem_consumption(const WASMModule *module, void wasm_get_module_inst_mem_consumption(const WASMModuleInstance *module, WASMModuleInstMemConsumption *mem_conspn); + +#if WASM_ENABLE_DUMP_CALL_STACK != 0 +void +wasm_interp_dump_call_stack(struct WASMExecEnv *exec_env); +#endif + #ifdef __cplusplus } #endif diff --git a/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c b/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c index c0dcb9da..35359c50 100644 --- a/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c +++ b/core/iwasm/libraries/lib-pthread/lib_pthread_wrapper.c @@ -616,7 +616,7 @@ pthread_join_wrapper(wasm_exec_env_t exec_env, uint32 thread, /* validate addr before join thread, otherwise the module_inst may be freed */ - if (!validate_app_addr(retval_offset, sizeof(uint32))) { + if (!validate_app_addr(retval_offset, sizeof(void *))) { /* Join failed, but we don't want to terminate all threads, do not spread exception here */ wasm_runtime_set_exception(module_inst, NULL); diff --git a/doc/build_wamr.md b/doc/build_wamr.md index f26f1a2b..add8412e 100644 --- a/doc/build_wamr.md +++ b/doc/build_wamr.md @@ -48,8 +48,14 @@ cmake 
-DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM - **WAMR_BUILD_CUSTOM_NAME_SECTION**=1/0, load the function name from custom name section, default to disable if not set +#### **Enable dump call stack feature** +- **WAMR_BUILD_DUMP_CALL_STACK**=1/0, default to disable if not set + > Note: if it is enabled, the call stack will be dumped when exception occurs. +> - For interpreter mode, the function names are firstly extracted from *custom name section*, if this section doesn't exist or the feature is not enabled, then the name will be extracted from the import/export sections +> - For AoT/JIT mode, the function names are extracted from import/export section, please export as many functions as possible (for `wasi-sdk` you can use `-Wl,--export-all`) when compiling wasm module, and add `--enable-dump-call-stack` option to wamrc during compiling AoT module. + #### **Enable Multi-Module feature** - **WAMR_BUILD_MULTI_MODULE**=1/0, default to disable if not set @@ -79,6 +85,12 @@ cmake -DWAMR_BUILD_PLATFORM=linux -DWAMR_BUILD_TARGET=ARM > Note: if it is enabled, developer can use API `void wasm_runtime_dump_mem_consumption(wasm_exec_env_t exec_env)` to dump the memory consumption info. Currently we only profile the memory consumption of module, module_instance and exec_env, the memory consumed by other components such as `wasi-ctx`, `multi-module` and `thread-manager` are not included. +#### **Enable performance profiling (Experiment)** +- **WAMR_BUILD_PERF_PROFILING**=1/0, default to disable if not set +> Note: if it is enabled, developer can use API `void wasm_runtime_dump_perf_profiling(wasm_module_inst_t module_inst)` to dump the performance consumption info. Currently we only profile the performance consumption of each WASM function. + +> The function name searching sequence is the same with dump call stack feature. 
+ #### **Set maximum app thread stack size** - **WAMR_APP_THREAD_STACK_SIZE_MAX**=n, default to 8 MB (8388608) if not set > Note: the AOT boundary check with hardware trap mechanism might consume large stack since the OS may lazily grow the stack mapping as a guard page is hit, we may use this configuration to reduce the total stack usage, e.g. -DWAMR_APP_THREAD_STACK_SIZE_MAX=131072 (128 KB). diff --git a/doc/build_wasm_app.md b/doc/build_wasm_app.md index ba192783..ca82531f 100644 --- a/doc/build_wasm_app.md +++ b/doc/build_wasm_app.md @@ -2,12 +2,16 @@ # Prepare WASM building environments -WASI-SDK version 8.0+ is the major tool supported by WAMR to build WASM applications. There are some other WASM compilers such as the standard clang compiler and Emscripten might also work [here](./other_wasm_compilers.md). +For C and C++, WASI-SDK version 12.0+ is the major tool supported by WAMR to build WASM applications. Also we can use [Emscripten SDK (EMSDK)](https://github.com/emscripten-core/emsdk), but it is not recommended. And there are some other compilers such as the standard clang compiler, which might also work [here](./other_wasm_compilers.md). -Install WASI SDK: Download the [wasi-sdk](https://github.com/CraneStation/wasi-sdk/releases) and extract the archive to default path `/opt/wasi-sdk` +To install WASI SDK, please download the [wasi-sdk release](https://github.com/CraneStation/wasi-sdk/releases) and extract the archive to default path `/opt/wasi-sdk`. + +For [AssemblyScript](https://github.com/AssemblyScript/assemblyscript), please refer to [AssemblyScript quick start](https://www.assemblyscript.org/quick-start.html) and [AssemblyScript compiler](https://www.assemblyscript.org/compiler.html#command-line-options) for how to install `asc` compiler and build WASM applications. 
+ +For Rust, please firstly ref to [Install Rust and Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) to install cargo, rustc and rustup, by default they are installed under ~/.cargo/bin, and then run `rustup target add wasm32-wasi` to install wasm32-wasi target for Rust toolchain. To build WASM applications, we can run `cargo build --target wasm32-wasi`, the output files are under `target/wasm32-wasi`. -Build WASM applications +Build WASM applications with wasi-sdk ========================= You can write a simple ```test.c``` as the first sample. @@ -44,21 +48,23 @@ To build the source file to WASM bytecode, we can input the following command: /opt/wasi-sdk/bin/clang -O3 -o test.wasm test.c ``` -There are some useful options which can be specified to build the source code: +## 1. wasi-sdk options -- **-nostdlib** Do not use the standard system startup files or libraries when linking. In this mode, the libc-builtin library of WAMR must be built to run the wasm app, otherwise, the libc-wasi library must be built. You can specify **-DWAMR_BUILD_LIBC_BUILTIN** or **-DWAMR_BUILD_LIBC_WASI** for cmake to build WAMR with libc-builtin support or libc-wasi support. +There are some useful options which can be specified to build the source code (for more link options, please run `/opt/wasi-sdk/bin/wasm-ld --help`): + +- **-nostdlib** Do not use the standard system startup files or libraries when linking. In this mode, the **libc-builtin** library of WAMR must be built to run the wasm app, otherwise, the **libc-wasi** library must be built. You can specify **-DWAMR_BUILD_LIBC_BUILTIN=1** or **-DWAMR_BUILD_LIBC_WASI=1** for cmake to build WAMR with libc-builtin support or libc-wasi support. - **-Wl,--no-entry** Do not output any entry point -- **-Wl,--export=** Force a symbol to be exported, e.g. **-Wl,--export=main** to export main function +- **-Wl,--export=\** Force a symbol to be exported, e.g. 
**-Wl,--export=foo** to export foo function - **-Wl,--export-all** Export all symbols (normally combined with --no-gc-sections) -- **-Wl,--initial-memory=** Initial size of the linear memory, which must be a multiple of 65536 +- **-Wl,--initial-memory=\** Initial size of the linear memory, which must be a multiple of 65536 -- **-Wl,--max-memory=** Maximum size of the linear memory, which must be a multiple of 65536 +- **-Wl,--max-memory=\** Maximum size of the linear memory, which must be a multiple of 65536 -- **-z stack-size=** The auxiliary stack size, which is an area of linear memory, and must be smaller than initial memory size. +- **-z stack-size=\** The auxiliary stack size, which is an area of linear memory, and must be smaller than initial memory size. - **-Wl,--strip-all** Strip all symbols @@ -66,11 +72,12 @@ There are some useful options which can be specified to build the source code: - **-Wl,--allow-undefined** Allow undefined symbols in linked binary -- **-Wl,--allow-undefined-file=** Allow symbols listed in to be undefined in linked binary +- **-Wl,--allow-undefined-file=\** Allow symbols listed in \ to be undefined in linked binary - **-pthread** Support POSIX threads in generated code For example, we can build the wasm app with command: + ``` Bash /opt/wasi-sdk/bin/clang -O3 -nostdlib \ -z stack-size=8192 -Wl,--initial-memory=65536 \ @@ -79,7 +86,112 @@ For example, we can build the wasm app with command: -Wl,--export=__heap_base -Wl,--export=__data_end \ -Wl,--no-entry -Wl,--strip-all -Wl,--allow-undefined ``` -to generate a wasm binary with small footprint. +to generate a wasm binary with nostdlib mode, auxiliary stack size is 8192 bytes, initial memory size is 64 KB, main function, heap base global and data end global are exported, no entry function is generated (no _start function is exported), and all symbols are stripped. 
Note that it is nostdlib mode, so libc-builtin should be enabled by runtime embedder or iwasm (with cmake -DWAMR_BUILD_LIBC_BUILTIN=1, enabled by iwasm in Linux by default). + +If we want to build the wasm app with wasi mode, we may build the wasm app with command: + +```bash +/opt/wasi-sdk/bin/clang -O3 \ + -z stack-size=8192 -Wl,--initial-memory=65536 \ + -o test.wasm test.c \ + -Wl,--export=__heap_base -Wl,--export=__data_end \ + -Wl,--strip-all +``` + +to generate a wasm binary with wasi mode, auxiliary stack size is 8192 bytes, initial memory size is 64 KB, heap base global and data end global are exported, wasi entry function is exported (_start function), and all symbols are stripped. Note that it is wasi mode, so libc-wasi should be enabled by runtime embedder or iwasm (with cmake -DWAMR_BUILD_LIBC_WASI=1, enabled by iwasm in Linux by default), and normally no need to export main function, by default _start function is executed by iwasm. + +## 2. How to reduce the footprint? + +Firstly if libc-builtin (-nostdlib) mode meets the requirements, e.g. there are no file io operations in wasm app, we should build the wasm app with -nostdlib option whenever possible, since the compiler doesn't build the libc source code into wasm bytecodes, which greatly reduces the binary size. + +### (1) Methods to reduce the libc-builtin (-nostdlib) mode footprint + +- export \_\_heap_base global and \_\_data_end global + ```bash + -Wl,--export=__heap_base -Wl,--export=__data_end + ``` + If the two globals are exported, and there are no memory.grow and memory.size opcodes (normally nostdlib mode doesn't introduce these opcodes since the libc malloc function isn't built into wasm bytecode), WAMR runtime will truncate the linear memory at the place of \__heap_base and append app heap to the end, so we don't need to allocate the memory specified by `-Wl,--initial-memory=n` which must be at least 64 KB. This is helpful for some embedded devices whose memory resource might be limited.
+ +- reduce auxiliary stack size + + The auxiliary stack is an area of linear memory, normally the size is 64 KB by default which might be a little large for embedded devices and actually partly used, we can use `-z stack-size=n` to set its size. + +- use -O3 and -Wl,--strip-all + +- reduce app heap size when running iwasm + + We can pass `--heap-size=n` option to set the maximum app heap size for iwasm, by default it is 16 KB. For the runtime embedder, we can set the `uint32_t heap_size` argument when calling API `wasm_runtime_instantiate`. + +- reduce wasm operand stack size when running iwasm + + WebAssembly is a binary instruction format for a stack-based virtual machine, which requires a stack to execute the bytecodes. We can pass `--stack-size=n` option to set the maximum stack size for iwasm, by default it is 16 KB. For the runtime embedder, we can set the `uint32_t stack_size` argument when calling API `wasm_runtime_instantiate` and `wasm_runtime_create_exec_env`. + +- decrease block_addr_cache size for classic interpreter + + The block_addr_cache is a hash cache to store the else/end addresses for WebAssembly blocks (BLOCK/IF/LOOP) to speed up address lookup. This is only available in classic interpreter. We can set it by defining the macro `-DBLOCK_ADDR_CACHE_SIZE=n`, e.g. add `add_definitions (-DBLOCK_ADDR_CACHE_SIZE=n)` in CMakeLists.txt, by default it is 64, and total block_addr_cache size is 3072 bytes in 64-bit platform and 1536 bytes in 32-bit platform. + +### (2) Methods to reduce the libc-wasi (without -nostdlib) mode footprint + +Most of the above methods are also available for libc-wasi mode, besides them, we can export malloc and free functions with `-Wl,--export=malloc -Wl,--export=free` option, so WAMR runtime will disable its app heap and call the malloc/free function exported to allocate/free the memory from/to the heap space managed by libc. + +## 3.
Build wasm app with pthread support + +Please ref to [pthread library](./pthread_library.md) for more details. + +## 4. Build wasm app with SIMD support + +Normally we should install emsdk and use its SSE header files, please ref to workload samples, e.g. [bwa CMakeLists.txt](../samples/workload/bwa/CMakeLists.txt) and [wasm-av1 CMakeLists.txt](../samples/workload/wasm-av1/CMakeLists.txt) for more details. + +# Build WASM applications with emsdk + +## 1. Install emsdk + +Assuming you are using Linux, you may install emcc and em++ from Emscripten EMSDK following the steps below: + +``` +git clone https://github.com/emscripten-core/emsdk.git +cd emsdk +./emsdk install latest +./emsdk activate latest +# And then source the emsdk_env.sh script before build wasm app +source emsdk_env.sh (or add it to ~/.bashrc if you don't want to run it each time) +``` + +The Emscripten website provides other installation methods beyond Linux. + +## 2. emsdk options + +To build the wasm C source code into wasm binary, we can use the following command: + +```bash +EMCC_ONLY_FORCED_STDLIBS=1 emcc -O3 -s STANDALONE_WASM=1 \ + -o test.wasm test.c \ + -s TOTAL_STACK=4096 -s TOTAL_MEMORY=65536 \ + -s "EXPORTED_FUNCTIONS=['_main']" \ + -s ERROR_ON_UNDEFINED_SYMBOLS=0 +``` + +There are some useful options: + +- **EMCC_ONLY_FORCED_STDLIBS=1** whether to link libc library into the output binary or not, similar to `-nostdlib` option of wasi-sdk clang. If specified, then no libc library is linked and the **libc-builtin** library of WAMR must be built to run the wasm app, otherwise, the **libc-wasi** library must be built. You can specify **-DWAMR_BUILD_LIBC_BUILTIN=1** or **-DWAMR_BUILD_LIBC_WASI=1** for cmake to build WAMR with libc-builtin support or libc-wasi support. + + The emsdk's wasi implementation is incomplete, e.g. open a file might just return fail, so it is strongly not recommended to use this mode, especially when there are file io operations in wasm app, please use wasi-sdk instead. 
+ +- **-s STANDALONE_WASM=1** build wasm app in standalone mode (non-web mode), if the output file has suffix ".wasm", then only wasm file is generated (without html file and JavaScript file). + +- **-s TOTAL_STACK=\<value\>** the auxiliary stack size, same as `-z stack-size=<value>` of wasi-sdk + +- **-s TOTAL_MEMORY=\<value\>** or **-s INITIAL_MEMORY=\<value\>** the initial linear memory size + +- **-s MAXIMUM_MEMORY=\<value\>** the maximum linear memory size, only takes effect if **-s ALLOW_MEMORY_GROWTH=1** is set + +- **-s ALLOW_MEMORY_GROWTH=1/0** whether the linear memory is allowed to grow or not + +- **-s "EXPORTED_FUNCTIONS=['func name1', 'func name2']"** to export functions + +- **-s ERROR_ON_UNDEFINED_SYMBOLS=0** disable the errors when there are undefined symbols + +For more options, please ref to \<EMSDK_DIR\>/upstream/emscripten/src/settings.js, or [Emscripten document](https://emscripten.org/docs/compiling/Building-Projects.html). # Build a project with cmake @@ -136,19 +248,29 @@ Usage: wamrc [options] -o output_file wasm_file Use +feature to enable a feature, or -feature to disable it For example, --cpu-features=+feature1,-feature2 Use --cpu-features=+help to list all the features supported - --opt-level=n Set the optimization level (0 to 3, default: 3, which is fastest) - --size-level=n Set the code size level (0 to 3, default: 3, which is smallest) + --opt-level=n Set the optimization level (0 to 3, default is 3) + --size-level=n Set the code size level (0 to 3, default is 3) -sgx Generate code for SGX platform (Intel Software Guard Extention) + --bounds-checks=1/0 Enable or disable the bounds checks for memory access: + by default it is disabled in all 64-bit platforms except SGX and + in these platforms runtime does bounds checks with hardware trap, + and by default it is enabled in all 32-bit platforms --format=<format> Specifies the format of the output file The format supported: aot (default) AoT file object Native object file llvmir-unopt Unoptimized LLVM IR llvmir-opt Optimized LLVM IR +
--enable-bulk-memory Enable the post-MVP bulk memory feature + --enable-multi-thread Enable multi-thread feature, the dependent features bulk-memory and + thread-mgr will be enabled automatically + --enable-tail-call Enable the post-MVP tail call feature + --enable-simd Enable the post-MVP 128-bit SIMD feature + --enable-dump-call-stack Enable stack trace feature + -v=n Set log verbose level (0 to 5, default is 2), larger with more log Examples: wamrc -o test.aot test.wasm wamrc --target=i386 -o test.aot test.wasm wamrc --target=i386 --format=object -o test.o test.wasm - ``` diff --git a/doc/other_wasm_compilers.md b/doc/other_wasm_compilers.md index ac1ac175..5aa505ab 100644 --- a/doc/other_wasm_compilers.md +++ b/doc/other_wasm_compilers.md @@ -1,5 +1,4 @@ - ## Use clang compiler The recommended method to build a WASM binary is to use clang compiler ```clang-8```. You can refer to [apt.llvm.org](https://apt.llvm.org) for the detailed instructions. Here are referenced steps to install clang-8 in Ubuntu 16.04 and Ubuntu 18.04. @@ -61,42 +60,6 @@ clang-8 --target=wasm32 -O3 \ You will get ```test.wasm``` which is the WASM app binary. - - - - -## Use Emscripten tool - -The last method to build a WASM binary is to use Emscripten tool ```emcc```. -Assuming you are using Linux, you may install emcc from Emscripten EMSDK following the steps below: - -``` -git clone https://github.com/emscripten-core/emsdk.git -cd emsdk -./emsdk install latest-fastcomp -./emsdk activate latest-fastcomp -``` - -The Emscripten website provides other installation methods beyond Linux. - -Use the emcc command below to build the WASM C source code into the WASM binary.
- -``` Bash -cd emsdk -source emsdk_env.sh (or add it to ~/.bashrc if you don't want to run it each time) -cd -EMCC_ONLY_FORCED_STDLIBS=1 emcc -g -O3 -s WASM=1 -s ERROR_ON_UNDEFINED_SYMBOLS=0 \ - -s TOTAL_MEMORY=65536 -s TOTAL_STACK=4096 \ - -s ASSERTIONS=1 -s STACK_OVERFLOW_CHECK=2 \ - -s "EXPORTED_FUNCTIONS=['_main']" -o test.wasm test.c -``` - -You will get ```test.wasm``` which is the WASM app binary. - - - - - ## Using Docker Another method availble is using [Docker](https://www.docker.com/). We assume you've already configured Docker (see Platform section above) and have a running interactive shell. Currently the Dockerfile only supports compiling apps with clang, with Emscripten planned for the future. diff --git a/doc/pthread_library.md b/doc/pthread_library.md index cfc6bece..927300c5 100644 --- a/doc/pthread_library.md +++ b/doc/pthread_library.md @@ -57,7 +57,7 @@ To build this C program into WebAssembly app with libc-builtin, you can use this You can also build this program with WASI, but we need to make some changes to wasi-sysroot: -1. disable malloc / free of wasi as they don't support shared memory +1. disable malloc/free of wasi if the wasi-sdk version is smaller than wasi-sdk-12.0 (not include 12.0), as they don't support shared memory: ``` bash /opt/wasi-sdk/bin/llvm-ar -d /opt/wasi-sdk/share/wasi-sysroot/lib/wasm32-wasi/libc.a dlmalloc.o ``` @@ -169,4 +169,4 @@ int pthread_key_delete(pthread_key_t key); ## Known limits - `pthread_attr_t`, `pthread_mutexattr_t` and `pthread_condattr_t` are not supported yet, so please pass `NULL` as the second argument of `pthread_create`, `pthread_mutex_init` and `pthread_cond_init`. - The `errno.o` in wasi-sysroot is not compatible with this feature, so using errno in multi-thread may cause unexpected behavior. -- Currently `struct timespec` is not supported, so the prototype of `pthread_cond_timedwait` is different from the native one, it takes an unsigned int argument `useconds` to indicate the waiting time. 
\ No newline at end of file +- Currently `struct timespec` is not supported, so the prototype of `pthread_cond_timedwait` is different from the native one, it takes an unsigned int argument `useconds` to indicate the waiting time. diff --git a/wamr-compiler/CMakeLists.txt b/wamr-compiler/CMakeLists.txt index de15a272..03eaf344 100644 --- a/wamr-compiler/CMakeLists.txt +++ b/wamr-compiler/CMakeLists.txt @@ -28,6 +28,9 @@ add_definitions(-DWASM_ENABLE_SHARED_MEMORY=1) add_definitions(-DWASM_ENABLE_THREAD_MGR=1) add_definitions(-DWASM_ENABLE_TAIL_CALL=1) add_definitions(-DWASM_ENABLE_SIMD=1) +add_definitions(-DWASM_ENABLE_CUSTOM_NAME_SECTION=1) +add_definitions(-DWASM_ENABLE_DUMP_CALL_STACK=1) +add_definitions(-DWASM_ENABLE_PERF_PROFILING=1) # Set WAMR_BUILD_TARGET, currently values supported: # "X86_64", "AMD_64", "X86_32", "ARM_32", "MIPS_32", "XTENSA_32" diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index 0fb5710c..d0494116 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -41,9 +41,11 @@ print_help() printf(" llvmir-opt Optimized LLVM IR\n"); printf(" --enable-bulk-memory Enable the post-MVP bulk memory feature\n"); printf(" --enable-multi-thread Enable multi-thread feature, the dependent features bulk-memory and\n"); - printf(" --enable-tail-call Enable the post-MVP tail call feature\n"); printf(" thread-mgr will be enabled automatically\n"); + printf(" --enable-tail-call Enable the post-MVP tail call feature\n"); printf(" --enable-simd Enable the post-MVP 128-bit SIMD feature\n"); + printf(" --enable-dump-call-stack Enable stack trace feature\n"); + printf(" --enable-perf-profiling Enable function performance profiling\n"); printf(" -v=n Set log verbose level (0 to 5, default is 2), larger with more log\n"); printf("Examples: wamrc -o test.aot test.wasm\n"); printf(" wamrc --target=i386 -o test.aot test.wasm\n"); @@ -155,6 +157,12 @@ main(int argc, char *argv[]) else if (!strcmp(argv[0], "--enable-simd")) { option.enable_simd = true; } + 
else if (!strcmp(argv[0], "--enable-dump-call-stack")) { + option.enable_aux_stack_frame = true; + } + else if (!strcmp(argv[0], "--enable-perf-profiling")) { + option.enable_aux_stack_frame = true; + } else return print_help(); }