diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c index 8ae33a47..0c94ef7c 100644 --- a/core/iwasm/aot/aot_runtime.c +++ b/core/iwasm/aot/aot_runtime.c @@ -30,6 +30,8 @@ bh_static_assert(offsetof(WASMExecEnv, aux_stack_boundary) bh_static_assert(offsetof(WASMExecEnv, aux_stack_bottom) == 7 * sizeof(uintptr_t)); bh_static_assert(offsetof(WASMExecEnv, native_symbol) == 8 * sizeof(uintptr_t)); +bh_static_assert(offsetof(WASMExecEnv, native_stack_top_min) + == 9 * sizeof(uintptr_t)); bh_static_assert(offsetof(AOTModuleInstance, memories) == 1 * sizeof(uint64)); bh_static_assert(offsetof(AOTModuleInstance, func_ptrs) == 5 * sizeof(uint64)); @@ -1257,6 +1259,7 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr, /* Check native stack overflow firstly to ensure we have enough native stack to run the following codes before actually calling the aot function in invokeNative function. */ + RECORD_STACK_USAGE(exec_env, (uint8 *)&module_inst); if ((uint8 *)&module_inst < exec_env->native_stack_boundary + page_size * (guard_page_count + 1)) { aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW); @@ -1856,6 +1859,7 @@ aot_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 table_elem_idx, exec_env->native_stack_boundary must have been set, we don't set it again */ + RECORD_STACK_USAGE(exec_env, (uint8 *)&module_inst); if ((uint8 *)&module_inst < exec_env->native_stack_boundary) { aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW); goto fail; diff --git a/core/iwasm/common/wasm_exec_env.c b/core/iwasm/common/wasm_exec_env.c index 7939c759..9bd74d7d 100644 --- a/core/iwasm/common/wasm_exec_env.c +++ b/core/iwasm/common/wasm_exec_env.c @@ -211,6 +211,7 @@ wasm_exec_env_set_thread_info(WASMExecEnv *exec_env) exec_env->handle = os_self_thread(); exec_env->native_stack_boundary = stack_boundary ? stack_boundary + WASM_STACK_GUARD_SIZE : NULL; + exec_env->native_stack_top_min = (void *)UINTPTR_MAX; } #if WASM_ENABLE_THREAD_MGR != 0 diff --git a/core/iwasm/common/wasm_exec_env.h b/core/iwasm/common/wasm_exec_env.h index 39829207..1f139411 100644 --- a/core/iwasm/common/wasm_exec_env.h +++ b/core/iwasm/common/wasm_exec_env.h @@ -84,6 +84,12 @@ typedef struct WASMExecEnv { void **native_symbol; #endif + /* + * The lowest stack pointer value observed. + * Assumption: native stack grows to the lower address. + */ + uint8 *native_stack_top_min; + #if WASM_ENABLE_FAST_JIT != 0 /** * Cache for @@ -165,6 +171,17 @@ typedef struct WASMExecEnv { } wasm_stack; } WASMExecEnv; +#if WASM_ENABLE_MEMORY_PROFILING != 0 +#define RECORD_STACK_USAGE(e, p) \ + do { \ + if ((e)->native_stack_top_min > (p)) { \ + (e)->native_stack_top_min = (p); \ + } \ + } while (0) +#else +#define RECORD_STACK_USAGE(e, p) (void)0 +#endif + WASMExecEnv * wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst, uint32 stack_size); diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index aed92edf..db35b01d 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -1399,6 +1399,22 @@ wasm_runtime_dump_mem_consumption(WASMExecEnv *exec_env) else os_printf("Total aux stack used: no enough info to profile\n"); + /* + * Report the native stack usage estimation. + * + * Unlike the aux stack above, we report the amount unused + * because we don't know the stack "bottom". + * + * Note that this is just about what the runtime itself observed. + * It doesn't cover host func implementations, signal handlers, etc. + */ + if (exec_env->native_stack_top_min != (void *)UINTPTR_MAX) + os_printf("Native stack left: %zd\n", + exec_env->native_stack_top_min + - exec_env->native_stack_boundary); + else + os_printf("Native stack left: no enough info to profile\n"); + os_printf("Total app heap used: %u\n", app_heap_peak_size); } #endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0) \ diff --git a/core/iwasm/compilation/aot_compiler.h b/core/iwasm/compilation/aot_compiler.h index d5e45f28..e6031ab8 100644 --- a/core/iwasm/compilation/aot_compiler.h +++ b/core/iwasm/compilation/aot_compiler.h @@ -259,6 +259,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type) #define I32_SIX LLVM_CONST(i32_six) #define I32_SEVEN LLVM_CONST(i32_seven) #define I32_EIGHT LLVM_CONST(i32_eight) +#define I32_NINE LLVM_CONST(i32_nine) #define I32_NEG_ONE LLVM_CONST(i32_neg_one) #define I64_NEG_ONE LLVM_CONST(i64_neg_one) #define I32_MIN LLVM_CONST(i32_min) diff --git a/core/iwasm/compilation/aot_emit_function.c b/core/iwasm/compilation/aot_emit_function.c index d6a5a4ab..4ac62a9e 100644 --- a/core/iwasm/compilation/aot_emit_function.c +++ b/core/iwasm/compilation/aot_emit_function.c @@ -366,6 +366,87 @@ fail: #endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) \ || (WASM_ENABLE_PERF_PROFILING != 0) */ +static bool +record_stack_usage(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + uint32 callee_cell_num) +{ + LLVMBasicBlockRef block_curr = LLVMGetInsertBlock(comp_ctx->builder); + LLVMBasicBlockRef block_update; + LLVMBasicBlockRef block_after_update; + LLVMValueRef callee_local_size, new_sp, cmp; + LLVMValueRef native_stack_top_min; + LLVMTypeRef ptrdiff_type; + if (comp_ctx->pointer_size == sizeof(uint64_t)) { + ptrdiff_type = I64_TYPE; + } + else { + ptrdiff_type = I32_TYPE; + } + + /* + * new_sp = last_alloca - callee_local_size; + * if (*native_stack_top_min_addr > new_sp) { + * *native_stack_top_min_addr = new_sp; + * } + */ + + if (!(callee_local_size = LLVMConstInt( + ptrdiff_type, -(int64_t)callee_cell_num * 4, true))) { + aot_set_last_error("llvm build const failed."); + return false; + } + if (!(new_sp = LLVMBuildInBoundsGEP2(comp_ctx->builder, INT8_TYPE, + func_ctx->last_alloca, + &callee_local_size, 1, "new_sp"))) { + aot_set_last_error("llvm build gep failed"); + return false; + } + if (!(native_stack_top_min = LLVMBuildLoad2( + comp_ctx->builder, OPQ_PTR_TYPE, + func_ctx->native_stack_top_min_addr, "native_stack_top_min"))) { + aot_set_last_error("llvm build load failed"); + return false; + } + if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntULT, new_sp, + native_stack_top_min, "cmp"))) { + aot_set_last_error("llvm build icmp failed."); + return false; + } + + if (!(block_update = LLVMAppendBasicBlockInContext( + comp_ctx->context, func_ctx->func, "block_update"))) { + aot_set_last_error("llvm add basic block failed."); + return false; + } + if (!(block_after_update = LLVMAppendBasicBlockInContext( + comp_ctx->context, func_ctx->func, "block_after_update"))) { + aot_set_last_error("llvm add basic block failed."); + return false; + } + LLVMMoveBasicBlockAfter(block_update, block_curr); + LLVMMoveBasicBlockAfter(block_after_update, block_update); + + if (!LLVMBuildCondBr(comp_ctx->builder, cmp, block_update, + block_after_update)) { + aot_set_last_error("llvm build cond br failed."); + return false; + } + + LLVMPositionBuilderAtEnd(comp_ctx->builder, block_update); + if (!LLVMBuildStore(comp_ctx->builder, new_sp, + func_ctx->native_stack_top_min_addr)) { + aot_set_last_error("llvm build store failed"); + return false; + } + if (!LLVMBuildBr(comp_ctx->builder, block_after_update)) { + aot_set_last_error("llvm build br failed."); + return false; + } + + LLVMPositionBuilderAtEnd(comp_ctx->builder, block_after_update); + return true; +} + static bool check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 callee_cell_num) @@ -409,6 +490,19 @@ check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, return true; } +static bool +check_stack(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, + uint32 callee_cell_num) +{ + if (comp_ctx->enable_stack_estimation + && !record_stack_usage(comp_ctx, func_ctx, callee_cell_num)) + return false; + if (comp_ctx->enable_stack_bound_check + && !check_stack_boundary(comp_ctx, func_ctx, callee_cell_num)) + return false; + return true; +} + /** * Check whether the app address and its buffer are inside the linear memory, * if no, throw exception @@ -852,8 +946,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, callee_cell_num = aot_func->param_cell_num + aot_func->local_cell_num + 1; - if (comp_ctx->enable_stack_bound_check - && !check_stack_boundary(comp_ctx, func_ctx, callee_cell_num)) + if (!check_stack(comp_ctx, func_ctx, callee_cell_num)) goto fail; #if LLVM_VERSION_MAJOR >= 14 @@ -1467,12 +1560,11 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, /* Translate call non-import block */ LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_non_import); - if (comp_ctx->enable_stack_bound_check - && !check_stack_boundary(comp_ctx, func_ctx, - param_cell_num + ext_cell_num - + 1 - /* Reserve some local variables */ - + 16)) + if (!check_stack(comp_ctx, func_ctx, + param_cell_num + ext_cell_num + + 1 + /* Reserve some local variables */ + + 16)) goto fail; /* Load function pointer */ diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 889ab259..27550560 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -286,6 +286,21 @@ create_native_stack_bound(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) return true; } +static bool +create_native_stack_top_min(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) +{ + LLVMValueRef offset = I32_NINE; + + if (!(func_ctx->native_stack_top_min_addr = LLVMBuildInBoundsGEP2( + comp_ctx->builder, OPQ_PTR_TYPE, func_ctx->exec_env, &offset, 1, + "native_stack_top_min_addr"))) { + aot_set_last_error("llvm build in bounds gep failed"); + return false; + } + + return true; +} + static bool create_aux_stack_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) { @@ -434,7 +449,8 @@ create_local_variables(AOTCompData *comp_data, AOTCompContext *comp_ctx, } } - if (comp_ctx->enable_stack_bound_check) { + if (comp_ctx->enable_stack_bound_check + || comp_ctx->enable_stack_estimation) { if (aot_func_type->param_count + func->local_count > 0) { func_ctx->last_alloca = func_ctx->locals[aot_func_type->param_count + func->local_count - 1]; @@ -963,6 +979,10 @@ aot_create_func_context(AOTCompData *comp_data, AOTCompContext *comp_ctx, && !create_native_stack_bound(comp_ctx, func_ctx)) { goto fail; } + if (comp_ctx->enable_stack_estimation + && !create_native_stack_top_min(comp_ctx, func_ctx)) { + goto fail; + } /* Get auxiliary stack info */ if (wasm_func->has_op_set_global_aux_stack @@ -1622,6 +1642,9 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option) if (option->disable_llvm_lto) comp_ctx->disable_llvm_lto = true; + if (option->enable_stack_estimation) + comp_ctx->enable_stack_estimation = true; + comp_ctx->opt_level = option->opt_level; comp_ctx->size_level = option->size_level; diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 219f36e7..b982e808 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -163,6 +163,7 @@ typedef struct AOTFuncContext { LLVMValueRef aot_inst; LLVMValueRef argv_buf; LLVMValueRef native_stack_bound; + LLVMValueRef native_stack_top_min_addr; LLVMValueRef aux_stack_bound; LLVMValueRef aux_stack_bottom; LLVMValueRef native_symbol; @@ -313,6 +314,9 @@ typedef struct AOTCompContext { /* Native stack bounday Check */ bool enable_stack_bound_check; + /* Native stack usage estimation */ + bool enable_stack_estimation; + /* 128-bit SIMD */ bool enable_simd; @@ -403,6 +407,7 @@ typedef struct AOTCompOption { bool enable_aux_stack_frame; bool disable_llvm_intrinsics; bool disable_llvm_lto; + bool enable_stack_estimation; uint32 opt_level; uint32 size_level; uint32 output_format; diff --git a/core/iwasm/include/aot_export.h b/core/iwasm/include/aot_export.h index f8df168e..792a4baa 100644 --- a/core/iwasm/include/aot_export.h +++ b/core/iwasm/include/aot_export.h @@ -55,6 +55,7 @@ typedef struct AOTCompOption { bool enable_aux_stack_frame; bool disable_llvm_intrinsics; bool disable_llvm_lto; + bool enable_stack_estimation; uint32_t opt_level; uint32_t size_level; uint32_t output_format; diff --git a/core/iwasm/interpreter/wasm_interp_classic.c b/core/iwasm/interpreter/wasm_interp_classic.c index 3e16b380..3c43dcdc 100644 --- a/core/iwasm/interpreter/wasm_interp_classic.c +++ b/core/iwasm/interpreter/wasm_interp_classic.c @@ -4150,6 +4150,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, } argc = function->param_cell_num; + RECORD_STACK_USAGE(exec_env, (uint8 *)&prev_frame); #if !(defined(OS_ENABLE_HW_BOUND_CHECK) \ && WASM_DISABLE_STACK_HW_BOUND_CHECK == 0) if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) { diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 0ea920f2..7209f7bd 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -3901,6 +3901,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env, } argc = function->param_cell_num; + RECORD_STACK_USAGE(exec_env, (uint8 *)&prev_frame); #if !(defined(OS_ENABLE_HW_BOUND_CHECK) \ && WASM_DISABLE_STACK_HW_BOUND_CHECK == 0) if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) { diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index d0a10b86..f60f9efb 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -3048,6 +3048,9 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, #if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) option.enable_aux_stack_frame = true; #endif +#if WASM_ENABLE_MEMORY_PROFILING != 0 + option.enable_stack_estimation = true; +#endif module->comp_ctx = aot_create_comp_context(module->comp_data, &option); if (!module->comp_ctx) { diff --git a/core/iwasm/interpreter/wasm_mini_loader.c b/core/iwasm/interpreter/wasm_mini_loader.c index b1243a06..815dffa0 100644 --- a/core/iwasm/interpreter/wasm_mini_loader.c +++ b/core/iwasm/interpreter/wasm_mini_loader.c @@ -1894,6 +1894,9 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf, #if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) option.enable_aux_stack_frame = true; #endif +#if WASM_ENABLE_MEMORY_PROFILING != 0 + option.enable_stack_estimation = true; +#endif module->comp_ctx = aot_create_comp_context(module->comp_data, &option); if (!module->comp_ctx) { diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c index ff3f4f2f..baded7aa 100644 --- a/core/iwasm/interpreter/wasm_runtime.c +++ b/core/iwasm/interpreter/wasm_runtime.c @@ -2074,6 +2074,7 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst, /* Check native stack overflow firstly to ensure we have enough native stack to run the following codes before actually calling the aot function in invokeNative function. */ + RECORD_STACK_USAGE(exec_env, (uint8 *)&exec_env_tls); if ((uint8 *)&exec_env_tls < exec_env->native_stack_boundary + page_size * (guard_page_count + 1)) { wasm_set_exception(module_inst, "native stack overflow"); diff --git a/wamr-compiler/main.c b/wamr-compiler/main.c index 28986e86..f185a17b 100644 --- a/wamr-compiler/main.c +++ b/wamr-compiler/main.c @@ -59,6 +59,7 @@ print_help() printf(" --disable-aux-stack-check Disable auxiliary stack overflow/underflow check\n"); printf(" --enable-dump-call-stack Enable stack trace feature\n"); printf(" --enable-perf-profiling Enable function performance profiling\n"); + printf(" --enable-memory-profiling Enable memory usage profiling\n"); printf(" --enable-indirect-mode Enalbe call function through symbol table but not direct call\n"); printf(" --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n"); printf(" --disable-llvm-lto Disable the LLVM link time optimization\n"); @@ -254,6 +255,9 @@ main(int argc, char *argv[]) else if (!strcmp(argv[0], "--enable-perf-profiling")) { option.enable_aux_stack_frame = true; } + else if (!strcmp(argv[0], "--enable-memory-profiling")) { + option.enable_stack_estimation = true; + } else if (!strcmp(argv[0], "--enable-indirect-mode")) { option.is_indirect_mode = true; }