Make memory profiling show native stack usage (#1917)

This commit is contained in:
YAMAMOTO Takashi 2023-02-01 12:52:15 +09:00 committed by GitHub
parent 0435acdd43
commit 7d3b2a8773
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 182 additions and 9 deletions

View File

@ -30,6 +30,8 @@ bh_static_assert(offsetof(WASMExecEnv, aux_stack_boundary)
bh_static_assert(offsetof(WASMExecEnv, aux_stack_bottom) bh_static_assert(offsetof(WASMExecEnv, aux_stack_bottom)
== 7 * sizeof(uintptr_t)); == 7 * sizeof(uintptr_t));
bh_static_assert(offsetof(WASMExecEnv, native_symbol) == 8 * sizeof(uintptr_t)); bh_static_assert(offsetof(WASMExecEnv, native_symbol) == 8 * sizeof(uintptr_t));
bh_static_assert(offsetof(WASMExecEnv, native_stack_top_min)
== 9 * sizeof(uintptr_t));
bh_static_assert(offsetof(AOTModuleInstance, memories) == 1 * sizeof(uint64)); bh_static_assert(offsetof(AOTModuleInstance, memories) == 1 * sizeof(uint64));
bh_static_assert(offsetof(AOTModuleInstance, func_ptrs) == 5 * sizeof(uint64)); bh_static_assert(offsetof(AOTModuleInstance, func_ptrs) == 5 * sizeof(uint64));
@ -1257,6 +1259,7 @@ invoke_native_with_hw_bound_check(WASMExecEnv *exec_env, void *func_ptr,
/* Check native stack overflow firstly to ensure we have enough /* Check native stack overflow firstly to ensure we have enough
native stack to run the following codes before actually calling native stack to run the following codes before actually calling
the aot function in invokeNative function. */ the aot function in invokeNative function. */
RECORD_STACK_USAGE(exec_env, (uint8 *)&module_inst);
if ((uint8 *)&module_inst < exec_env->native_stack_boundary if ((uint8 *)&module_inst < exec_env->native_stack_boundary
+ page_size * (guard_page_count + 1)) { + page_size * (guard_page_count + 1)) {
aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW); aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW);
@ -1856,6 +1859,7 @@ aot_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 table_elem_idx,
exec_env->native_stack_boundary must have been set, we don't set exec_env->native_stack_boundary must have been set, we don't set
it again */ it again */
RECORD_STACK_USAGE(exec_env, (uint8 *)&module_inst);
if ((uint8 *)&module_inst < exec_env->native_stack_boundary) { if ((uint8 *)&module_inst < exec_env->native_stack_boundary) {
aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW); aot_set_exception_with_id(module_inst, EXCE_NATIVE_STACK_OVERFLOW);
goto fail; goto fail;

View File

@ -211,6 +211,7 @@ wasm_exec_env_set_thread_info(WASMExecEnv *exec_env)
exec_env->handle = os_self_thread(); exec_env->handle = os_self_thread();
exec_env->native_stack_boundary = exec_env->native_stack_boundary =
stack_boundary ? stack_boundary + WASM_STACK_GUARD_SIZE : NULL; stack_boundary ? stack_boundary + WASM_STACK_GUARD_SIZE : NULL;
exec_env->native_stack_top_min = (void *)UINTPTR_MAX;
} }
#if WASM_ENABLE_THREAD_MGR != 0 #if WASM_ENABLE_THREAD_MGR != 0

View File

@ -84,6 +84,12 @@ typedef struct WASMExecEnv {
void **native_symbol; void **native_symbol;
#endif #endif
/*
* The lowest stack pointer value observed.
* Assumption: native stack grows to the lower address.
*/
uint8 *native_stack_top_min;
#if WASM_ENABLE_FAST_JIT != 0 #if WASM_ENABLE_FAST_JIT != 0
/** /**
* Cache for * Cache for
@ -165,6 +171,17 @@ typedef struct WASMExecEnv {
} wasm_stack; } wasm_stack;
} WASMExecEnv; } WASMExecEnv;
/*
 * RECORD_STACK_USAGE(e, p)
 *
 * e: pointer to an object that has a native_stack_top_min member
 * p: an address currently in use on the native stack
 *
 * When WASM_ENABLE_MEMORY_PROFILING is non-zero, lower
 * (e)->native_stack_top_min to p if p is below the recorded value.
 * Otherwise the macro expands to a no-op and its arguments are not
 * evaluated.
 *
 * Note: both arguments may be evaluated more than once, so only pass
 * expressions without side effects.
 */
#if WASM_ENABLE_MEMORY_PROFILING == 0
#define RECORD_STACK_USAGE(e, p) (void)0
#else
#define RECORD_STACK_USAGE(e, p)                 \
    do {                                         \
        if ((p) < (e)->native_stack_top_min)     \
            (e)->native_stack_top_min = (p);     \
    } while (0)
#endif
WASMExecEnv * WASMExecEnv *
wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst, wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst,
uint32 stack_size); uint32 stack_size);

View File

@ -1399,6 +1399,22 @@ wasm_runtime_dump_mem_consumption(WASMExecEnv *exec_env)
else else
os_printf("Total aux stack used: no enough info to profile\n"); os_printf("Total aux stack used: no enough info to profile\n");
/*
* Report the native stack usage estimation.
*
* Unlike the aux stack above, we report the amount unused
* because we don't know the stack "bottom".
*
* Note that this is just about what the runtime itself observed.
* It doesn't cover host func implementations, signal handlers, etc.
*/
if (exec_env->native_stack_top_min != (void *)UINTPTR_MAX)
os_printf("Native stack left: %zd\n",
exec_env->native_stack_top_min
- exec_env->native_stack_boundary);
else
os_printf("Native stack left: no enough info to profile\n");
os_printf("Total app heap used: %u\n", app_heap_peak_size); os_printf("Total app heap used: %u\n", app_heap_peak_size);
} }
#endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0) \ #endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0) \

View File

@ -259,6 +259,7 @@ check_type_compatible(uint8 src_type, uint8 dst_type)
#define I32_SIX LLVM_CONST(i32_six) #define I32_SIX LLVM_CONST(i32_six)
#define I32_SEVEN LLVM_CONST(i32_seven) #define I32_SEVEN LLVM_CONST(i32_seven)
#define I32_EIGHT LLVM_CONST(i32_eight) #define I32_EIGHT LLVM_CONST(i32_eight)
#define I32_NINE LLVM_CONST(i32_nine)
#define I32_NEG_ONE LLVM_CONST(i32_neg_one) #define I32_NEG_ONE LLVM_CONST(i32_neg_one)
#define I64_NEG_ONE LLVM_CONST(i64_neg_one) #define I64_NEG_ONE LLVM_CONST(i64_neg_one)
#define I32_MIN LLVM_CONST(i32_min) #define I32_MIN LLVM_CONST(i32_min)

View File

@ -366,6 +366,87 @@ fail:
#endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) \ #endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0) \
|| (WASM_ENABLE_PERF_PROFILING != 0) */ || (WASM_ENABLE_PERF_PROFILING != 0) */
/**
 * Emit IR that records an estimate of the native stack pointer for an
 * upcoming call.
 *
 * The generated code is equivalent to:
 *
 *     new_sp = last_alloca - callee_cell_num * 4;
 *     if (new_sp < *native_stack_top_min_addr) {
 *         *native_stack_top_min_addr = new_sp;
 *     }
 *
 * Two basic blocks ("block_update" and "block_after_update") are inserted
 * right after the current insert block. On success the builder is left
 * positioned at the end of "block_after_update".
 *
 * @param comp_ctx the compilation context (builder, LLVM context,
 *                 target pointer size)
 * @param func_ctx the context of the function being compiled; its
 *                 last_alloca and native_stack_top_min_addr values are used
 * @param callee_cell_num estimated callee frame size in cells; each cell
 *                        is counted as 4 bytes
 *
 * @return true on success, false on failure (the last error is set)
 */
static bool
record_stack_usage(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                   uint32 callee_cell_num)
{
    LLVMBasicBlockRef block_curr = LLVMGetInsertBlock(comp_ctx->builder);
    LLVMBasicBlockRef block_update;
    LLVMBasicBlockRef block_after_update;
    LLVMValueRef callee_local_size, new_sp, cmp;
    LLVMValueRef native_stack_top_min;
    LLVMTypeRef ptrdiff_type;

    /* Use an offset type as wide as a pointer on the target */
    if (comp_ctx->pointer_size == sizeof(uint64_t)) {
        ptrdiff_type = I64_TYPE;
    }
    else {
        ptrdiff_type = I32_TYPE;
    }

    /*
     * new_sp = last_alloca - callee_local_size;
     * if (*native_stack_top_min_addr > new_sp) {
     *     *native_stack_top_min_addr = new_sp;
     * }
     */

    /* Negative byte offset: the stack is assumed to grow downwards */
    if (!(callee_local_size = LLVMConstInt(
              ptrdiff_type, -(int64_t)callee_cell_num * 4, true))) {
        aot_set_last_error("llvm build const failed.");
        return false;
    }

    /*
     * Deliberately a plain (non-inbounds) GEP: the resulting address lies
     * below last_alloca, i.e. outside of that allocated object. An
     * "inbounds" GEP would produce a poison value in that case, and the
     * compare-and-branch below would then depend on poison.
     */
    if (!(new_sp = LLVMBuildGEP2(comp_ctx->builder, INT8_TYPE,
                                 func_ctx->last_alloca, &callee_local_size,
                                 1, "new_sp"))) {
        aot_set_last_error("llvm build gep failed");
        return false;
    }

    /* Load the lowest stack address recorded so far */
    if (!(native_stack_top_min = LLVMBuildLoad2(
              comp_ctx->builder, OPQ_PTR_TYPE,
              func_ctx->native_stack_top_min_addr, "native_stack_top_min"))) {
        aot_set_last_error("llvm build load failed");
        return false;
    }

    /* Unsigned comparison of addresses: new_sp < recorded minimum? */
    if (!(cmp = LLVMBuildICmp(comp_ctx->builder, LLVMIntULT, new_sp,
                              native_stack_top_min, "cmp"))) {
        aot_set_last_error("llvm build icmp failed.");
        return false;
    }

    if (!(block_update = LLVMAppendBasicBlockInContext(
              comp_ctx->context, func_ctx->func, "block_update"))) {
        aot_set_last_error("llvm add basic block failed.");
        return false;
    }
    if (!(block_after_update = LLVMAppendBasicBlockInContext(
              comp_ctx->context, func_ctx->func, "block_after_update"))) {
        aot_set_last_error("llvm add basic block failed.");
        return false;
    }

    /* Keep the new blocks adjacent to the current one:
       block_curr -> block_update -> block_after_update */
    LLVMMoveBasicBlockAfter(block_update, block_curr);
    LLVMMoveBasicBlockAfter(block_after_update, block_update);

    if (!LLVMBuildCondBr(comp_ctx->builder, cmp, block_update,
                         block_after_update)) {
        aot_set_last_error("llvm build cond br failed.");
        return false;
    }

    /* block_update: store the new minimum, then fall through */
    LLVMPositionBuilderAtEnd(comp_ctx->builder, block_update);
    if (!LLVMBuildStore(comp_ctx->builder, new_sp,
                        func_ctx->native_stack_top_min_addr)) {
        aot_set_last_error("llvm build store failed");
        return false;
    }
    if (!LLVMBuildBr(comp_ctx->builder, block_after_update)) {
        aot_set_last_error("llvm build br failed.");
        return false;
    }

    /* Subsequent IR is emitted after the (optional) update */
    LLVMPositionBuilderAtEnd(comp_ctx->builder, block_after_update);
    return true;
}
static bool static bool
check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
uint32 callee_cell_num) uint32 callee_cell_num)
@ -409,6 +490,19 @@ check_stack_boundary(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
return true; return true;
} }
/**
 * Emit the native stack related IR that precedes a call, according to the
 * options enabled in the compilation context:
 *  - enable_stack_estimation:  record the estimated stack usage
 *  - enable_stack_bound_check: check against the native stack boundary
 *
 * The usage recording, when enabled, is emitted before the boundary check.
 *
 * @param comp_ctx the compilation context
 * @param func_ctx the context of the function being compiled
 * @param callee_cell_num estimated callee frame size in cells, forwarded
 *                        unchanged to both helpers
 *
 * @return true if every enabled step succeeded (or none is enabled),
 *         false as soon as one of them fails
 */
static bool
check_stack(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
            uint32 callee_cell_num)
{
    if (comp_ctx->enable_stack_estimation) {
        if (!record_stack_usage(comp_ctx, func_ctx, callee_cell_num)) {
            return false;
        }
    }

    if (comp_ctx->enable_stack_bound_check) {
        if (!check_stack_boundary(comp_ctx, func_ctx, callee_cell_num)) {
            return false;
        }
    }

    return true;
}
/** /**
* Check whether the app address and its buffer are inside the linear memory, * Check whether the app address and its buffer are inside the linear memory,
* if no, throw exception * if no, throw exception
@ -852,8 +946,7 @@ aot_compile_op_call(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
callee_cell_num = callee_cell_num =
aot_func->param_cell_num + aot_func->local_cell_num + 1; aot_func->param_cell_num + aot_func->local_cell_num + 1;
if (comp_ctx->enable_stack_bound_check if (!check_stack(comp_ctx, func_ctx, callee_cell_num))
&& !check_stack_boundary(comp_ctx, func_ctx, callee_cell_num))
goto fail; goto fail;
#if LLVM_VERSION_MAJOR >= 14 #if LLVM_VERSION_MAJOR >= 14
@ -1467,12 +1560,11 @@ aot_compile_op_call_indirect(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
/* Translate call non-import block */ /* Translate call non-import block */
LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_non_import); LLVMPositionBuilderAtEnd(comp_ctx->builder, block_call_non_import);
if (comp_ctx->enable_stack_bound_check if (!check_stack(comp_ctx, func_ctx,
&& !check_stack_boundary(comp_ctx, func_ctx, param_cell_num + ext_cell_num
param_cell_num + ext_cell_num + 1
+ 1 /* Reserve some local variables */
/* Reserve some local variables */ + 16))
+ 16))
goto fail; goto fail;
/* Load function pointer */ /* Load function pointer */

View File

@ -286,6 +286,21 @@ create_native_stack_bound(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
return true; return true;
} }
/**
 * Compute the address of exec_env's native_stack_top_min field and cache it
 * in func_ctx->native_stack_top_min_addr.
 *
 * The address is formed by indexing exec_env with element type OPQ_PTR_TYPE
 * and index 9, which is expected to match the layout assertion
 *     offsetof(WASMExecEnv, native_stack_top_min) == 9 * sizeof(uintptr_t)
 * (assuming OPQ_PTR_TYPE is pointer-sized on the target).
 *
 * @param comp_ctx the compilation context
 * @param func_ctx the context of the function being compiled; its exec_env
 *                 value is used as the base pointer
 *
 * @return true on success, false on failure (the last error is set)
 */
static bool
create_native_stack_top_min(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
{
    LLVMValueRef field_index = I32_NINE;
    LLVMValueRef field_addr;

    field_addr = LLVMBuildInBoundsGEP2(comp_ctx->builder, OPQ_PTR_TYPE,
                                       func_ctx->exec_env, &field_index, 1,
                                       "native_stack_top_min_addr");
    /* Stored unconditionally, so it is NULL when the build failed */
    func_ctx->native_stack_top_min_addr = field_addr;

    if (field_addr == NULL) {
        aot_set_last_error("llvm build in bounds gep failed");
        return false;
    }

    return true;
}
static bool static bool
create_aux_stack_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx) create_aux_stack_info(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
{ {
@ -434,7 +449,8 @@ create_local_variables(AOTCompData *comp_data, AOTCompContext *comp_ctx,
} }
} }
if (comp_ctx->enable_stack_bound_check) { if (comp_ctx->enable_stack_bound_check
|| comp_ctx->enable_stack_estimation) {
if (aot_func_type->param_count + func->local_count > 0) { if (aot_func_type->param_count + func->local_count > 0) {
func_ctx->last_alloca = func_ctx->locals[aot_func_type->param_count func_ctx->last_alloca = func_ctx->locals[aot_func_type->param_count
+ func->local_count - 1]; + func->local_count - 1];
@ -963,6 +979,10 @@ aot_create_func_context(AOTCompData *comp_data, AOTCompContext *comp_ctx,
&& !create_native_stack_bound(comp_ctx, func_ctx)) { && !create_native_stack_bound(comp_ctx, func_ctx)) {
goto fail; goto fail;
} }
if (comp_ctx->enable_stack_estimation
&& !create_native_stack_top_min(comp_ctx, func_ctx)) {
goto fail;
}
/* Get auxiliary stack info */ /* Get auxiliary stack info */
if (wasm_func->has_op_set_global_aux_stack if (wasm_func->has_op_set_global_aux_stack
@ -1622,6 +1642,9 @@ aot_create_comp_context(AOTCompData *comp_data, aot_comp_option_t option)
if (option->disable_llvm_lto) if (option->disable_llvm_lto)
comp_ctx->disable_llvm_lto = true; comp_ctx->disable_llvm_lto = true;
if (option->enable_stack_estimation)
comp_ctx->enable_stack_estimation = true;
comp_ctx->opt_level = option->opt_level; comp_ctx->opt_level = option->opt_level;
comp_ctx->size_level = option->size_level; comp_ctx->size_level = option->size_level;

View File

@ -163,6 +163,7 @@ typedef struct AOTFuncContext {
LLVMValueRef aot_inst; LLVMValueRef aot_inst;
LLVMValueRef argv_buf; LLVMValueRef argv_buf;
LLVMValueRef native_stack_bound; LLVMValueRef native_stack_bound;
LLVMValueRef native_stack_top_min_addr;
LLVMValueRef aux_stack_bound; LLVMValueRef aux_stack_bound;
LLVMValueRef aux_stack_bottom; LLVMValueRef aux_stack_bottom;
LLVMValueRef native_symbol; LLVMValueRef native_symbol;
@ -313,6 +314,9 @@ typedef struct AOTCompContext {
/* Native stack bounday Check */ /* Native stack bounday Check */
bool enable_stack_bound_check; bool enable_stack_bound_check;
/* Native stack usage estimation */
bool enable_stack_estimation;
/* 128-bit SIMD */ /* 128-bit SIMD */
bool enable_simd; bool enable_simd;
@ -403,6 +407,7 @@ typedef struct AOTCompOption {
bool enable_aux_stack_frame; bool enable_aux_stack_frame;
bool disable_llvm_intrinsics; bool disable_llvm_intrinsics;
bool disable_llvm_lto; bool disable_llvm_lto;
bool enable_stack_estimation;
uint32 opt_level; uint32 opt_level;
uint32 size_level; uint32 size_level;
uint32 output_format; uint32 output_format;

View File

@ -55,6 +55,7 @@ typedef struct AOTCompOption {
bool enable_aux_stack_frame; bool enable_aux_stack_frame;
bool disable_llvm_intrinsics; bool disable_llvm_intrinsics;
bool disable_llvm_lto; bool disable_llvm_lto;
bool enable_stack_estimation;
uint32_t opt_level; uint32_t opt_level;
uint32_t size_level; uint32_t size_level;
uint32_t output_format; uint32_t output_format;

View File

@ -4150,6 +4150,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
} }
argc = function->param_cell_num; argc = function->param_cell_num;
RECORD_STACK_USAGE(exec_env, (uint8 *)&prev_frame);
#if !(defined(OS_ENABLE_HW_BOUND_CHECK) \ #if !(defined(OS_ENABLE_HW_BOUND_CHECK) \
&& WASM_DISABLE_STACK_HW_BOUND_CHECK == 0) && WASM_DISABLE_STACK_HW_BOUND_CHECK == 0)
if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) { if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) {

View File

@ -3901,6 +3901,7 @@ wasm_interp_call_wasm(WASMModuleInstance *module_inst, WASMExecEnv *exec_env,
} }
argc = function->param_cell_num; argc = function->param_cell_num;
RECORD_STACK_USAGE(exec_env, (uint8 *)&prev_frame);
#if !(defined(OS_ENABLE_HW_BOUND_CHECK) \ #if !(defined(OS_ENABLE_HW_BOUND_CHECK) \
&& WASM_DISABLE_STACK_HW_BOUND_CHECK == 0) && WASM_DISABLE_STACK_HW_BOUND_CHECK == 0)
if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) { if ((uint8 *)&prev_frame < exec_env->native_stack_boundary) {

View File

@ -3048,6 +3048,9 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) #if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
option.enable_aux_stack_frame = true; option.enable_aux_stack_frame = true;
#endif #endif
#if WASM_ENABLE_MEMORY_PROFILING != 0
option.enable_stack_estimation = true;
#endif
module->comp_ctx = aot_create_comp_context(module->comp_data, &option); module->comp_ctx = aot_create_comp_context(module->comp_data, &option);
if (!module->comp_ctx) { if (!module->comp_ctx) {

View File

@ -1894,6 +1894,9 @@ init_llvm_jit_functions_stage1(WASMModule *module, char *error_buf,
#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0) #if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
option.enable_aux_stack_frame = true; option.enable_aux_stack_frame = true;
#endif #endif
#if WASM_ENABLE_MEMORY_PROFILING != 0
option.enable_stack_estimation = true;
#endif
module->comp_ctx = aot_create_comp_context(module->comp_data, &option); module->comp_ctx = aot_create_comp_context(module->comp_data, &option);
if (!module->comp_ctx) { if (!module->comp_ctx) {

View File

@ -2074,6 +2074,7 @@ call_wasm_with_hw_bound_check(WASMModuleInstance *module_inst,
/* Check native stack overflow firstly to ensure we have enough /* Check native stack overflow firstly to ensure we have enough
native stack to run the following codes before actually calling native stack to run the following codes before actually calling
the aot function in invokeNative function. */ the aot function in invokeNative function. */
RECORD_STACK_USAGE(exec_env, (uint8 *)&exec_env_tls);
if ((uint8 *)&exec_env_tls < exec_env->native_stack_boundary if ((uint8 *)&exec_env_tls < exec_env->native_stack_boundary
+ page_size * (guard_page_count + 1)) { + page_size * (guard_page_count + 1)) {
wasm_set_exception(module_inst, "native stack overflow"); wasm_set_exception(module_inst, "native stack overflow");

View File

@ -59,6 +59,7 @@ print_help()
printf(" --disable-aux-stack-check Disable auxiliary stack overflow/underflow check\n"); printf(" --disable-aux-stack-check Disable auxiliary stack overflow/underflow check\n");
printf(" --enable-dump-call-stack Enable stack trace feature\n"); printf(" --enable-dump-call-stack Enable stack trace feature\n");
printf(" --enable-perf-profiling Enable function performance profiling\n"); printf(" --enable-perf-profiling Enable function performance profiling\n");
printf(" --enable-memory-profiling Enable memory usage profiling\n");
printf(" --enable-indirect-mode Enalbe call function through symbol table but not direct call\n"); printf(" --enable-indirect-mode Enalbe call function through symbol table but not direct call\n");
printf(" --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n"); printf(" --disable-llvm-intrinsics Disable the LLVM built-in intrinsics\n");
printf(" --disable-llvm-lto Disable the LLVM link time optimization\n"); printf(" --disable-llvm-lto Disable the LLVM link time optimization\n");
@ -254,6 +255,9 @@ main(int argc, char *argv[])
else if (!strcmp(argv[0], "--enable-perf-profiling")) { else if (!strcmp(argv[0], "--enable-perf-profiling")) {
option.enable_aux_stack_frame = true; option.enable_aux_stack_frame = true;
} }
else if (!strcmp(argv[0], "--enable-memory-profiling")) {
option.enable_stack_estimation = true;
}
else if (!strcmp(argv[0], "--enable-indirect-mode")) { else if (!strcmp(argv[0], "--enable-indirect-mode")) {
option.is_indirect_mode = true; option.is_indirect_mode = true;
} }