Implement Fast JIT dump call stack and perf profiling (#1633)

Implement dump call stack and perf profiling features for Fast JIT,
and refine some code.
This commit is contained in:
Wenyong Huang 2022-10-27 09:28:32 +08:00 committed by GitHub
parent 6adf9194d4
commit ef21f0c951
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 240 additions and 139 deletions

View File

@ -1286,7 +1286,7 @@ run_pass(void *ctx, LLVMModuleRef module)
/*AOTCompContext *comp_ctx = (AOTCompContext *)ctx;*/
size_t len;
LOG_VERBOSE("--- In IRTransformLayer @ T#%ld---",
LOG_VERBOSE("--- In IRTransformLayer @ M#%s, T#%ld---",
LLVMGetModuleIdentifier(module, &len), pthread_self());
/* TODO: enable this for JIT mode after fixing LLVM issues */

View File

@ -5823,13 +5823,23 @@ lower_callnative(JitCompContext *cc, x86::Assembler &a, JitInsn *insn)
a.call(regs_i64[REG_RAX_IDX]);
if (ret_reg) {
bh_assert((jit_reg_kind(ret_reg) == JIT_REG_KIND_I32
&& jit_reg_no(ret_reg) == REG_EAX_IDX)
|| (jit_reg_kind(ret_reg) == JIT_REG_KIND_I64
&& jit_reg_no(ret_reg) == REG_RAX_IDX)
|| ((jit_reg_kind(ret_reg) == JIT_REG_KIND_F32
|| jit_reg_kind(ret_reg) == JIT_REG_KIND_F64)
&& jit_reg_no(ret_reg) == 0));
uint32 ret_reg_no = jit_reg_no(ret_reg);
if (jit_reg_kind(ret_reg) == JIT_REG_KIND_I64) {
CHECK_I64_REG_NO(ret_reg_no);
/* mov res, rax */
mov_r_to_r_i64(a, ret_reg_no, REG_RAX_IDX);
}
else if (jit_reg_kind(ret_reg) == JIT_REG_KIND_F64) {
CHECK_F64_REG_NO(ret_reg_no);
/* mov res, xmm0_f64 */
mov_r_to_r_f64(a, ret_reg_no, 0);
}
else {
bh_assert((jit_reg_kind(ret_reg) == JIT_REG_KIND_I32
&& ret_reg_no == REG_EAX_IDX)
|| (jit_reg_kind(ret_reg) == JIT_REG_KIND_F32
&& ret_reg_no == 0));
}
}
return true;

View File

@ -5,6 +5,7 @@
#include "jit_emit_control.h"
#include "jit_emit_exception.h"
#include "jit_emit_function.h"
#include "../jit_frontend.h"
#include "../interpreter/wasm_loader.h"
@ -380,11 +381,51 @@ copy_block_arities(JitCompContext *cc, JitReg dst_frame_sp, uint8 *dst_types,
}
}
static void
static bool
handle_func_return(JitCompContext *cc, JitBlock *block)
{
JitReg prev_frame, prev_frame_sp;
JitReg ret_reg = 0;
#if WASM_ENABLE_PERF_PROFILING != 0
JitReg func_inst = jit_cc_new_reg_ptr(cc);
JitReg time_start = jit_cc_new_reg_I64(cc);
JitReg time_end = jit_cc_new_reg_I64(cc);
JitReg cur_exec_time = jit_cc_new_reg_I64(cc);
JitReg total_exec_time = jit_cc_new_reg_I64(cc);
JitReg total_exec_cnt = jit_cc_new_reg_I32(cc);
#endif
#if WASM_ENABLE_PERF_PROFILING != 0
/* time_end = os_time_get_boot_microsecond() */
if (!jit_emit_callnative(cc, os_time_get_boot_microsecond, time_end, NULL,
0)) {
return false;
}
/* time_start = cur_frame->time_started */
GEN_INSN(LDI64, time_start, cc->fp_reg,
NEW_CONST(I32, offsetof(WASMInterpFrame, time_started)));
/* cur_exec_time = time_end - time_start */
GEN_INSN(SUB, cur_exec_time, time_end, time_start);
/* func_inst = cur_frame->function */
GEN_INSN(LDPTR, func_inst, cc->fp_reg,
NEW_CONST(I32, offsetof(WASMInterpFrame, function)));
/* total_exec_time = func_inst->total_exec_time */
GEN_INSN(LDI64, total_exec_time, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_time)));
/* total_exec_time += cur_exec_time */
GEN_INSN(ADD, total_exec_time, total_exec_time, cur_exec_time);
/* func_inst->total_exec_time = total_exec_time */
GEN_INSN(STI64, total_exec_time, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_time)));
/* total_exec_cnt = func_inst->total_exec_cnt */
GEN_INSN(LDI32, total_exec_cnt, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_cnt)));
/* total_exec_cnt++ */
GEN_INSN(ADD, total_exec_cnt, total_exec_cnt, NEW_CONST(I32, 1));
/* func_inst->total_exec_cnt = total_exec_cnt */
GEN_INSN(STI32, total_exec_cnt, func_inst,
NEW_CONST(I32, offsetof(WASMFunctionInstance, total_exec_cnt)));
#endif
prev_frame = jit_cc_new_reg_ptr(cc);
prev_frame_sp = jit_cc_new_reg_ptr(cc);
@ -420,6 +461,8 @@ handle_func_return(JitCompContext *cc, JitBlock *block)
GEN_INSN(MOV, cc->fp_reg, prev_frame);
/* return 0 */
GEN_INSN(RETURNBC, NEW_CONST(I32, JIT_INTERP_ACTION_NORMAL), ret_reg, 0);
return true;
}
/**
@ -446,7 +489,9 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic)
create the end basic block, just continue to translate
the following opcodes */
if (block->label_type == LABEL_TYPE_FUNCTION) {
handle_func_return(cc, block);
if (!handle_func_return(cc, block)) {
return false;
}
SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
clear_values(jit_frame);
}
@ -548,7 +593,10 @@ handle_op_end(JitCompContext *cc, uint8 **p_frame_ip, bool is_block_polymorphic)
block = jit_block_stack_pop(&cc->block_stack);
if (block->label_type == LABEL_TYPE_FUNCTION) {
handle_func_return(cc, block);
if (!handle_func_return(cc, block)) {
jit_block_destroy(block);
goto fail;
}
SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
clear_values(jit_frame);
}
@ -1190,7 +1238,9 @@ jit_compile_op_return(JitCompContext *cc, uint8 **p_frame_ip)
bh_assert(block_func);
handle_func_return(cc, block_func);
if (!handle_func_return(cc, block_func)) {
return false;
}
SET_BB_END_BCIP(cc->cur_basic_block, *p_frame_ip - 1);
clear_values(cc->jit_frame);

View File

@ -800,11 +800,9 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
char *f32_arg_names[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" };
char *f64_arg_names[] = { "xmm0_f64", "xmm1_f64", "xmm2_f64",
"xmm3_f64", "xmm4_f64", "xmm5_f64" };
JitReg i64_arg_regs[6], f32_arg_regs[6], f64_arg_regs[6], res_hreg = 0;
JitReg i64_arg_regs[6], f32_arg_regs[6], f64_arg_regs[6], res_reg = 0;
JitReg eax_hreg = jit_codegen_get_hreg_by_name("eax");
JitReg rax_hreg = jit_codegen_get_hreg_by_name("rax");
JitReg xmm0_hreg = jit_codegen_get_hreg_by_name("xmm0");
JitReg xmm0_f64_hreg = jit_codegen_get_hreg_by_name("xmm0_f64");
uint32 i, i64_reg_idx, float_reg_idx;
bh_assert(param_count <= 6);
@ -839,16 +837,16 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
if (res) {
switch (jit_reg_kind(res)) {
case JIT_REG_KIND_I32:
res_hreg = eax_hreg;
res_reg = eax_hreg;
break;
case JIT_REG_KIND_I64:
res_hreg = rax_hreg;
res_reg = res;
break;
case JIT_REG_KIND_F32:
res_hreg = xmm0_hreg;
res_reg = xmm0_hreg;
break;
case JIT_REG_KIND_F64:
res_hreg = xmm0_f64_hreg;
res_reg = res;
break;
default:
bh_assert(0);
@ -856,7 +854,7 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
}
}
insn = GEN_INSN(CALLNATIVE, res_hreg, native_func_reg, param_count);
insn = GEN_INSN(CALLNATIVE, res_reg, native_func_reg, param_count);
if (!insn) {
return false;
}
@ -880,8 +878,8 @@ emit_callnative(JitCompContext *cc, JitReg native_func_reg, JitReg res,
}
}
if (res) {
GEN_INSN(MOV, res, res_hreg);
if (res && res != res_reg) {
GEN_INSN(MOV, res, res_reg);
}
return true;

View File

@ -177,38 +177,6 @@ get_global_type(const WASMModule *module, uint32 global_idx)
}
}
/**
 * Compute the offset of a global variable's data, counted from the
 * start of the module instance.
 *
 * The result is the sum of:
 *   - the offset of global_table_data.bytes inside WASMModuleInstance,
 *   - the space taken by all (imported and defined) memory instances,
 *   - the data_offset recorded for the selected global.
 *
 * @param module the wasm module that declares the global
 * @param global_idx index of the global; indexes below
 *        module->import_global_count select an imported global
 *
 * @return the offset of the global's data
 */
static uint32
get_global_data_offset(const WASMModule *module, uint32 global_idx)
{
    uint32 memories_size =
        (uint32)sizeof(WASMMemoryInstance)
        * (module->import_memory_count + module->memory_count);
    uint32 data_offset;

#if WASM_ENABLE_JIT != 0
    /* A module without memory still reserves one empty mem_info slot
       so that the layout stays aligned with the llvm jit compiler */
    if (memories_size == 0)
        memories_size = (uint32)sizeof(WASMMemoryInstance);
#endif

    if (global_idx >= module->import_global_count) {
        /* Global defined by the module itself */
        uint32 local_idx = global_idx - module->import_global_count;

        data_offset = module->globals[local_idx].data_offset;
    }
    else {
        /* Imported global */
        data_offset = module->import_globals[global_idx].u.global.data_offset;
    }

    /* Module inst header + memory instances + offset inside global data */
    return (uint32)offsetof(WASMModuleInstance, global_table_data.bytes)
           + memories_size + data_offset;
}
bool
jit_compile_op_get_global(JitCompContext *cc, uint32 global_idx)
{
@ -219,7 +187,8 @@ jit_compile_op_get_global(JitCompContext *cc, uint32 global_idx)
bh_assert(global_idx < cc->cur_wasm_module->import_global_count
+ cc->cur_wasm_module->global_count);
data_offset = get_global_data_offset(cc->cur_wasm_module, global_idx);
data_offset =
jit_frontend_get_global_data_offset(cc->cur_wasm_module, global_idx);
global_type = get_global_type(cc->cur_wasm_module, global_idx);
switch (global_type) {
@ -280,7 +249,8 @@ jit_compile_op_set_global(JitCompContext *cc, uint32 global_idx,
bh_assert(global_idx < cc->cur_wasm_module->import_global_count
+ cc->cur_wasm_module->global_count);
data_offset = get_global_data_offset(cc->cur_wasm_module, global_idx);
data_offset =
jit_frontend_get_global_data_offset(cc->cur_wasm_module, global_idx);
global_type = get_global_type(cc->cur_wasm_module, global_idx);
switch (global_type) {

View File

@ -21,6 +21,103 @@
#include "../interpreter/wasm_runtime.h"
#include "../common/wasm_exec_env.h"
static uint32
get_global_base_offset(const WASMModule *module)
{
uint32 module_inst_struct_size =
(uint32)offsetof(WASMModuleInstance, global_table_data.bytes);
uint32 mem_inst_size =
(uint32)sizeof(WASMMemoryInstance)
* (module->import_memory_count + module->memory_count);
#if WASM_ENABLE_JIT != 0
/* If the module dosen't have memory, reserve one mem_info space
with empty content to align with llvm jit compiler */
if (mem_inst_size == 0)
mem_inst_size = (uint32)sizeof(WASMMemoryInstance);
#endif
/* Size of module inst and memory instances */
return module_inst_struct_size + mem_inst_size;
}
/**
 * Compute the offset of the first table instance, which is placed
 * right after the global data area.
 *
 * @param module the wasm module to inspect
 *
 * @return global base offset plus module->global_data_size
 */
static uint32
get_first_table_inst_offset(const WASMModule *module)
{
    uint32 globals_start = get_global_base_offset(module);

    return globals_start + module->global_data_size;
}
/**
 * Compute the offset of a global variable's data: the global base
 * offset plus the data_offset recorded for the selected global.
 *
 * @param module the wasm module that declares the global
 * @param global_idx index of the global; indexes below
 *        module->import_global_count select an imported global
 *
 * @return the offset of the global's data
 */
uint32
jit_frontend_get_global_data_offset(const WASMModule *module, uint32 global_idx)
{
    uint32 data_offset;

    if (global_idx >= module->import_global_count) {
        /* Global defined by the module itself */
        uint32 local_idx = global_idx - module->import_global_count;

        data_offset = module->globals[local_idx].data_offset;
    }
    else {
        /* Imported global */
        data_offset = module->import_globals[global_idx].u.global.data_offset;
    }

    return get_global_base_offset(module) + data_offset;
}
/**
 * Compute the offset of a table instance by skipping over every table
 * instance that precedes it. Imported tables come first, followed by
 * the tables defined by the module. Each skipped table contributes
 * offsetof(WASMTableInstance, elems) plus one uint32 per element slot.
 *
 * @param module the wasm module that declares the tables
 * @param tbl_idx index of the table; passing the total table count
 *        yields the offset just past the last table instance
 *
 * @return the offset of the requested table instance
 */
uint32
jit_frontend_get_table_inst_offset(const WASMModule *module, uint32 tbl_idx)
{
    const uint32 table_header_size =
        (uint32)offsetof(WASMTableInstance, elems);
    uint32 result = get_first_table_inst_offset(module);
    uint32 idx;

    /* Skip the imported tables placed before the requested one */
    for (idx = 0; idx < tbl_idx && idx < module->import_table_count; idx++) {
        WASMTableImport *imported = &module->import_tables[idx].u.table;
        uint32 slot_count;

#if WASM_ENABLE_MULTI_MODULE != 0
        slot_count = imported->max_size;
#else
        slot_count =
            imported->possible_grow ? imported->max_size : imported->init_size;
#endif
        result += table_header_size + (uint32)sizeof(uint32) * slot_count;
    }

    if (idx == tbl_idx) {
        return result;
    }

    /* Rebase both indexes so that they count module-defined tables */
    tbl_idx -= module->import_table_count;
    idx -= module->import_table_count;

    /* Skip the module-defined tables placed before the requested one */
    for (; idx < tbl_idx && idx < module->table_count; idx++) {
        WASMTable *defined = module->tables + idx;
        uint32 slot_count;

#if WASM_ENABLE_MULTI_MODULE != 0
        slot_count = defined->max_size;
#else
        slot_count =
            defined->possible_grow ? defined->max_size : defined->init_size;
#endif
        result += table_header_size + (uint32)sizeof(uint32) * slot_count;
    }

    return result;
}
/**
 * Compute the offset that follows the last table instance, passed
 * through align_uint(offset, 8).
 *
 * @param module the wasm module to inspect
 *
 * @return the value returned by align_uint for the end-of-tables offset
 */
uint32
jit_frontend_get_module_inst_extra_offset(const WASMModule *module)
{
    uint32 total_tables = module->import_table_count + module->table_count;
    uint32 tables_end =
        jit_frontend_get_table_inst_offset(module, total_tables);

    return align_uint(tables_end, 8);
}
JitReg
get_module_inst_reg(JitFrame *frame)
{
@ -294,72 +391,14 @@ get_mem_bound_check_16bytes_reg(JitFrame *frame, uint32 mem_idx)
return frame->memory_regs[mem_idx].mem_bound_check_16bytes;
}
/**
 * Compute the offset of a table instance, counted from the start of
 * the module instance. The first table is placed after the module
 * inst header, the memory instances and the global data; every table
 * preceding the requested one is then skipped. Imported tables come
 * first, followed by the tables defined by the module.
 *
 * @param module the wasm module that declares the tables
 * @param tbl_idx index of the table to locate
 *
 * @return the offset of the requested table instance
 */
static uint32
get_table_inst_offset(const WASMModule *module, uint32 tbl_idx)
{
    const uint32 table_header_size =
        (uint32)offsetof(WASMTableInstance, elems);
    uint32 memories_size =
        (uint32)sizeof(WASMMemoryInstance)
        * (module->import_memory_count + module->memory_count);
    uint32 result, idx;

#if WASM_ENABLE_JIT != 0
    /* A module without memory still reserves one empty mem_info slot
       so that the layout stays aligned with the llvm jit compiler */
    if (memories_size == 0)
        memories_size = (uint32)sizeof(WASMMemoryInstance);
#endif

    /* Offset of the first table: module inst header, memory instances
       and global data */
    result = (uint32)offsetof(WASMModuleInstance, global_table_data.bytes)
             + memories_size + module->global_data_size;

    /* Skip the imported tables placed before the requested one */
    for (idx = 0; idx < tbl_idx && idx < module->import_table_count; idx++) {
        WASMTableImport *imported = &module->import_tables[idx].u.table;
        uint32 slot_count;

#if WASM_ENABLE_MULTI_MODULE != 0
        slot_count = imported->max_size;
#else
        slot_count =
            imported->possible_grow ? imported->max_size : imported->init_size;
#endif
        result += table_header_size + (uint32)sizeof(uint32) * slot_count;
    }

    if (idx == tbl_idx) {
        return result;
    }

    /* Rebase both indexes so that they count module-defined tables */
    tbl_idx -= module->import_table_count;
    idx -= module->import_table_count;

    /* Skip the module-defined tables placed before the requested one */
    for (; idx < tbl_idx && idx < module->table_count; idx++) {
        WASMTable *defined = module->tables + idx;
        uint32 slot_count;

#if WASM_ENABLE_MULTI_MODULE != 0
        slot_count = defined->max_size;
#else
        slot_count =
            defined->possible_grow ? defined->max_size : defined->init_size;
#endif
        result += table_header_size + (uint32)sizeof(uint32) * slot_count;
    }

    return result;
}
JitReg
get_table_elems_reg(JitFrame *frame, uint32 tbl_idx)
{
JitCompContext *cc = frame->cc;
JitReg module_inst = get_module_inst_reg(frame);
uint32 offset = get_table_inst_offset(cc->cur_wasm_module, tbl_idx)
+ (uint32)offsetof(WASMTableInstance, elems);
uint32 offset =
jit_frontend_get_table_inst_offset(cc->cur_wasm_module, tbl_idx)
+ (uint32)offsetof(WASMTableInstance, elems);
if (!frame->table_regs[tbl_idx].table_elems) {
frame->table_regs[tbl_idx].table_elems =
@ -375,8 +414,9 @@ get_table_cur_size_reg(JitFrame *frame, uint32 tbl_idx)
{
JitCompContext *cc = frame->cc;
JitReg module_inst = get_module_inst_reg(frame);
uint32 offset = get_table_inst_offset(cc->cur_wasm_module, tbl_idx)
+ (uint32)offsetof(WASMTableInstance, cur_size);
uint32 offset =
jit_frontend_get_table_inst_offset(cc->cur_wasm_module, tbl_idx)
+ (uint32)offsetof(WASMTableInstance, cur_size);
if (!frame->table_regs[tbl_idx].table_cur_size) {
frame->table_regs[tbl_idx].table_cur_size =
@ -745,6 +785,13 @@ init_func_translation(JitCompContext *cc)
uint32 frame_size, outs_size, local_size, count;
uint32 i, local_off;
uint64 total_size;
#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0
JitReg module_inst, func_inst;
uint32 func_insts_offset;
#if WASM_ENABLE_PERF_PROFILING != 0
JitReg time_started;
#endif
#endif
if ((uint64)max_locals + (uint64)max_stacks >= UINT32_MAX
|| total_cell_num >= UINT32_MAX
@ -810,6 +857,21 @@ init_func_translation(JitCompContext *cc)
frame_boundary = jit_cc_new_reg_ptr(cc);
frame_sp = jit_cc_new_reg_ptr(cc);
#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0
module_inst = jit_cc_new_reg_ptr(cc);
func_inst = jit_cc_new_reg_ptr(cc);
#if WASM_ENABLE_PERF_PROFILING != 0
time_started = jit_cc_new_reg_I64(cc);
/* Call os_time_get_boot_microsecond() to get time_started first:
the stack frame is switched below, and calling a native function
during that switch may cause register spilling to work improperly */
if (!jit_emit_callnative(cc, os_time_get_boot_microsecond, time_started,
NULL, 0)) {
return NULL;
}
#endif
#endif
/* top = exec_env->wasm_stack.s.top */
GEN_INSN(LDPTR, top, cc->exec_env_reg,
NEW_CONST(I32, offsetof(WASMExecEnv, wasm_stack.s.top)));
@ -840,11 +902,28 @@ init_func_translation(JitCompContext *cc)
/* frame->prev_frame = fp_reg */
GEN_INSN(STPTR, cc->fp_reg, top,
NEW_CONST(I32, offsetof(WASMInterpFrame, prev_frame)));
/* TODO: do we need to set frame->function? */
/*
#if WASM_ENABLE_DUMP_CALL_STACK != 0 || WASM_ENABLE_PERF_PROFILING != 0
/* module_inst = exec_env->module_inst */
GEN_INSN(LDPTR, module_inst, cc->exec_env_reg,
NEW_CONST(I32, offsetof(WASMExecEnv, module_inst)));
func_insts_offset =
jit_frontend_get_module_inst_extra_offset(cur_wasm_module)
+ (uint32)offsetof(WASMModuleInstanceExtra, functions);
/* func_inst = module_inst->e->functions */
GEN_INSN(LDPTR, func_inst, module_inst, NEW_CONST(I32, func_insts_offset));
/* func_inst = func_inst + cur_wasm_func_idx */
GEN_INSN(ADD, func_inst, func_inst,
NEW_CONST(PTR, (uint32)sizeof(WASMFunctionInstance)
* cur_wasm_func_idx));
/* frame->function = func_inst */
GEN_INSN(STPTR, func_inst, top,
NEW_CONST(I32, offsetof(WASMInterpFrame, function)));
*/
#if WASM_ENABLE_PERF_PROFILING != 0
/* frame->time_started = time_started */
GEN_INSN(STI64, time_started, top,
NEW_CONST(I32, offsetof(WASMInterpFrame, time_started)));
#endif
#endif
/* exec_env->cur_frame = top */
GEN_INSN(STPTR, top, cc->exec_env_reg,
NEW_CONST(I32, offsetof(WASMExecEnv, cur_frame)));

View File

@ -133,22 +133,6 @@ typedef enum FloatArithmetic {
JitBasicBlock *
jit_frontend_translate_func(JitCompContext *cc);
/**
* Generate a block leaving the compiled code, which must store the
* target bcip and other necessary information for switching to
* interpreter or other compiled code and then jump to the exit of the
* cc.
*
* @param cc the compilation context
* @param bcip the target bytecode instruction pointer
* @param sp_offset stack pointer offset at the beginning of the block
*
* @return the leaving block if succeeds, NULL otherwise
*/
JitBlock *
jit_frontend_gen_leaving_block(JitCompContext *cc, void *bcip,
unsigned sp_offset);
/**
* Lower the IR of the given compilation context.
*
@ -159,6 +143,16 @@ jit_frontend_gen_leaving_block(JitCompContext *cc, void *bcip,
bool
jit_frontend_lower(JitCompContext *cc);
/**
 * Get the offset of a global variable's data: the offset of the global
 * data area (module inst header plus memory instances) plus the
 * data_offset recorded for the global.
 *
 * @param module the wasm module that declares the global
 * @param global_idx index of the global; indexes below
 *        module->import_global_count select an imported global
 *
 * @return the offset of the global's data
 */
uint32
jit_frontend_get_global_data_offset(const WASMModule *module,
uint32 global_idx);
/**
 * Get the offset of a table instance, computed by starting right after
 * the global data area and skipping every table instance that precedes
 * the requested one (imported tables first, then module-defined ones).
 *
 * @param module the wasm module that declares the tables
 * @param tbl_idx index of the table; passing the total table count
 *        yields the offset just past the last table instance
 *
 * @return the offset of the requested table instance
 */
uint32
jit_frontend_get_table_inst_offset(const WASMModule *module, uint32 tbl_idx);
/**
 * Get the offset that follows the last table instance, passed through
 * align_uint(offset, 8).
 *
 * @param module the wasm module to inspect
 *
 * @return the value returned by align_uint for the end-of-tables offset
 */
uint32
jit_frontend_get_module_inst_extra_offset(const WASMModule *module);
JitReg
get_module_inst_reg(JitFrame *frame);

View File

@ -103,7 +103,7 @@ vm_link(wasm_vm_t *vm, wasm_extern_vec_t *imports)
if (!vm->function_list)
goto fail;
memset(vm->function_list, 0, sizeof(2 * sizeof(wasm_func_t *)));
memset(vm->function_list, 0, 2 * sizeof(wasm_func_t *));
/* bind wasm_set_byte(...) */
assert(wasm_extern_kind(vm->exports->data[1]) == WASM_EXTERN_FUNC);