diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c
index e59937cf..fac0c485 100644
--- a/core/iwasm/aot/aot_loader.c
+++ b/core/iwasm/aot/aot_loader.c
@@ -285,7 +285,7 @@ check_machine_info(AOTTargetInfo *target_info,
                          error_buf, error_buf_size))
         return false;
 
-    if (strcmp(target_expected, target_got)) {
+    if (strncmp(target_expected, target_got, strlen(target_expected))) {
         set_error_buf_v(error_buf, error_buf_size,
                         "invalid target type, expected %s but got %s",
                         target_expected, target_got);
diff --git a/core/iwasm/common/arch/invokeNative_aarch64_simd.s b/core/iwasm/common/arch/invokeNative_aarch64_simd.s
new file mode 100644
index 00000000..a6ccc150
--- /dev/null
+++ b/core/iwasm/common/arch/invokeNative_aarch64_simd.s
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+    .text
+    .align 2
+#ifndef BH_PLATFORM_DARWIN
+    .globl invokeNative
+    .type invokeNative, function
+invokeNative:
+#else
+    .globl _invokeNative
+_invokeNative:
+#endif /* end of BH_PLATFORM_DARWIN */
+
+/*
+ * Arguments passed in:
+ *
+ * x0 function ptr
+ * x1 argv
+ * x2 nstacks
+ */
+
+    sub  sp, sp, #0x30
+    stp  x19, x20, [sp, #0x20]  /* save the registers */
+    stp  x21, x22, [sp, #0x10]
+    stp  x23, x24, [sp, #0x0]
+
+    mov  x19, x0                /* x19 = function ptr */
+    mov  x20, x1                /* x20 = argv */
+    mov  x21, x2                /* x21 = nstacks */
+    mov  x22, sp                /* save sp before calling the function */
+
+    /* Fill in floating-point registers */
+    ld1  {v0.2D, v1.2D, v2.2D, v3.2D}, [x20], #64 /* v0 = argv[0], v1 = argv[1], v2 = argv[2], v3 = argv[3] */
+    ld1  {v4.2D, v5.2D, v6.2D, v7.2D}, [x20], #64 /* v4 = argv[4], v5 = argv[5], v6 = argv[6], v7 = argv[7] */
+
+    /* Fill in integer registers */
+    ldp  x0, x1, [x20], #16     /* x0 = argv[8] = exec_env, x1 = argv[9] */
+    ldp  x2, x3, [x20], #16     /* x2 = argv[10], x3 = argv[11] */
+    ldp  x4, x5, [x20], #16     /* x4 = argv[12], x5 = argv[13] */
+    ldp  x6, x7, [x20], #16     /* x6 = argv[14], x7 = argv[15] */
+
+    /* Now x20 points to the stack args */
+
+    /* Call the function directly if there are no args on the stack */
+    cmp  x21, #0
+    beq  call_func
+
+    /* Fill all stack args: reserve stack space and fill one by one */
+    mov  x23, sp
+    bic  sp, x23, #15           /* ensure the stack is 16-byte aligned */
+    lsl  x23, x21, #3           /* x23 = nstacks * 8 */
+    add  x23, x23, #15          /* x23 = (x23 + 15) & ~15 */
+    bic  x23, x23, #15
+    sub  sp, sp, x23            /* reserve stack space for stack arguments */
+    mov  x23, sp
+
+loop_stack_args:                /* copy stack arguments to the stack */
+    cmp  x21, #0
+    beq  call_func
+    ldr  x24, [x20], #8
+    str  x24, [x23], #8
+    sub  x21, x21, #1
+    b    loop_stack_args
+
+call_func:
+    mov  x20, x30               /* save x30 (lr) */
+    blr  x19
+    mov  sp, x22                /* restore the sp saved before the call */
+
+return:
+    mov  x30, x20               /* restore x30 (lr) */
+    ldp  x19, x20, [sp, #0x20]  /* restore the saved registers */
+    ldp  x21, x22, [sp, #0x10]
+    ldp  x23, x24, [sp, #0x0]
+    add  sp, sp, #0x30          /* restore sp */
+    ret
+
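Note: the lsl/add/bic sequence above rounds the spill area for stack arguments up to a 16-byte boundary, as the AArch64 AAPCS requires for sp. A minimal C model of that arithmetic (the helper name is illustrative, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors the lsl/add/bic sequence in invokeNative_aarch64_simd.s:
     * nstacks 8-byte slots, rounded up to a multiple of 16. */
    static uint64_t stack_args_size(uint64_t nstacks)
    {
        return ((nstacks << 3) + 15) & ~(uint64_t)15;
    }

    int main(void)
    {
        assert(stack_args_size(0) == 0);
        assert(stack_args_size(1) == 16); /* one arg still reserves 16 bytes */
        assert(stack_args_size(4) == 32);
        return 0;
    }
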
diff --git a/core/iwasm/common/iwasm_common.cmake b/core/iwasm/common/iwasm_common.cmake
index e1b8d822..608f840b 100644
--- a/core/iwasm/common/iwasm_common.cmake
+++ b/core/iwasm/common/iwasm_common.cmake
@@ -43,7 +43,11 @@ elseif (WAMR_BUILD_TARGET MATCHES "THUMB.*")
     set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_thumb.s)
   endif ()
 elseif (WAMR_BUILD_TARGET MATCHES "AARCH64.*")
-  set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64.s)
+  if (NOT WAMR_BUILD_SIMD EQUAL 1)
+    set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64.s)
+  else ()
+    set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64_simd.s)
+  endif ()
 elseif (WAMR_BUILD_TARGET STREQUAL "MIPS")
   set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_mips.s)
 elseif (WAMR_BUILD_TARGET STREQUAL "XTENSA")
diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c
index aad71e1a..e2721301 100644
--- a/core/iwasm/common/wasm_runtime_common.c
+++ b/core/iwasm/common/wasm_runtime_common.c
@@ -3414,10 +3414,14 @@ typedef union __declspec(intrin_type) __declspec(align(8)) v128 {
     unsigned __int32 m128i_u32[4];
     unsigned __int64 m128i_u64[2];
 } v128;
-#else
+#elif defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
 typedef long long v128
     __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
-#endif /* end of defined(_WIN32) || defined(_WIN32_) */
+#elif defined(BUILD_TARGET_AARCH64)
+#include <arm_neon.h>
+typedef uint32x4_t __m128i;
+#define v128 __m128i
+#endif
 
 #endif /* end of WASM_ENABLE_SIMD != 0 */
diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c
index 418ded3d..0044a3b4 100644
--- a/core/iwasm/compilation/aot_llvm.c
+++ b/core/iwasm/compilation/aot_llvm.c
@@ -1496,7 +1496,8 @@ aot_create_comp_context(AOTCompData *comp_data,
     }
 
     if (option->enable_simd
-        && strcmp(comp_ctx->target_arch, "x86_64") != 0) {
+        && strcmp(comp_ctx->target_arch, "x86_64") != 0
+        && strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) {
         /* Disable simd if it isn't supported by target arch */
         option->enable_simd = false;
     }
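Note: on AArch64 the runtime's v128 cells are now backed by NEON's uint32x4_t from <arm_neon.h>. A standalone compile-time sanity check, assuming an AArch64 toolchain (the alias name below is local to this sketch, not from the patch):

    #include <arm_neon.h>
    #include <assert.h>

    /* uint32x4_t is NEON's native 128-bit vector of four 32-bit lanes,
     * so it can carry WASM v128 values, which are 128 bits wide. */
    typedef uint32x4_t v128_t; /* the patch itself names this v128 */

    int main(void)
    {
        assert(sizeof(v128_t) == 16);
        return 0;
    }
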
diff --git a/core/iwasm/compilation/simd/simd_access_lanes.c b/core/iwasm/compilation/simd/simd_access_lanes.c
index eef22ea8..5abefbd4 100644
--- a/core/iwasm/compilation/simd/simd_access_lanes.c
+++ b/core/iwasm/compilation/simd/simd_access_lanes.c
@@ -8,6 +8,13 @@
 #include "../aot_emit_exception.h"
 #include "../../aot/aot_runtime.h"
 
+static bool
+is_target_x86(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "x86_64", 6)
+           || !strncmp(comp_ctx->target_arch, "i386", 4);
+}
+
 static LLVMValueRef
 build_intx16_vector(const AOTCompContext *comp_ctx,
                     const LLVMTypeRef element_type,
@@ -86,7 +93,7 @@
 /* TODO: instructions for other CPUs */
 /* shufflevector is not an option, since it requires *mask as a const */
 bool
-aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
 {
     LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result;
     LLVMTypeRef param_types[2];
@@ -151,6 +158,109 @@
 fail:
     return false;
 }
 
+bool
+aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
+{
+    LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id,
+        result, idx, id, replace_with_zero, elem, elem_or_zero, undef;
+    uint8 i;
+
+    if (is_target_x86(comp_ctx)) {
+        return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx);
+    }
+
+    int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16,
+                               16, 16, 16, 16, 16, 16, 16, 16 },
+        const_zeros[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                            0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+    if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
+                                           "mask"))) {
+        goto fail;
+    }
+
+    if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                             V128_i8x16_TYPE, "vec"))) {
+        goto fail;
+    }
+
+    if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) {
+        HANDLE_FAILURE("LLVMGetUndef");
+        goto fail;
+    }
+
+    /* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
+    if (!(max_lane_id =
+            build_intx16_vector(comp_ctx, INT8_TYPE, const_lane_ids))) {
+        goto fail;
+    }
+
+    if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
+                                    max_lane_id, "out_of_range"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    /* if the lane id is out of range (>= 16), set it to 0 */
+    if (!(default_lane_value =
+            build_intx16_vector(comp_ctx, INT8_TYPE, const_zeros))) {
+        goto fail;
+    }
+
+    if (!(idx = LLVMBuildSelect(comp_ctx->builder, condition,
+                                default_lane_value, mask, "mask"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    for (i = 0; i < 16; i++) {
+        if (!(id = LLVMBuildExtractElement(comp_ctx->builder, idx, I8_CONST(i),
+                                           "id"))) {
+            HANDLE_FAILURE("LLVMBuildExtractElement");
+            goto fail;
+        }
+
+        if (!(replace_with_zero =
+                LLVMBuildExtractElement(comp_ctx->builder, condition,
+                                        I8_CONST(i), "replace_with_zero"))) {
+            HANDLE_FAILURE("LLVMBuildExtractElement");
+            goto fail;
+        }
+
+        if (!(elem = LLVMBuildExtractElement(comp_ctx->builder, vector, id,
+                                             "vector[mask[i]]"))) {
+            HANDLE_FAILURE("LLVMBuildExtractElement");
+            goto fail;
+        }
+
+        if (!(elem_or_zero =
+                LLVMBuildSelect(comp_ctx->builder, replace_with_zero,
+                                I8_CONST(0), elem, "elem_or_zero"))) {
+            HANDLE_FAILURE("LLVMBuildSelect");
+            goto fail;
+        }
+
+        if (!(undef =
+                LLVMBuildInsertElement(comp_ctx->builder, undef, elem_or_zero,
+                                       I8_CONST(i), "new_vector"))) {
+            HANDLE_FAILURE("LLVMBuildInsertElement");
+            goto fail;
+        }
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, undef, V128_i64x2_TYPE,
+                                    "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+
+    return true;
+fail:
+    return false;
+}
+
 static bool
 aot_compile_simd_extract(AOTCompContext *comp_ctx,
                          AOTFuncContext *func_ctx,
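Note: the generic lowering above implements i8x16.swizzle lane by lane: an out-of-range index selects zero, otherwise the indexed byte of the source vector. A scalar reference model (hypothetical helper, for illustration only):

    #include <assert.h>
    #include <stdint.h>

    /* Scalar model of the IR emitted by aot_compile_simd_swizzle:
     * lanes whose mask index is >= 16 produce zero. */
    static void swizzle_i8x16(const uint8_t vec[16], const uint8_t mask[16],
                              uint8_t out[16])
    {
        for (int i = 0; i < 16; i++)
            out[i] = (mask[i] >= 16) ? 0 : vec[mask[i]];
    }

    int main(void)
    {
        const uint8_t vec[16] = { 10, 11, 12, 13, 14, 15, 16, 17,
                                  18, 19, 20, 21, 22, 23, 24, 25 };
        const uint8_t mask[16] = { 0, 15, 16, 255, 1, 1, 1, 1,
                                   1, 1, 1, 1, 1, 1, 1, 1 };
        uint8_t out[16];
        swizzle_i8x16(vec, mask, out);
        assert(out[0] == 10 && out[1] == 25);
        assert(out[2] == 0 && out[3] == 0); /* out-of-range lanes give zero */
        return 0;
    }
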
diff --git a/core/iwasm/compilation/simd/simd_conversions.c b/core/iwasm/compilation/simd/simd_conversions.c
index f2d32c09..1f725f42 100644
--- a/core/iwasm/compilation/simd/simd_conversions.c
+++ b/core/iwasm/compilation/simd/simd_conversions.c
@@ -9,6 +9,13 @@
 #include "../aot_emit_numberic.h"
 #include "../../aot/aot_runtime.h"
 
+static bool
+is_target_x86(AOTCompContext *comp_ctx)
+{
+    return !strncmp(comp_ctx->target_arch, "x86_64", 6)
+           || !strncmp(comp_ctx->target_arch, "i386", 4);
+}
+
 static bool
 simd_integer_narrow(AOTCompContext *comp_ctx,
                     AOTFuncContext *func_ctx,
@@ -49,8 +56,85 @@
 fail:
     return false;
 }
 
+static LLVMValueRef
+build_intx4_vector(const AOTCompContext *comp_ctx,
+                   const LLVMTypeRef element_type,
+                   const int *element_value)
+{
+    LLVMValueRef vector, elements[4];
+    unsigned i;
+
+    for (i = 0; i < 4; i++) {
+        if (!(elements[i] =
+                LLVMConstInt(element_type, element_value[i], true))) {
+            HANDLE_FAILURE("LLVMConstInt");
+            goto fail;
+        }
+    }
+
+    if (!(vector = LLVMConstVector(elements, 4))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+    return vector;
+fail:
+    return NULL;
+}
+
+static LLVMValueRef
+build_intx8_vector(const AOTCompContext *comp_ctx,
+                   const LLVMTypeRef element_type,
+                   const int *element_value)
+{
+    LLVMValueRef vector, elements[8];
+    unsigned i;
+
+    for (i = 0; i < 8; i++) {
+        if (!(elements[i] =
+                LLVMConstInt(element_type, element_value[i], true))) {
+            HANDLE_FAILURE("LLVMConstInt");
+            goto fail;
+        }
+    }
+
+    if (!(vector = LLVMConstVector(elements, 8))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+    return vector;
+fail:
+    return NULL;
+}
+
+static LLVMValueRef
+build_intx16_vector(const AOTCompContext *comp_ctx,
+                    const LLVMTypeRef element_type,
+                    const int *element_value)
+{
+    LLVMValueRef vector, elements[16];
+    unsigned i;
+
+    for (i = 0; i < 16; i++) {
+        if (!(elements[i] =
+                LLVMConstInt(element_type, element_value[i], true))) {
+            HANDLE_FAILURE("LLVMConstInt");
+            goto fail;
+        }
+    }
+
+    if (!(vector = LLVMConstVector(elements, 16))) {
+        HANDLE_FAILURE("LLVMConstVector");
+        goto fail;
+    }
+
+    return vector;
+fail:
+    return NULL;
+}
+
 bool
-aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
+aot_compile_simd_i8x16_narrow_i16x8_x86(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx,
                                     bool is_signed)
 {
@@ -60,7 +144,7 @@
 }
 
 bool
-aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
+aot_compile_simd_i16x8_narrow_i32x4_x86(AOTCompContext *comp_ctx,
                                     AOTFuncContext *func_ctx,
                                     bool is_signed)
 {
@@ -69,6 +153,273 @@
         is_signed ? "llvm.x86.sse2.packssdw.128"
                   : "llvm.x86.sse41.packusdw");
 }
+
+bool
+aot_compile_simd_i8x16_narrow_i16x8(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed)
+{
+    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
+        vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
+        shuffle_vector;
+    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;
+
+    if (is_target_x86(comp_ctx)) {
+        return aot_compile_simd_i8x16_narrow_i16x8_x86(comp_ctx, func_ctx,
+                                                       is_signed);
+    }
+
+    int min_s_array[8] = { 0xff80, 0xff80, 0xff80, 0xff80,
+                           0xff80, 0xff80, 0xff80, 0xff80 };
+    int max_s_array[8] = { 0x007f, 0x007f, 0x007f, 0x007f,
+                           0x007f, 0x007f, 0x007f, 0x007f };
+
+    int min_u_array[8] = { 0x0000, 0x0000, 0x0000, 0x0000,
+                           0x0000, 0x0000, 0x0000, 0x0000 };
+    int max_u_array[8] = { 0x00ff, 0x00ff, 0x00ff, 0x00ff,
+                           0x00ff, 0x00ff, 0x00ff, 0x00ff };
+
+    int shuffle_array[16] = { 0, 1, 2,  3,  4,  5,  6,  7,
+                              8, 9, 10, 11, 12, 13, 14, 15 };
+
+    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i16x8_TYPE, "vec2"))) {
+        goto fail;
+    }
+
+    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i16x8_TYPE, "vec1"))) {
+        goto fail;
+    }
+
+    if (!(vector_min = build_intx8_vector(
+            comp_ctx, INT16_TYPE, is_signed ? min_s_array : min_u_array))) {
+        goto fail;
+    }
+    if (!(vector_max = build_intx8_vector(
+            comp_ctx, INT16_TYPE, is_signed ? max_s_array : max_u_array))) {
+        goto fail;
+    }
+    if (!(shuffle = build_intx16_vector(comp_ctx, I32_TYPE, shuffle_array))) {
+        goto fail;
+    }
+
+    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
+                                    vector_max, "v1_greater_than_max"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
+                                    vector_max, "v2_greater_than_max"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
+                                    vector_min, "v1_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
+                                    vector_min, "v2_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1,
+                            "vector1_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
+                            vector1_clamped, "vector1_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2,
+                            "vector2_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
+                            vector2_clamped, "vector2_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_trunced =
+            LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
+                           LLVMVectorType(INT8_TYPE, 8), "vector1_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(vector2_trunced =
+            LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
+                           LLVMVectorType(INT8_TYPE, 8), "vector2_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(shuffle_vector = LLVMBuildShuffleVector(
+            comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
+            "shuffle_vector"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
+                                    V128_i64x2_TYPE, "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+    return true;
+
+fail:
+    return false;
+}
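Note: the clamp constants above are -128/127 (0xff80/0x007f) for the signed variant and 0/255 for the unsigned one, matching the saturating semantics of i8x16.narrow_i16x8_{s,u}. A scalar model of one output lane (hypothetical helper, illustration only):

    #include <assert.h>
    #include <stdint.h>

    /* Each 16-bit input lane is interpreted as signed, then saturated
     * into the 8-bit output range: the same clamp-then-truncate scheme
     * the IR above builds with icmp/select/trunc. */
    static uint8_t narrow_lane(int16_t v, int is_signed)
    {
        int32_t lo = is_signed ? -128 : 0;
        int32_t hi = is_signed ? 127 : 255;
        int32_t x = v;
        if (x < lo) x = lo;
        if (x > hi) x = hi;
        return (uint8_t)x;
    }

    int main(void)
    {
        assert(narrow_lane(-200, 1) == 0x80); /* signed: saturate to -128 */
        assert(narrow_lane(-200, 0) == 0x00); /* unsigned: saturate to 0 */
        assert(narrow_lane(300, 0) == 0xff);  /* unsigned: saturate to 255 */
        return 0;
    }
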
+
+bool
+aot_compile_simd_i16x8_narrow_i32x4(AOTCompContext *comp_ctx,
+                                    AOTFuncContext *func_ctx,
+                                    bool is_signed)
+{
+    LLVMValueRef vector1, vector2, result, vector_min, vector_max, shuffle,
+        vector1_clamped, vector2_clamped, vector1_trunced, vector2_trunced,
+        shuffle_vector;
+    LLVMValueRef v1_gt_max, v1_lt_min, v2_gt_max, v2_lt_min;
+
+    if (is_target_x86(comp_ctx)) {
+        return aot_compile_simd_i16x8_narrow_i32x4_x86(comp_ctx, func_ctx,
+                                                       is_signed);
+    }
+
+    int min_s_array[4] = { 0xffff8000, 0xffff8000, 0xffff8000, 0xffff8000 };
+    int max_s_array[4] = { 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff };
+
+    int min_u_array[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 };
+    int max_u_array[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
+
+    int shuffle_array[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+    if (!(vector2 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i32x4_TYPE, "vec2"))) {
+        goto fail;
+    }
+
+    if (!(vector1 = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
+                                              V128_i32x4_TYPE, "vec1"))) {
+        goto fail;
+    }
+
+    if (!(vector_min = build_intx4_vector(
+            comp_ctx, I32_TYPE, is_signed ? min_s_array : min_u_array))) {
+        goto fail;
+    }
+    if (!(vector_max = build_intx4_vector(
+            comp_ctx, I32_TYPE, is_signed ? max_s_array : max_u_array))) {
+        goto fail;
+    }
+    if (!(shuffle = build_intx8_vector(comp_ctx, I32_TYPE, shuffle_array))) {
+        goto fail;
+    }
+
+    if (!(v1_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector1,
+                                    vector_max, "v1_greater_than_max"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(v2_gt_max = LLVMBuildICmp(comp_ctx->builder, LLVMIntSGT, vector2,
+                                    vector_max, "v2_greater_than_max"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(v1_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector1,
+                                    vector_min, "v1_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(v2_lt_min = LLVMBuildICmp(comp_ctx->builder, LLVMIntSLT, vector2,
+                                    vector_min, "v2_less_than_min"))) {
+        HANDLE_FAILURE("LLVMBuildICmp");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_gt_max, vector_max, vector1,
+                            "vector1_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v1_lt_min, vector_min,
+                            vector1_clamped, "vector1_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_gt_max, vector_max, vector2,
+                            "vector2_clamped_max"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector2_clamped =
+            LLVMBuildSelect(comp_ctx->builder, v2_lt_min, vector_min,
+                            vector2_clamped, "vector2_clamped_min"))) {
+        HANDLE_FAILURE("LLVMBuildSelect");
+        goto fail;
+    }
+
+    if (!(vector1_trunced = LLVMBuildTrunc(comp_ctx->builder, vector1_clamped,
+                                           LLVMVectorType(INT16_TYPE, 4),
+                                           "vector1_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(vector2_trunced = LLVMBuildTrunc(comp_ctx->builder, vector2_clamped,
+                                           LLVMVectorType(INT16_TYPE, 4),
+                                           "vector2_trunced"))) {
+        HANDLE_FAILURE("LLVMBuildTrunc");
+        goto fail;
+    }
+
+    if (!(shuffle_vector = LLVMBuildShuffleVector(
+            comp_ctx->builder, vector1_trunced, vector2_trunced, shuffle,
+            "shuffle_vector"))) {
+        HANDLE_FAILURE("LLVMBuildShuffleVector");
+        goto fail;
+    }
+
+    if (!(result = LLVMBuildBitCast(comp_ctx->builder, shuffle_vector,
+                                    V128_i64x2_TYPE, "ret"))) {
+        HANDLE_FAILURE("LLVMBuildBitCast");
+        goto fail;
+    }
+
+    PUSH_V128(result);
+    return true;
+
+fail:
+    return false;
+}
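Note: even the unsigned variant uses signed compares (LLVMIntSGT/LLVMIntSLT), which is correct for narrow_i32x4_u: the spec interprets input lanes as signed and saturates them into [0, 0xffff]. A scalar model of one lane with that edge case worked out (hypothetical helper, illustration only):

    #include <assert.h>
    #include <stdint.h>

    /* i16x8.narrow_i32x4_{s,u}: signed interpretation of the input,
     * saturated to the signed or unsigned 16-bit range. */
    static uint16_t narrow_lane_i32(int32_t v, int is_signed)
    {
        int64_t lo = is_signed ? -32768 : 0;
        int64_t hi = is_signed ? 32767 : 65535;
        int64_t x = v;
        if (x < lo) x = lo;
        if (x > hi) x = hi;
        return (uint16_t)x;
    }

    int main(void)
    {
        assert(narrow_lane_i32((int32_t)0xffff8000, 0) == 0);      /* -32768 -> 0 */
        assert(narrow_lane_i32((int32_t)0xffff8000, 1) == 0x8000); /* -32768 kept */
        assert(narrow_lane_i32(0x12345, 0) == 0xffff);             /* saturate up */
        return 0;
    }
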
+
 bool
 aot_compile_simd_i16x8_widen_i8x16(AOTCompContext *comp_ctx,
                                    AOTFuncContext *func_ctx,
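Note: with these changes, the AArch64 SIMD path is selected both by the AOT compiler (the aarch64 target check in aot_llvm.c) and by the runtime build. A typical configuration uses only flags that appear in the cmake logic above; the rest of the command line is illustrative:

    cmake .. -DWAMR_BUILD_TARGET=AARCH64 -DWAMR_BUILD_SIMD=1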