XIP adaptation for xtensa platform (#1636)

Add macro WASM_ENABLE_WORD_ALIGN_READ to enable reading
1/2/4 and n bytes of data from a vram buffer, which requires every
read to use a 4-byte aligned address.

Eliminate XIP AOT relocations related to the below ones:
   i32_div_u, f32_min, f32_max, f32_ceil, f32_floor, f32_trunc, f32_rint
This commit is contained in:
dongsheng28849455 2022-10-31 17:25:24 +08:00 committed by GitHub
parent dba9e52f2f
commit e517dbc7b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 244 additions and 11 deletions

View File

@ -70,6 +70,10 @@
#define WASM_ENABLE_AOT 0
#endif
/* Whether to read input buffers through 4-byte aligned word accesses;
   disabled (0) unless the build defines it otherwise */
#ifndef WASM_ENABLE_WORD_ALIGN_READ
#define WASM_ENABLE_WORD_ALIGN_READ 0
#endif
#define AOT_MAGIC_NUMBER 0x746f6100
#define AOT_CURRENT_VERSION 3

View File

@ -66,6 +66,7 @@ static const aot_intrinsic g_intrinsic_mapping[] = {
{ "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST },
{ "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST },
{ "i64.div_s", "aot_intrinsic_i64_div_s", AOT_INTRINSIC_FLAG_I64_DIV_S},
{ "i32.div_u", "aot_intrinsic_i32_div_u", AOT_INTRINSIC_FLAG_I32_DIV_U},
{ "i64.div_u", "aot_intrinsic_i64_div_u", AOT_INTRINSIC_FLAG_I64_DIV_U},
{ "i64.rem_s", "aot_intrinsic_i64_rem_s", AOT_INTRINSIC_FLAG_I64_REM_S},
{ "i64.rem_u", "aot_intrinsic_i64_rem_u", AOT_INTRINSIC_FLAG_I64_REM_U},
@ -497,6 +498,12 @@ aot_intrinsic_i64_div_s(int64 l, int64 r)
return l / r;
}
/*
 * Runtime helper for the "i32.div_u" intrinsic: unsigned 32-bit division.
 *
 * @param l the dividend
 * @param r the divisor; no zero check is performed here
 *
 * @return the unsigned quotient l / r
 */
uint32
aot_intrinsic_i32_div_u(uint32 l, uint32 r)
{
    uint32 quotient = l / r;

    return quotient;
}
uint64
aot_intrinsic_i64_div_u(uint64 l, uint64 r)
{
@ -551,6 +558,12 @@ add_i64_common_intrinsics(AOTCompContext *comp_ctx)
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_REM_U);
}
/* Mark the i32 intrinsics (currently only i32.div_u) as available
   in the given compilation context */
static void
add_i32_common_intrinsics(AOTCompContext *comp_ctx)
{
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_DIV_U);
}
static void
add_f32_common_intrinsics(AOTCompContext *comp_ctx)
{
@ -561,6 +574,12 @@ add_f32_common_intrinsics(AOTCompContext *comp_ctx)
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_FDIV);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_SQRT);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CMP);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_MIN);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_MAX);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CEIL);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_FLOOR);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_TRUNC);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_RINT);
}
static void
@ -667,7 +686,9 @@ aot_intrinsic_fill_capability_flags(AOTCompContext *comp_ctx)
* will cause rodata relocation
*/
add_f32_common_intrinsics(comp_ctx);
add_i32_common_intrinsics(comp_ctx);
add_f64_common_intrinsics(comp_ctx);
add_i64_common_intrinsics(comp_ctx);
add_common_float_integer_convertion(comp_ctx);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CONST);
add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CONST);

View File

@ -59,6 +59,7 @@ extern "C" {
#define AOT_INTRINSIC_FLAG_F32_CMP AOT_INTRINSIC_FLAG(0, 25)
#define AOT_INTRINSIC_FLAG_F32_CONST AOT_INTRINSIC_FLAG(0, 26)
#define AOT_INTRINSIC_FLAG_I32_CONST AOT_INTRINSIC_FLAG(0, 27)
#define AOT_INTRINSIC_FLAG_I32_DIV_U AOT_INTRINSIC_FLAG(0, 28)
#define AOT_INTRINSIC_FLAG_F64_FADD AOT_INTRINSIC_FLAG(1, 0)
#define AOT_INTRINSIC_FLAG_F64_FSUB AOT_INTRINSIC_FLAG(1, 1)
@ -254,6 +255,9 @@ aot_intrinsic_f64_cmp(AOTFloatCond cond, float64 lhs, float64 rhs);
int64
aot_intrinsic_i64_div_s(int64 l, int64 r);
/* Unsigned 32-bit division intrinsic: returns l / r */
uint32
aot_intrinsic_i32_div_u(uint32 l, uint32 r);
uint64
aot_intrinsic_i64_div_u(uint64 l, uint64 r);

View File

@ -123,6 +123,80 @@ GET_U64_FROM_ADDR(uint32 *addr)
return u.val;
}
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
/*
 * Read one byte located at p using only a 4-byte aligned word load.
 *
 * The aligned word that contains p is loaded into a local variable and
 * the requested byte is then taken from that local copy, so the source
 * buffer itself is never accessed with a byte-sized load.
 *
 * @param p address of the byte to read, must not be NULL
 *
 * @return the byte stored at p
 */
static inline uint8
GET_U8_FROM_ADDR(const uint8 *p)
{
    const uint8 *word_addr;
    const uint8 *word_bytes;
    uint32 word;

    bh_assert(p);

    /* align_ptr rounds up, so step back one word when it moved past p */
    word_addr = align_ptr(p, 4);
    if (word_addr > p) {
        word_addr -= 4;
    }

    /* Load the whole aligned word, then index into the local copy */
    word = *(const uint32 *)word_addr;
    word_bytes = (const uint8 *)&word;

    return word_bytes[p - word_addr];
}
static inline uint16
GET_U16_FROM_ADDR(const uint8 *p)
{
uint16 res = 0;
bh_assert(p);
const uint8 *p_aligned = align_ptr(p, 4);
p_aligned = (p_aligned > p) ? p_aligned - 4 : p_aligned;
uint32 buf32 = *(const uint32 *)p_aligned;
const uint8 *pbuf = (const uint8 *)&buf32;
res = *(uint16 *)(pbuf + (p - p_aligned));
return res;
}
/*
 * Word-aligned variant of TEMPLATE_READ: read one value of the given
 * integer type from p into res and advance p past it.
 *
 * p is first aligned to sizeof(type) (to 4 bytes for uint64) and the
 * remaining buffer is checked with CHECK_BUF. 1- and 2-byte values are
 * read through GET_U8_FROM_ADDR / GET_U16_FROM_ADDR, 4-byte values are
 * read directly from the aligned address, and 8-byte values are read
 * through GET_U64_FROM_ADDR. On a host that is not little-endian the
 * bytes of res are swapped with exchange_<type>.
 */
#define TEMPLATE_READ(p, p_end, res, type) \
do { \
if (sizeof(type) != sizeof(uint64)) \
p = (uint8 *)align_ptr(p, sizeof(type)); \
else \
/* align 4 bytes if type is uint64 */ \
p = (uint8 *)align_ptr(p, sizeof(uint32)); \
CHECK_BUF(p, p_end, sizeof(type)); \
if (sizeof(type) == sizeof(uint8)) \
res = GET_U8_FROM_ADDR(p); \
else if (sizeof(type) == sizeof(uint16)) \
res = GET_U16_FROM_ADDR(p); \
else if (sizeof(type) == sizeof(uint32)) \
res = *(type *)p; \
else \
res = (type)GET_U64_FROM_ADDR((uint32 *)p); \
if (!is_little_endian()) \
exchange_##type((uint8 *)&res); \
p += sizeof(type); \
} while (0)
/*
 * Word-aligned variant of read_byte_array: after checking with CHECK_BUF
 * that len bytes are available, copy len bytes from p to addr with
 * bh_memcpy_wa and advance p by len.
 */
#define read_byte_array(p, p_end, addr, len) \
do { \
CHECK_BUF(p, p_end, len); \
bh_memcpy_wa(addr, len, p, len); \
p += len; \
} while (0)
/*
 * Word-aligned variant of read_string: load a string at p into str via
 * load_string, passing true for its is_vram_word_align argument, and
 * jump to the fail label when load_string returns NULL.
 *
 * Expects module, is_load_from_file_buf, error_buf and error_buf_size
 * to be visible in the calling scope.
 */
#define read_string(p, p_end, str) \
do { \
if (!(str = load_string((uint8 **)&p, p_end, module, \
is_load_from_file_buf, true, error_buf, \
error_buf_size))) \
goto fail; \
} while (0)
#else /* else of (WASM_ENABLE_WORD_ALIGN_READ != 0) */
#define TEMPLATE_READ(p, p_end, res, type) \
do { \
if (sizeof(type) != sizeof(uint64)) \
@ -140,11 +214,6 @@ GET_U64_FROM_ADDR(uint32 *addr)
p += sizeof(type); \
} while (0)
#define read_uint8(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint8)
#define read_uint16(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint16)
#define read_uint32(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint32)
#define read_uint64(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint64)
#define read_byte_array(p, p_end, addr, len) \
do { \
CHECK_BUF(p, p_end, len); \
@ -160,6 +229,13 @@ GET_U64_FROM_ADDR(uint32 *addr)
goto fail; \
} while (0)
#endif /* end of (WASM_ENABLE_WORD_ALIGN_READ != 0) */
/* Typed readers built on whichever TEMPLATE_READ variant was selected
   above by WASM_ENABLE_WORD_ALIGN_READ */
#define read_uint8(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint8)
#define read_uint16(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint16)
#define read_uint32(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint32)
#define read_uint64(p, p_end, res) TEMPLATE_READ(p, p_end, res, uint64)
/* Legal values for bin_type */
#define BIN_TYPE_ELF32L 0 /* 32-bit little endian */
#define BIN_TYPE_ELF32B 1 /* 32-bit big endian */
@ -211,6 +287,9 @@ loader_malloc(uint64 size, char *error_buf, uint32 error_buf_size)
static char *
const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
bool is_vram_word_align,
#endif
char *error_buf, uint32 error_buf_size)
{
HashMap *set = module->const_str_set;
@ -230,8 +309,15 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
if (!(c_str = loader_malloc((uint32)len + 1, error_buf, error_buf_size))) {
return NULL;
}
bh_memcpy_s(c_str, (uint32)(len + 1), str, (uint32)len);
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
if (is_vram_word_align) {
bh_memcpy_wa(c_str, (uint32)(len + 1), str, (uint32)len);
}
else
#endif
{
bh_memcpy_s(c_str, (uint32)(len + 1), str, (uint32)len);
}
c_str[len] = '\0';
if ((value = bh_hash_map_find(set, c_str))) {
@ -251,7 +337,11 @@ const_str_set_insert(const uint8 *str, int32 len, AOTModule *module,
static char *
load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
bool is_load_from_file_buf, char *error_buf, uint32 error_buf_size)
bool is_load_from_file_buf,
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
bool is_vram_word_align,
#endif
char *error_buf, uint32 error_buf_size)
{
uint8 *p = *p_buf;
const uint8 *p_end = buf_end;
@ -264,6 +354,15 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
if (str_len == 0) {
str = "";
}
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
else if (is_vram_word_align) {
if (!(str = const_str_set_insert((uint8 *)p, str_len, module,
is_vram_word_align, error_buf,
error_buf_size))) {
goto fail;
}
}
#endif
else if (p[str_len - 1] == '\0') {
/* The string is terminated with '\0', use it directly */
str = (char *)p;
@ -280,8 +379,11 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module,
/* Load from sections, the file buffer cannot be reffered to
after loading, we must create another string and insert it
into const string set */
if (!(str = const_str_set_insert((uint8 *)p, str_len, module, error_buf,
error_buf_size))) {
if (!(str = const_str_set_insert((uint8 *)p, str_len, module,
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
is_vram_word_align,
#endif
error_buf, error_buf_size))) {
goto fail;
}
}

View File

@ -110,6 +110,7 @@ typedef struct {
REG_SYM(aot_intrinsic_i64_div_u), \
REG_SYM(aot_intrinsic_i64_rem_s), \
REG_SYM(aot_intrinsic_i64_rem_u), \
REG_SYM(aot_intrinsic_i32_div_u), \
#define REG_COMMON_SYMBOLS \
REG_SYM(aot_set_exception_with_id), \

View File

@ -506,6 +506,10 @@ wasm_runtime_full_init(RuntimeInitArgs *init_args)
PackageType
get_package_type(const uint8 *buf, uint32 size)
{
#if (WASM_ENABLE_WORD_ALIGN_READ != 0)
uint32 buf32 = *(uint32 *)buf;
buf = (const uint8 *)&buf32;
#endif
if (buf && size >= 4) {
if (buf[0] == '\0' && buf[1] == 'a' && buf[2] == 's' && buf[3] == 'm')
return Wasm_Module_Bytecode;

View File

@ -565,7 +565,22 @@ compile_int_div(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
PUSH_INT(res);
return true;
case INT_DIV_U:
LLVM_BUILD_OP(UDiv, left, right, res, "div_u", false);
if (comp_ctx->disable_llvm_intrinsics && is_i32
&& aot_intrinsic_check_capability(comp_ctx, "i32.div_u")) {
res = aot_call_llvm_intrinsic(comp_ctx, func_ctx,
"i32.div_u", param_types[0],
param_types, 2, left, right);
}
else if (comp_ctx->disable_llvm_intrinsics && !is_i32
&& aot_intrinsic_check_capability(comp_ctx,
"i64.div_u")) {
res = aot_call_llvm_intrinsic(comp_ctx, func_ctx,
"i64.div_u", param_types[0],
param_types, 2, left, right);
}
else {
LLVM_BUILD_OP(UDiv, left, right, res, "div_u", false);
}
PUSH_INT(res);
return true;
case INT_REM_S:

View File

@ -5,6 +5,73 @@
#include "bh_common.h"
/*
 * Round a pointer up to the next multiple of b.
 *
 * @param src the pointer to align
 * @param b the alignment in bytes; the mask arithmetic assumes a power
 *          of two
 *
 * @return src itself when it is already aligned, otherwise the first
 *         address above src that is a multiple of b
 */
static char *
align_ptr(char *src, unsigned int b)
{
    const uintptr_t mask = b - 1;
    const uintptr_t bumped = (uintptr_t)src + mask;

    return (char *)(bumped & ~mask);
}
/*
 * Memory copy, with word alignment.
 *
 * Copies n bytes from s2 to s1 while reading the source only through
 * 4-byte aligned word loads: every aligned word that overlaps
 * [s2, s2 + n) is loaded into a local variable and the bytes that fall
 * inside the requested range are then copied out of that local copy.
 * The first and last word may therefore be read in full even though
 * only part of them is copied. The destination is written one byte at
 * a time, so it does not need any particular alignment.
 *
 * @param s1 the destination buffer
 * @param s1max the size of the destination buffer in bytes
 * @param s2 the source buffer
 * @param n the number of bytes to copy
 *
 * @return 0 on success (including n == 0), -1 if s1 or s2 is NULL or
 *         n is larger than s1max; nothing is written on failure
 */
int
b_memcpy_wa(void *s1, unsigned int s1max, const void *s2, unsigned int n)
{
    char *dest = (char *)s1;
    const char *src = (const char *)s2;
    const char *src_end;
    const char *word;
    const char *word_end;
    unsigned int buff;

    if (n == 0) {
        return 0;
    }

    if (!s1 || !s2 || n > s1max) {
        return -1;
    }

    src_end = src + n;

    /* Round the start down and the end up to 4-byte boundaries */
    word = (const char *)((uintptr_t)src & ~(uintptr_t)3);
    word_end = (const char *)(((uintptr_t)src_end + 3) & ~(uintptr_t)3);

    for (; word < word_end; word += 4) {
        const char *bytes = (const char *)&buff;
        /* Byte range [first, last) of this word lying inside the source */
        unsigned int first = (word < src) ? (unsigned int)(src - word) : 0;
        unsigned int last =
            ((src_end - word) < 4) ? (unsigned int)(src_end - word) : 4;
        unsigned int i;

        /* The only access to the source: one aligned word load */
        buff = *(const unsigned int *)word;

        for (i = first; i < last; i++) {
            *dest++ = bytes[i];
        }
    }

    return 0;
}
int
b_memcpy_s(void *s1, unsigned int s1max, const void *s2, unsigned int n)
{

View File

@ -19,6 +19,13 @@ extern "C" {
bh_assert(_ret == 0); \
} while (0)
/* Word-aligned copy wrapper: does nothing when slen is 0, otherwise
   calls b_memcpy_wa and asserts that it returned 0 */
#define bh_memcpy_wa(dest, dlen, src, slen) \
do { \
int _ret = slen == 0 ? 0 : b_memcpy_wa(dest, dlen, src, slen); \
(void)_ret; \
bh_assert(_ret == 0); \
} while (0)
#define bh_memmove_s(dest, dlen, src, slen) \
do { \
int _ret = slen == 0 ? 0 : b_memmove_s(dest, dlen, src, slen); \
@ -43,6 +50,8 @@ extern "C" {
int
b_memcpy_s(void *s1, unsigned int s1max, const void *s2, unsigned int n);
/* Copy n bytes from s2 to s1, reading the source through 4-byte aligned
   word loads; s1max is presumably the destination capacity, as in
   b_memcpy_s (TODO confirm) */
int
b_memcpy_wa(void *s1, unsigned int s1max, const void *s2, unsigned int n);
int
b_memmove_s(void *s1, unsigned int s1max, const void *s2, unsigned int n);
int
b_strcat_s(char *s1, unsigned int s1max, const char *s2);

View File

@ -135,6 +135,12 @@ else
CFLAGS += -DWASM_ENABLE_AOT=0
endif
# Map CONFIG_INTERPRETERS_WAMR_AOT_WORD_ALIGN_READ onto the
# WASM_ENABLE_WORD_ALIGN_READ compile-time switch: 1 when set to y, else 0.
ifeq ($(CONFIG_INTERPRETERS_WAMR_AOT_WORD_ALIGN_READ),y)
CFLAGS += -DWASM_ENABLE_WORD_ALIGN_READ=1
else
CFLAGS += -DWASM_ENABLE_WORD_ALIGN_READ=0
endif
ifeq ($(CONFIG_INTERPRETERS_WAMR_FAST), y)
CFLAGS += -DWASM_ENABLE_FAST_INTERP=1
CFLAGS += -DWASM_ENABLE_INTERP=1