From 6922f3ac688a134763829190baecfa24843a81c5 Mon Sep 17 00:00:00 2001
From: Wenyong Huang <wenyong.huang@intel.com>
Date: Thu, 4 Aug 2022 14:42:28 +0800
Subject: [PATCH] Implement xtensa XIP (#1202)

Load i32.const and i64.const values from the const table for xtensa XIP
Load the constant offset of load/store opcodes from the const table for xtensa XIP
Fill the intrinsic capability flags for xtensa XIP
Enable the lower switch pass for xtensa XIP
---
 core/iwasm/aot/aot_intrinsic.c             | 19 +++++++++--
 core/iwasm/aot/aot_intrinsic.h             |  2 ++
 core/iwasm/aot/aot_loader.c                |  5 +--
 core/iwasm/compilation/aot_compiler.c      |  5 +--
 core/iwasm/compilation/aot_emit_const.c    | 38 +++++++++++++++++++---
 core/iwasm/compilation/aot_emit_memory.c   | 14 +++++++-
 core/iwasm/compilation/aot_emit_numberic.c | 14 +++++---
 core/iwasm/compilation/aot_llvm.c          | 10 ++++++
 doc/build_wamr.md                          |  4 +--
 9 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/core/iwasm/aot/aot_intrinsic.c b/core/iwasm/aot/aot_intrinsic.c
index 7a904cb0..a4af9de3 100644
--- a/core/iwasm/aot/aot_intrinsic.c
+++ b/core/iwasm/aot/aot_intrinsic.c
@@ -61,8 +61,10 @@ static const aot_intrinsic g_intrinsic_mapping[] = {
     { "f64_promote_f32", "aot_intrinsic_f32_to_f64", AOT_INTRINSIC_FLAG_F32_TO_F64 },
     { "f32_cmp", "aot_intrinsic_f32_cmp", AOT_INTRINSIC_FLAG_F32_CMP },
     { "f64_cmp", "aot_intrinsic_f64_cmp", AOT_INTRINSIC_FLAG_F64_CMP },
-    { "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST},
-    { "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST},
+    { "i32.const", NULL, AOT_INTRINSIC_FLAG_I32_CONST },
+    { "i64.const", NULL, AOT_INTRINSIC_FLAG_I64_CONST },
+    { "f32.const", NULL, AOT_INTRINSIC_FLAG_F32_CONST },
+    { "f64.const", NULL, AOT_INTRINSIC_FLAG_F64_CONST },
 };
 /* clang-format on */
 
@@ -619,6 +621,19 @@ aot_intrinsic_fill_capability_flags(AOTCompContext *comp_ctx)
         add_f64_common_intrinsics(comp_ctx);
         add_common_float_integer_convertion(comp_ctx);
     }
+    else if (!strncmp(comp_ctx->target_arch, "xtensa", 6)) {
+        /*
+         * Note: Use builtin intrinsics since hardware float operation
+         * will cause rodata relocation
+         */
+        add_f32_common_intrinsics(comp_ctx);
+        add_f64_common_intrinsics(comp_ctx);
+        add_common_float_integer_convertion(comp_ctx);
+        add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F32_CONST);
+        add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_F64_CONST);
+        add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I32_CONST);
+        add_intrinsic_capability(comp_ctx, AOT_INTRINSIC_FLAG_I64_CONST);
+    }
     else {
         /*
          * Use constant value table by default
diff --git a/core/iwasm/aot/aot_intrinsic.h b/core/iwasm/aot/aot_intrinsic.h
index a29c499e..b21e7759 100644
--- a/core/iwasm/aot/aot_intrinsic.h
+++ b/core/iwasm/aot/aot_intrinsic.h
@@ -58,6 +58,7 @@ extern "C" {
 #define AOT_INTRINSIC_FLAG_F32_TO_F64   AOT_INTRINSIC_FLAG(0, 24)
 #define AOT_INTRINSIC_FLAG_F32_CMP      AOT_INTRINSIC_FLAG(0, 25)
 #define AOT_INTRINSIC_FLAG_F32_CONST    AOT_INTRINSIC_FLAG(0, 26)
+#define AOT_INTRINSIC_FLAG_I32_CONST    AOT_INTRINSIC_FLAG(0, 27)
 
 #define AOT_INTRINSIC_FLAG_F64_FADD     AOT_INTRINSIC_FLAG(1, 0)
 #define AOT_INTRINSIC_FLAG_F64_FSUB     AOT_INTRINSIC_FLAG(1, 1)
@@ -86,6 +87,7 @@ extern "C" {
 #define AOT_INTRINSIC_FLAG_F64_TO_F32   AOT_INTRINSIC_FLAG(1, 24)
 #define AOT_INTRINSIC_FLAG_F64_CMP      AOT_INTRINSIC_FLAG(1, 25)
 #define AOT_INTRINSIC_FLAG_F64_CONST    AOT_INTRINSIC_FLAG(1, 26)
+#define AOT_INTRINSIC_FLAG_I64_CONST    AOT_INTRINSIC_FLAG(1, 27)
 /* clang-format on */
 
 float32
diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c
index d6560ccc..c185150c 100644
--- a/core/iwasm/aot/aot_loader.c
+++ b/core/iwasm/aot/aot_loader.c
@@ -482,7 +482,7 @@ load_native_symbol_section(const uint8 *buf, const uint8 *buf_end,
 
         for (i = cnt - 1; i >= 0; i--) {
             read_string(p, p_end, symbol);
-            if (!strncmp(symbol, "f32#", 4)) {
+            if (!strncmp(symbol, "f32#", 4) || !strncmp(symbol, "i32#", 4)) {
                 uint32 u32;
                 /* Resolve the raw int bits of f32 const */
                 if (!str2uint32(symbol + 4, &u32)) {
@@ -492,7 +492,8 @@ load_native_symbol_section(const uint8 *buf, const uint8 *buf_end,
                 }
                 *(uint32 *)(&module->native_symbol_list[i]) = u32;
             }
-            else if (!strncmp(symbol, "f64#", 4)) {
+            else if (!strncmp(symbol, "f64#", 4)
+                     || !strncmp(symbol, "i64#", 4)) {
                 uint64 u64;
                 /* Resolve the raw int bits of f64 const */
                 if (!str2uint64(symbol + 4, &u64)) {
diff --git a/core/iwasm/compilation/aot_compiler.c b/core/iwasm/compilation/aot_compiler.c
index 3cece0a5..064f5831 100644
--- a/core/iwasm/compilation/aot_compiler.c
+++ b/core/iwasm/compilation/aot_compiler.c
@@ -2742,8 +2742,9 @@ aot_require_lower_switch_pass(AOTCompContext *comp_ctx)
 {
     bool ret = false;
 
-    /* IR switch/case will cause .rodata relocation on riscv */
-    if (!strncmp(comp_ctx->target_arch, "riscv", 5)) {
+    /* IR switch/case will cause .rodata relocation on riscv/xtensa */
+    if (!strncmp(comp_ctx->target_arch, "riscv", 5)
+        || !strncmp(comp_ctx->target_arch, "xtensa", 6)) {
         ret = true;
     }
 
diff --git a/core/iwasm/compilation/aot_emit_const.c b/core/iwasm/compilation/aot_emit_const.c
index 949b275a..2508a36e 100644
--- a/core/iwasm/compilation/aot_emit_const.c
+++ b/core/iwasm/compilation/aot_emit_const.c
@@ -10,8 +10,23 @@ bool
 aot_compile_op_i32_const(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                          int32 i32_const)
 {
-    LLVMValueRef value = I32_CONST((uint32)i32_const);
-    CHECK_LLVM_CONST(value);
+    LLVMValueRef value;
+
+    if (comp_ctx->is_indirect_mode
+        && aot_intrinsic_check_capability(comp_ctx, "i32.const")) {
+        WASMValue wasm_value;
+        wasm_value.i32 = i32_const;
+        value = aot_load_const_from_table(comp_ctx, func_ctx->native_symbol,
+                                          &wasm_value, VALUE_TYPE_I32);
+        if (!value) {
+            return false;
+        }
+    }
+    else {
+        value = I32_CONST((uint32)i32_const);
+        CHECK_LLVM_CONST(value);
+    }
+
     PUSH_I32(value);
     return true;
 fail:
@@ -22,8 +37,23 @@ bool
 aot_compile_op_i64_const(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
                          int64 i64_const)
 {
-    LLVMValueRef value = I64_CONST((uint64)i64_const);
-    CHECK_LLVM_CONST(value);
+    LLVMValueRef value;
+
+    if (comp_ctx->is_indirect_mode
+        && aot_intrinsic_check_capability(comp_ctx, "i64.const")) {
+        WASMValue wasm_value;
+        wasm_value.i64 = i64_const;
+        value = aot_load_const_from_table(comp_ctx, func_ctx->native_symbol,
+                                          &wasm_value, VALUE_TYPE_I64);
+        if (!value) {
+            return false;
+        }
+    }
+    else {
+        value = I64_CONST((uint64)i64_const);
+        CHECK_LLVM_CONST(value);
+    }
+
     PUSH_I64(value);
     return true;
 fail:
diff --git a/core/iwasm/compilation/aot_emit_memory.c b/core/iwasm/compilation/aot_emit_memory.c
index aacc1640..05e13b30 100644
--- a/core/iwasm/compilation/aot_emit_memory.c
+++ b/core/iwasm/compilation/aot_emit_memory.c
@@ -97,7 +97,19 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
 
     is_target_64bit = (comp_ctx->pointer_size == sizeof(uint64)) ? true : false;
 
-    CHECK_LLVM_CONST(offset_const);
+    if (comp_ctx->is_indirect_mode
+        && aot_intrinsic_check_capability(comp_ctx, "i32.const")) {
+        WASMValue wasm_value;
+        wasm_value.i32 = offset;
+        offset_const = aot_load_const_from_table(
+            comp_ctx, func_ctx->native_symbol, &wasm_value, VALUE_TYPE_I32);
+        if (!offset_const) {
+            return NULL;
+        }
+    }
+    else {
+        CHECK_LLVM_CONST(offset_const);
+    }
 
     /* Get memory base address and memory data size */
     if (func_ctx->mem_space_unchanged
diff --git a/core/iwasm/compilation/aot_emit_numberic.c b/core/iwasm/compilation/aot_emit_numberic.c
index 44e65a0c..3ddd8cea 100644
--- a/core/iwasm/compilation/aot_emit_numberic.c
+++ b/core/iwasm/compilation/aot_emit_numberic.c
@@ -800,9 +800,10 @@ is_targeting_soft_float(AOTCompContext *comp_ctx, bool is_f32)
      * so user must specify '--cpu-features=+soft-float' to wamrc if the target
      * doesn't have or enable FPU on arm, x86 or mips. */
     if (is_target_arm(comp_ctx) || is_target_x86(comp_ctx)
-        || is_target_mips(comp_ctx))
+        || is_target_mips(comp_ctx)) {
         ret = strstr(feature_string, "+soft-float") ? true : false;
-    else if (is_target_xtensa(comp_ctx))
+    }
+    else if (is_target_xtensa(comp_ctx)) {
         /* Note:
          * 1. The Floating-Point Coprocessor Option of xtensa only support
          * single-precision floating-point operations, so must use soft-float
@@ -811,7 +812,11 @@ is_targeting_soft_float(AOTCompContext *comp_ctx, bool is_f32)
          * so user must specify '--cpu-features=-fp' to wamrc if the target
          * doesn't have or enable Floating-Point Coprocessor Option on xtensa.
          */
-        ret = (!is_f32 || strstr(feature_string, "-fp")) ? true : false;
+        if (comp_ctx->disable_llvm_intrinsics)
+            ret = false;
+        else
+            ret = (!is_f32 || strstr(feature_string, "-fp")) ? true : false;
+    }
     else if (is_target_riscv(comp_ctx)) {
         /*
          * Note: Use builtin intrinsics since hardware float operation
@@ -823,8 +828,9 @@ is_targeting_soft_float(AOTCompContext *comp_ctx, bool is_f32)
         else
             ret = !strstr(feature_string, "+d") ? true : false;
     }
-    else
+    else {
         ret = true;
+    }
 
     LLVMDisposeMessage(feature_string);
     return ret;
diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c
index 12b34d58..97faeae3 100644
--- a/core/iwasm/compilation/aot_llvm.c
+++ b/core/iwasm/compilation/aot_llvm.c
@@ -2731,6 +2731,16 @@ aot_load_const_from_table(AOTCompContext *comp_ctx, LLVMValueRef base,
     int32 index;
 
     switch (value_type) {
+        case VALUE_TYPE_I32:
+            /* Store the raw int bits of i32 const as a hex string */
+            snprintf(buf, sizeof(buf), "i32#%08" PRIX32, value->i32);
+            const_ptr_type = INT32_PTR_TYPE;
+            break;
+        case VALUE_TYPE_I64:
+            /* Store the raw int bits of i64 const as a hex string */
+            snprintf(buf, sizeof(buf), "i64#%016" PRIX64, value->i64);
+            const_ptr_type = INT64_PTR_TYPE;
+            break;
         case VALUE_TYPE_F32:
             /* Store the raw int bits of f32 const as a hex string */
             snprintf(buf, sizeof(buf), "f32#%08" PRIX32, value->i32);
diff --git a/doc/build_wamr.md b/doc/build_wamr.md
index e35108af..4a7f26e3 100644
--- a/doc/build_wamr.md
+++ b/doc/build_wamr.md
@@ -580,8 +580,8 @@ In order to use this, you need at least version 4.3.1 of ESP-IDF.
 If you don't have it installed, follow the instructions [here](https://docs.espressif.com/projects/esp-idf/en/latest/esp32/get-started/#get-started-get-prerequisites).
 ESP-IDF also installs the toolchains needed for compiling WAMR and ESP-IDF.
 A small demonstration of how to use WAMR and ESP-IDF can be found under [product_mini](/product-mini/platforms/esp-idf).
-The demo builds WAMR for ESP-IDF and runs a small wasm program.
-In order to run it for your specific Espressif chip, edit the ['build_and_run.sh'](/product-mini/platforms/esp-idf/build_and_run.sh) file and put the correct toolchain file (see #Cross-compilation) and `IDF_TARGET`.
+The demo builds WAMR for ESP-IDF and runs a small wasm program.
+In order to run it for your specific Espressif chip, edit the [build_and_run.sh](/product-mini/platforms/esp-idf/build_and_run.sh) file and put the correct toolchain file (see #Cross-compilation) and `IDF_TARGET`.
 Before compiling it is also necessary to call ESP-IDF's `export.sh` script to bring all compile time relevant information in scope.
 
 Docker