From 1614ce12fa053d5915fb6d6b70cc234aa9a4b483 Mon Sep 17 00:00:00 2001 From: tonibofarull Date: Thu, 2 Feb 2023 01:09:46 +0100 Subject: [PATCH] wasi-nn: Enable GPU support (#1922) - Split logic in several dockers - runtime: wasi-nn-cpu and wasi-nn- Nvidia-gpu. - compilation: wasi-nn-compile. Prepare the testing wasm and generates the TFLites. - Implement GPU support for TFLite with Opencl. --- core/iwasm/libraries/wasi-nn/README.md | 55 +++++++++++++-- .../wasi-nn/src/wasi_nn_tensorflowlite.cpp | 28 +++++++- .../libraries/wasi-nn/test/.dockerignore | 1 - .../libraries/wasi-nn/test/CMakeLists.txt | 4 +- .../libraries/wasi-nn/test/Dockerfile.base | 22 ++++++ .../test/{Dockerfile => Dockerfile.compile} | 25 ++----- .../libraries/wasi-nn/test/Dockerfile.cpu | 8 +++ .../wasi-nn/test/Dockerfile.nvidia-gpu | 20 ++++++ core/iwasm/libraries/wasi-nn/test/build.sh | 2 +- .../libraries/wasi-nn/test/test_tensorflow.c | 69 ++++++++++++------- 10 files changed, 178 insertions(+), 56 deletions(-) delete mode 100644 core/iwasm/libraries/wasi-nn/test/.dockerignore create mode 100644 core/iwasm/libraries/wasi-nn/test/Dockerfile.base rename core/iwasm/libraries/wasi-nn/test/{Dockerfile => Dockerfile.compile} (54%) create mode 100644 core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu create mode 100644 core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu mode change 100755 => 100644 core/iwasm/libraries/wasi-nn/test/test_tensorflow.c diff --git a/core/iwasm/libraries/wasi-nn/README.md b/core/iwasm/libraries/wasi-nn/README.md index ed3fbd54..efd8fbfc 100644 --- a/core/iwasm/libraries/wasi-nn/README.md +++ b/core/iwasm/libraries/wasi-nn/README.md @@ -17,24 +17,69 @@ By only including this file in your WASM application you will bind WASI-NN into To run the tests we assume that the current directory is the root of the repository. -1. 
Build the docker image, +### Build the runtime + +Build the runtime base image, ``` -docker build -t wasi-nn -f core/iwasm/libraries/wasi-nn/test/Dockerfile . +docker build -t wasi-nn-base -f core/iwasm/libraries/wasi-nn/test/Dockerfile.base . ``` -2. Run the container +Build the runtime image for your execution target type. + +`EXECUTION_TYPE` can be: +* `cpu` +* `nvidia-gpu` ``` -docker run wasi-nn +EXECUTION_TYPE=cpu +docker build -t wasi-nn-${EXECUTION_TYPE} -f core/iwasm/libraries/wasi-nn/test/Dockerfile.${EXECUTION_TYPE} . ``` + +### Build wasm app + +``` +docker build -t wasi-nn-compile -f core/iwasm/libraries/wasi-nn/test/Dockerfile.compile . +``` + +``` +docker run -v $PWD/core/iwasm/libraries/wasi-nn:/wasi-nn wasi-nn-compile +``` + + +### Run wasm app + If all the tests have run properly you will the the following message in the terminal, ``` Tests: passed! ``` +* CPU + +``` +docker run \ + -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-cpu \ + --dir=/assets \ + --env="TARGET=cpu" \ + /assets/test_tensorflow.wasm +``` + +* (NVIDIA) GPU + +``` +docker run \ + --runtime=nvidia \ + -v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-nvidia-gpu \ + --dir=/assets \ + --env="TARGET=gpu" \ + /assets/test_tensorflow.wasm +``` + +Requirements: +* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker). + ## What is missing Supported: @@ -43,5 +88,5 @@ Supported: * Only 1 model at a time. * `graph` and `graph-execution-context` are ignored. * Graph encoding: `tensorflowlite`. -* Execution target: `cpu`. +* Execution target: `cpu` and `gpu`. * Tensor type: `fp32`. 
diff --git a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp index b795a7fe..d40e2399 100644 --- a/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp +++ b/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp @@ -16,6 +16,7 @@ #include #include #include +#include <tensorflow/lite/delegates/gpu/delegate.h> /* Global variables */ @@ -45,8 +46,8 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding, return invalid_argument; } - if (target != cpu) { - NN_ERR_PRINTF("Only CPU target is supported."); + if (target != cpu && target != gpu) { + NN_ERR_PRINTF("Only CPU and GPU targets are supported."); return invalid_argument; } @@ -79,6 +80,29 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding, return missing_memory; } + bool use_default = false; + switch (target) { + case gpu: + { + // https://www.tensorflow.org/lite/performance/gpu + auto options = TfLiteGpuDelegateOptionsV2Default(); + options.inference_preference = + TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED; + options.inference_priority1 = + TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; + auto *delegate = TfLiteGpuDelegateV2Create(&options); + if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) { + NN_ERR_PRINTF("Error when enabling GPU delegate."); + use_default = true; + } + break; + } + default: + use_default = true; + } + if (use_default) + NN_WARN_PRINTF("Default target is CPU."); + return success; } diff --git a/core/iwasm/libraries/wasi-nn/test/.dockerignore b/core/iwasm/libraries/wasi-nn/test/.dockerignore deleted file mode 100644 index 94143827..00000000 --- a/core/iwasm/libraries/wasi-nn/test/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -Dockerfile diff --git a/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt b/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt index 7951dec4..eff716bd 100644 --- a/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt +++ b/core/iwasm/libraries/wasi-nn/test/CMakeLists.txt @@ 
-7,10 +7,10 @@ project (iwasm) set (CMAKE_VERBOSE_MAKEFILE OFF) # Reset default linker flags -set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "") -set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") set (CMAKE_C_STANDARD 99) set (CMAKE_CXX_STANDARD 14) +set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "") +set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") if (NOT DEFINED WAMR_BUILD_PLATFORM) set (WAMR_BUILD_PLATFORM "linux") diff --git a/core/iwasm/libraries/wasi-nn/test/Dockerfile.base b/core/iwasm/libraries/wasi-nn/test/Dockerfile.base new file mode 100644 index 00000000..769c5389 --- /dev/null +++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.base @@ -0,0 +1,22 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +FROM ubuntu:20.04 AS base + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y \ + cmake build-essential git + +WORKDIR /home/wamr + +COPY . . + +WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build + +RUN cmake \ + -DWAMR_BUILD_WASI_NN=1 \ + -DTFLITE_ENABLE_GPU=ON \ + .. + +RUN make -j $(grep -c ^processor /proc/cpuinfo) diff --git a/core/iwasm/libraries/wasi-nn/test/Dockerfile b/core/iwasm/libraries/wasi-nn/test/Dockerfile.compile similarity index 54% rename from core/iwasm/libraries/wasi-nn/test/Dockerfile rename to core/iwasm/libraries/wasi-nn/test/Dockerfile.compile index cb6bfa02..51a59707 100644 --- a/core/iwasm/libraries/wasi-nn/test/Dockerfile +++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.compile @@ -1,38 +1,23 @@ # Copyright (C) 2019 Intel Corporation. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -FROM ubuntu:22.04 +FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ cmake build-essential git wget python3.10 python3-pip -ARG WASI_SDK_VER=16 +ARG WASI_SDK_VER=19 RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -P /opt \ && tar xf /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -C /opt \ && ln -fs /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk \ && rm /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -WORKDIR /home/wamr - -COPY core/deps/install_tensorflow.sh core/deps/install_tensorflow.sh -RUN ./core/deps/install_tensorflow.sh +WORKDIR /wasi-nn/test COPY core/iwasm/libraries/wasi-nn/test/requirements.txt . -RUN pip3 install -r requirements.txt -COPY core core -COPY build-scripts build-scripts -COPY product-mini product-mini +RUN pip3 install -r requirements.txt && rm requirements.txt -WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build - -RUN cmake -DWAMR_BUILD_WASI_NN=1 .. -RUN make -j $(grep -c ^processor /proc/cpuinfo) - -WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test - -RUN ./build.sh - -ENTRYPOINT [ "./build/iwasm", "--dir=.", "test_tensorflow.wasm" ] +ENTRYPOINT [ "bash", "./build.sh" ] diff --git a/core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu b/core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu new file mode 100644 index 00000000..cec918cb --- /dev/null +++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.cpu @@ -0,0 +1,8 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +FROM ubuntu:20.04 + +COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm + +ENTRYPOINT [ "/run/iwasm" ] diff --git a/core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu b/core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu new file mode 100644 index 00000000..3d876efb --- /dev/null +++ b/core/iwasm/libraries/wasi-nn/test/Dockerfile.nvidia-gpu @@ -0,0 +1,20 @@ +# Copyright (C) 2019 Intel Corporation. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ocl-icd-libopencl1 \ + ocl-icd-opencl-dev \ + clinfo && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /etc/OpenCL/vendors && \ + echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd + +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility + +COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm + +ENTRYPOINT [ "/run/iwasm" ] diff --git a/core/iwasm/libraries/wasi-nn/test/build.sh b/core/iwasm/libraries/wasi-nn/test/build.sh index 4dc8d015..dbf2f2d6 100755 --- a/core/iwasm/libraries/wasi-nn/test/build.sh +++ b/core/iwasm/libraries/wasi-nn/test/build.sh @@ -7,7 +7,7 @@ -Wl,--allow-undefined \ -Wl,--strip-all,--no-entry \ --sysroot=/opt/wasi-sdk/share/wasi-sysroot \ - -I/home/wamr/core/iwasm/libraries/wasi-nn \ + -I.. 
\ -o test_tensorflow.wasm test_tensorflow.c # TFLite models to use in the tests diff --git a/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c b/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c old mode 100755 new mode 100644 index f813b6bc..46883d23 --- a/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c +++ b/core/iwasm/libraries/wasi-nn/test/test_tensorflow.c @@ -28,7 +28,7 @@ typedef struct { // WASI-NN wrappers error -wasm_load(char *model_name, graph *g) +wasm_load(char *model_name, graph *g, execution_target target) { FILE *pFile = fopen(model_name, "r"); if (pFile == NULL) @@ -64,7 +64,7 @@ wasm_load(char *model_name, graph *g) arr.buf[0].size = result; arr.buf[0].buf = buffer; - error res = load(&arr, tensorflowlite, cpu, g); + error res = load(&arr, tensorflowlite, target, g); fclose(pFile); free(buffer); @@ -115,11 +115,12 @@ wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor, // Inference float * -run_inference(float *input, uint32_t *input_size, uint32_t *output_size, - char *model_name, uint32_t num_output_tensors) +run_inference(execution_target target, float *input, uint32_t *input_size, + uint32_t *output_size, char *model_name, + uint32_t num_output_tensors) { graph graph; - if (wasm_load(model_name, &graph) != success) { + if (wasm_load(model_name, &graph, target) != success) { fprintf(stderr, "Error when loading model."); exit(1); } @@ -185,14 +186,14 @@ create_input(int *dims) // TESTS void -test_sum() +test_sum(execution_target target) { int dims[] = { 1, 5, 5, 1 }; input_info input = create_input(dims); uint32_t output_size = 0; - float *output = run_inference(input.input_tensor, input.dim, &output_size, - "models/sum.tflite", 1); + float *output = run_inference(target, input.input_tensor, input.dim, + &output_size, "/assets/models/sum.tflite", 1); assert(output_size == 1); assert(fabs(output[0] - 300.0) < EPSILON); @@ -203,14 +204,14 @@ test_sum() } void -test_max() +test_max(execution_target target) { int 
dims[] = { 1, 5, 5, 1 }; input_info input = create_input(dims); uint32_t output_size = 0; - float *output = run_inference(input.input_tensor, input.dim, &output_size, - "models/max.tflite", 1); + float *output = run_inference(target, input.input_tensor, input.dim, + &output_size, "/assets/models/max.tflite", 1); assert(output_size == 1); assert(fabs(output[0] - 24.0) < EPSILON); @@ -222,14 +223,15 @@ test_max() } void -test_average() +test_average(execution_target target) { int dims[] = { 1, 5, 5, 1 }; input_info input = create_input(dims); uint32_t output_size = 0; - float *output = run_inference(input.input_tensor, input.dim, &output_size, - "models/average.tflite", 1); + float *output = + run_inference(target, input.input_tensor, input.dim, &output_size, + "/assets/models/average.tflite", 1); assert(output_size == 1); assert(fabs(output[0] - 12.0) < EPSILON); @@ -241,14 +243,15 @@ test_average() } void -test_mult_dimensions() +test_mult_dimensions(execution_target target) { int dims[] = { 1, 3, 3, 1 }; input_info input = create_input(dims); uint32_t output_size = 0; - float *output = run_inference(input.input_tensor, input.dim, &output_size, - "models/mult_dim.tflite", 1); + float *output = + run_inference(target, input.input_tensor, input.dim, &output_size, + "/assets/models/mult_dim.tflite", 1); assert(output_size == 9); for (int i = 0; i < 9; i++) @@ -260,14 +263,15 @@ test_mult_dimensions() } void -test_mult_outputs() +test_mult_outputs(execution_target target) { int dims[] = { 1, 4, 4, 1 }; input_info input = create_input(dims); uint32_t output_size = 0; - float *output = run_inference(input.input_tensor, input.dim, &output_size, - "models/mult_out.tflite", 2); + float *output = + run_inference(target, input.input_tensor, input.dim, &output_size, + "/assets/models/mult_out.tflite", 2); assert(output_size == 8); // first tensor check @@ -285,16 +289,31 @@ test_mult_outputs() int main() { + char *env = getenv("TARGET"); + if (env == NULL) { + 
printf("Usage:\n--env=\"TARGET=[cpu|gpu]\"\n"); + return 1; + } + execution_target target; + if (strcmp(env, "cpu") == 0) + target = cpu; + else if (strcmp(env, "gpu") == 0) + target = gpu; + else { + printf("Wrong target!\n"); + return 1; + } printf("################### Testing sum...\n"); - test_sum(); + test_sum(target); printf("################### Testing max...\n"); - test_max(); + test_max(target); printf("################### Testing average...\n"); - test_average(); + test_average(target); printf("################### Testing multiple dimensions...\n"); - test_mult_dimensions(); + test_mult_dimensions(target); printf("################### Testing multiple outputs...\n"); - test_mult_outputs(); + test_mult_outputs(target); + printf("Tests: passed!\n"); return 0; }