wasi-nn: Enable GPU support (#1922)

- Split logic into several Docker images
  - runtime: wasi-nn-cpu and wasi-nn-nvidia-gpu.
  - compilation: wasi-nn-compile. Prepares the test wasm and generates the TFLite models.
- Implement GPU support for TFLite with OpenCL.
tonibofarull 2023-02-02 01:09:46 +01:00 committed by GitHub
parent fe3347d5d2
commit 1614ce12fa
10 changed files with 178 additions and 56 deletions

View File

@ -17,24 +17,69 @@ By only including this file in your WASM application you will bind WASI-NN into
To run the tests we assume that the current directory is the root of the repository.
1. Build the docker image,
### Build the runtime
Build the runtime base image,
```
docker build -t wasi-nn -f core/iwasm/libraries/wasi-nn/test/Dockerfile .
docker build -t wasi-nn-base -f core/iwasm/libraries/wasi-nn/test/Dockerfile.base .
```
2. Run the container
Build the runtime image for your execution target type.
`EXECUTION_TYPE` can be:
* `cpu`
* `nvidia-gpu`
```
docker run wasi-nn
EXECUTION_TYPE=cpu
docker build -t wasi-nn-${EXECUTION_TYPE} -f core/iwasm/libraries/wasi-nn/test/Dockerfile.${EXECUTION_TYPE} .
```
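For example, building the NVIDIA GPU runtime image uses the same command with `EXECUTION_TYPE=nvidia-gpu`, producing the `wasi-nn-nvidia-gpu` tag used in the run commands below:
```
EXECUTION_TYPE=nvidia-gpu
docker build -t wasi-nn-${EXECUTION_TYPE} -f core/iwasm/libraries/wasi-nn/test/Dockerfile.${EXECUTION_TYPE} .
```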
### Build wasm app
```
docker build -t wasi-nn-compile -f core/iwasm/libraries/wasi-nn/test/Dockerfile.compile .
```
```
docker run -v $PWD/core/iwasm/libraries/wasi-nn:/wasi-nn wasi-nn-compile
```
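The compile container writes its artifacts back into the mounted directory. Assuming the layout implied by the build script and the test code in this patch, the wasm binary and the generated TFLite models should then be available on the host under the test folder:
```
ls core/iwasm/libraries/wasi-nn/test/test_tensorflow.wasm
ls core/iwasm/libraries/wasi-nn/test/models
```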
### Run wasm app
If all the tests have run properly, you will see the following message in the terminal:
```
Tests: passed!
```
* CPU
```
docker run \
-v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-cpu \
--dir=/assets \
--env="TARGET=cpu" \
/assets/test_tensorflow.wasm
```
* (NVIDIA) GPU
```
docker run \
--runtime=nvidia \
-v $PWD/core/iwasm/libraries/wasi-nn/test:/assets wasi-nn-nvidia-gpu \
--dir=/assets \
--env="TARGET=gpu" \
/assets/test_tensorflow.wasm
```
Requirements:
* [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker).
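As an optional sanity check (this exact invocation is an assumption, not part of the test flow), `clinfo` is installed in the NVIDIA GPU image and can be used to confirm that the NVIDIA OpenCL ICD is visible inside the container:
```
docker run --runtime=nvidia --entrypoint clinfo wasi-nn-nvidia-gpu
```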
## What is missing
Supported:
@ -43,5 +88,5 @@ Supported:
* Only 1 model at a time.
* `graph` and `graph-execution-context` are ignored.
* Graph encoding: `tensorflowlite`.
* Execution target: `cpu`.
* Execution target: `cpu` and `gpu`.
* Tensor type: `fp32`.

View File

@ -16,6 +16,7 @@
#include <tensorflow/lite/model.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/error_reporter.h>
#include <tensorflow/lite/delegates/gpu/delegate.h>
/* Global variables */
@ -45,8 +46,8 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
return invalid_argument;
}
if (target != cpu) {
NN_ERR_PRINTF("Only CPU target is supported.");
if (target != cpu && target != gpu) {
NN_ERR_PRINTF("Only CPU and GPU target is supported.");
return invalid_argument;
}
@ -79,6 +80,29 @@ tensorflowlite_load(graph_builder_array *builder, graph_encoding encoding,
return missing_memory;
}
bool use_default = false;
switch (target) {
case gpu:
{
// https://www.tensorflow.org/lite/performance/gpu
auto options = TfLiteGpuDelegateOptionsV2Default();
options.inference_preference =
TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
options.inference_priority1 =
TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
auto *delegate = TfLiteGpuDelegateV2Create(&options);
if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
NN_ERR_PRINTF("Error when enabling GPU delegate.");
use_default = true;
}
break;
}
default:
use_default = true;
}
if (use_default)
NN_WARN_PRINTF("Default encoding is CPU.");
return success;
}
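For reference, the GPU path added above follows the standard TFLite GPU-delegate pattern from `tensorflow/lite/delegates/gpu/delegate.h`. A self-contained sketch of that pattern (not part of this patch; `interpreter` is assumed to be an already-built `tflite::Interpreter`, and the explicit delete is shown only to illustrate the full lifecycle):
```
#include <memory>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/delegates/gpu/delegate.h>

// Apply the TFLite GPU (OpenCL) delegate to an existing interpreter.
// Returns the applied delegate (release it with TfLiteGpuDelegateV2Delete
// only after the interpreter is destroyed), or nullptr if delegation failed
// and the interpreter keeps its CPU kernels.
static TfLiteDelegate *
apply_gpu_delegate(std::unique_ptr<tflite::Interpreter> &interpreter)
{
    TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
    options.inference_preference =
        TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
    options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;

    TfLiteDelegate *delegate = TfLiteGpuDelegateV2Create(&options);
    if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
        // Delegation failed (e.g. no OpenCL device); fall back to CPU kernels.
        TfLiteGpuDelegateV2Delete(delegate);
        return nullptr;
    }
    return delegate;
}
```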

View File

@ -1 +0,0 @@
Dockerfile

View File

@ -7,10 +7,10 @@ project (iwasm)
set (CMAKE_VERBOSE_MAKEFILE OFF)
# Reset default linker flags
set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
set (CMAKE_C_STANDARD 99)
set (CMAKE_CXX_STANDARD 14)
set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
if (NOT DEFINED WAMR_BUILD_PLATFORM)
set (WAMR_BUILD_PLATFORM "linux")

View File

@ -0,0 +1,22 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
FROM ubuntu:20.04 AS base
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
cmake build-essential git
WORKDIR /home/wamr
COPY . .
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
RUN cmake \
-DWAMR_BUILD_WASI_NN=1 \
-DTFLITE_ENABLE_GPU=ON \
..
RUN make -j $(grep -c ^processor /proc/cpuinfo)

View File

@ -1,38 +1,23 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
FROM ubuntu:22.04
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
cmake build-essential git wget python3.10 python3-pip
ARG WASI_SDK_VER=16
ARG WASI_SDK_VER=19
RUN wget -c --progress=dot:giga https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VER}/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -P /opt \
&& tar xf /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz -C /opt \
&& ln -fs /opt/wasi-sdk-${WASI_SDK_VER}.0 /opt/wasi-sdk \
&& rm /opt/wasi-sdk-${WASI_SDK_VER}.0-linux.tar.gz
WORKDIR /home/wamr
COPY core/deps/install_tensorflow.sh core/deps/install_tensorflow.sh
RUN ./core/deps/install_tensorflow.sh
WORKDIR /wasi-nn/test
COPY core/iwasm/libraries/wasi-nn/test/requirements.txt .
RUN pip3 install -r requirements.txt
COPY core core
COPY build-scripts build-scripts
COPY product-mini product-mini
RUN pip3 install -r requirements.txt && rm requirements.txt
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test/build
RUN cmake -DWAMR_BUILD_WASI_NN=1 ..
RUN make -j $(grep -c ^processor /proc/cpuinfo)
WORKDIR /home/wamr/core/iwasm/libraries/wasi-nn/test
RUN ./build.sh
ENTRYPOINT [ "./build/iwasm", "--dir=.", "test_tensorflow.wasm" ]
ENTRYPOINT [ "bash", "./build.sh" ]

View File

@ -0,0 +1,8 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
FROM ubuntu:20.04
COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
ENTRYPOINT [ "/run/iwasm" ]

View File

@ -0,0 +1,20 @@
# Copyright (C) 2019 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
ocl-icd-libopencl1 \
ocl-icd-opencl-dev \
clinfo && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /etc/OpenCL/vendors && \
echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
COPY --from=wasi-nn-base /home/wamr/core/iwasm/libraries/wasi-nn/test/build/iwasm /run/iwasm
ENTRYPOINT [ "/run/iwasm" ]

View File

@ -7,7 +7,7 @@
-Wl,--allow-undefined \
-Wl,--strip-all,--no-entry \
--sysroot=/opt/wasi-sdk/share/wasi-sysroot \
-I/home/wamr/core/iwasm/libraries/wasi-nn \
-I.. \
-o test_tensorflow.wasm test_tensorflow.c
# TFLite models to use in the tests

core/iwasm/libraries/wasi-nn/test/test_tensorflow.c Executable file → Normal file
View File

@ -28,7 +28,7 @@ typedef struct {
// WASI-NN wrappers
error
wasm_load(char *model_name, graph *g)
wasm_load(char *model_name, graph *g, execution_target target)
{
FILE *pFile = fopen(model_name, "r");
if (pFile == NULL)
@ -64,7 +64,7 @@ wasm_load(char *model_name, graph *g)
arr.buf[0].size = result;
arr.buf[0].buf = buffer;
error res = load(&arr, tensorflowlite, cpu, g);
error res = load(&arr, tensorflowlite, target, g);
fclose(pFile);
free(buffer);
@ -115,11 +115,12 @@ wasm_get_output(graph_execution_context ctx, uint32_t index, float *out_tensor,
// Inference
float *
run_inference(float *input, uint32_t *input_size, uint32_t *output_size,
char *model_name, uint32_t num_output_tensors)
run_inference(execution_target target, float *input, uint32_t *input_size,
uint32_t *output_size, char *model_name,
uint32_t num_output_tensors)
{
graph graph;
if (wasm_load(model_name, &graph) != success) {
if (wasm_load(model_name, &graph, target) != success) {
fprintf(stderr, "Error when loading model.");
exit(1);
}
@ -185,14 +186,14 @@ create_input(int *dims)
// TESTS
void
test_sum()
test_sum(execution_target target)
{
int dims[] = { 1, 5, 5, 1 };
input_info input = create_input(dims);
uint32_t output_size = 0;
float *output = run_inference(input.input_tensor, input.dim, &output_size,
"models/sum.tflite", 1);
float *output = run_inference(target, input.input_tensor, input.dim,
&output_size, "/assets/models/sum.tflite", 1);
assert(output_size == 1);
assert(fabs(output[0] - 300.0) < EPSILON);
@ -203,14 +204,14 @@ test_sum()
}
void
test_max()
test_max(execution_target target)
{
int dims[] = { 1, 5, 5, 1 };
input_info input = create_input(dims);
uint32_t output_size = 0;
float *output = run_inference(input.input_tensor, input.dim, &output_size,
"models/max.tflite", 1);
float *output = run_inference(target, input.input_tensor, input.dim,
&output_size, "/assets/models/max.tflite", 1);
assert(output_size == 1);
assert(fabs(output[0] - 24.0) < EPSILON);
@ -222,14 +223,15 @@ test_max()
}
void
test_average()
test_average(execution_target target)
{
int dims[] = { 1, 5, 5, 1 };
input_info input = create_input(dims);
uint32_t output_size = 0;
float *output = run_inference(input.input_tensor, input.dim, &output_size,
"models/average.tflite", 1);
float *output =
run_inference(target, input.input_tensor, input.dim, &output_size,
"/assets/models/average.tflite", 1);
assert(output_size == 1);
assert(fabs(output[0] - 12.0) < EPSILON);
@ -241,14 +243,15 @@ test_average()
}
void
test_mult_dimensions()
test_mult_dimensions(execution_target target)
{
int dims[] = { 1, 3, 3, 1 };
input_info input = create_input(dims);
uint32_t output_size = 0;
float *output = run_inference(input.input_tensor, input.dim, &output_size,
"models/mult_dim.tflite", 1);
float *output =
run_inference(target, input.input_tensor, input.dim, &output_size,
"/assets/models/mult_dim.tflite", 1);
assert(output_size == 9);
for (int i = 0; i < 9; i++)
@ -260,14 +263,15 @@ test_mult_dimensions()
}
void
test_mult_outputs()
test_mult_outputs(execution_target target)
{
int dims[] = { 1, 4, 4, 1 };
input_info input = create_input(dims);
uint32_t output_size = 0;
float *output = run_inference(input.input_tensor, input.dim, &output_size,
"models/mult_out.tflite", 2);
float *output =
run_inference(target, input.input_tensor, input.dim, &output_size,
"/assets/models/mult_out.tflite", 2);
assert(output_size == 8);
// first tensor check
@ -285,16 +289,31 @@ test_mult_outputs()
int
main()
{
char *env = getenv("TARGET");
if (env == NULL) {
printf("Usage:\n--env=\"TARGET=[cpu|gpu]\"\n");
return 1;
}
execution_target target;
if (strcmp(env, "cpu") == 0)
target = cpu;
else if (strcmp(env, "gpu") == 0)
target = gpu;
else {
printf("Wrong target!");
return 1;
}
printf("################### Testing sum...\n");
test_sum();
test_sum(target);
printf("################### Testing max...\n");
test_max();
test_max(target);
printf("################### Testing average...\n");
test_average();
test_average(target);
printf("################### Testing multiple dimensions...\n");
test_mult_dimensions();
test_mult_dimensions(target);
printf("################### Testing multiple outputs...\n");
test_mult_outputs();
test_mult_outputs(target);
printf("Tests: passed!\n");
return 0;
}