Fangjun Kuang
Committed by GitHub

Support using onnxruntime 1.16.0 with CUDA 11.4 on Jetson Orin NX (Linux arm64 GPU). (#1630)

* Support using onnxruntime 1.16.0 with CUDA 11.4 on Jetson Orin NX.

The pre-built onnxruntime libs are provided by the community
using the following command:

```bash
./build.sh --build_shared_lib --config Release --update \
  --build --parallel --use_cuda \
  --cuda_home /usr/local/cuda \
  --cudnn_home /usr/lib/aarch64-linux-gnu 2>&1 | tee my-log.txt
```

See also https://github.com/microsoft/onnxruntime/discussions/11226

---

Info about the board:

```
Model: NVIDIA Orin NX T801-16GB - Jetpack 5.1.4 [L4T 35.6.0]
```

```
nvidia@nvidia-desktop:~/Downloads$ head -n 1 /etc/nv_tegra_release
# R35 (release), REVISION: 6.0, GCID: 37391689, BOARD: t186ref, EABI: aarch64, DATE: Wed Aug 28 09:12:27 UTC 2024

nvidia@nvidia-desktop:~/Downloads$ uname -r
5.10.216-tegra

nvidia@nvidia-desktop:~/Downloads$ lsb_release -i -r
Distributor ID:	Ubuntu
Release:	20.04

nvidia@nvidia-desktop:~/Downloads$ nvcc -V
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:43:33_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0

nvidia@nvidia-desktop:~/Downloads$ dpkg -l libcudnn8
Desired=Unknown/Install/Remove/Purge/Hold
| Status=Not/Inst/Conf-files/Unpacked/halF-conf/Half-inst/trig-aWait/Trig-pend
|/ Err?=(none)/Reinst-required (Status,Err: uppercase=bad)
||/ Name           Version              Architecture Description
+++-==============-====================-============-=================================
ii  libcudnn8      8.6.0.166-1+cuda11.4 arm64        cuDNN runtime libraries

nvidia@nvidia-desktop:~/Downloads$ dpkg -l tensorrt
Desired=Unknown/Install/Remove/Purge/Hold
| Status=Not/Inst/Conf-files/Unpacked/halF-conf/Half-inst/trig-aWait/Trig-pend
|/ Err?=(none)/Reinst-required (Status,Err: uppercase=bad)
||/ Name           Version            Architecture Description
+++-==============-==================-============-=================================
ii  tensorrt       8.5.2.2-1+cuda11.4 arm64        Meta package for TensorRT
```
@@ -34,12 +34,20 @@ concurrency:
34 jobs: 34 jobs:
35 aarch64_linux_gnu_shared: 35 aarch64_linux_gnu_shared:
36 runs-on: ${{ matrix.os }} 36 runs-on: ${{ matrix.os }}
37 - name: aarch64 shared GPU ${{ matrix.gpu }} 37 + name: aarch64 shared GPU ${{ matrix.gpu }} ${{ matrix.onnxruntime_version }}
38 strategy: 38 strategy:
39 fail-fast: false 39 fail-fast: false
40 matrix: 40 matrix:
41 - os: [ubuntu-latest]  
42 - gpu: [ON, OFF] 41 + include:
  42 + - os: ubuntu-latest
  43 + gpu: ON
  44 + onnxruntime_version: "1.11.0"
  45 + - os: ubuntu-latest
  46 + gpu: ON
  47 + onnxruntime_version: "1.16.0"
  48 + - os: ubuntu-latest
  49 + gpu: OFF
  50 + onnxruntime_version: ""
43 51
44 steps: 52 steps:
45 - uses: actions/checkout@v4 53 - uses: actions/checkout@v4
@@ -62,7 +70,7 @@ jobs:
62 if: steps.cache-qemu.outputs.cache-hit != 'true' 70 if: steps.cache-qemu.outputs.cache-hit != 'true'
63 run: | 71 run: |
64 sudo apt-get update 72 sudo apt-get update
65 - sudo apt-get install autoconf automake autotools-dev ninja-build 73 + sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev
66 74
67 - name: checkout-qemu 75 - name: checkout-qemu
68 if: steps.cache-qemu.outputs.cache-hit != 'true' 76 if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -159,6 +167,7 @@ jobs:
159 167
160 export BUILD_SHARED_LIBS=ON 168 export BUILD_SHARED_LIBS=ON
161 export SHERPA_ONNX_ENABLE_GPU=${{ matrix.gpu }} 169 export SHERPA_ONNX_ENABLE_GPU=${{ matrix.gpu }}
  170 + export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=${{ matrix.onnxruntime_version }}
162 171
163 ./build-aarch64-linux-gnu.sh 172 ./build-aarch64-linux-gnu.sh
164 173
@@ -199,7 +208,7 @@ jobs:
199 if [[ ${{ matrix.gpu }} == OFF ]]; then 208 if [[ ${{ matrix.gpu }} == OFF ]]; then
200 dst=${dst}-cpu 209 dst=${dst}-cpu
201 else 210 else
202 - dst=${dst}-gpu 211 + dst=${dst}-gpu-onnxruntime-${{ matrix.onnxruntime_version }}
203 fi 212 fi
204 mkdir $dst 213 mkdir $dst
205 214
@@ -223,7 +232,7 @@ jobs:
223 232
224 - uses: actions/upload-artifact@v4 233 - uses: actions/upload-artifact@v4
225 with: 234 with:
226 - name: sherpa-onnx-linux-aarch64-shared-gpu-${{ matrix.gpu }} 235 + name: sherpa-onnx-linux-aarch64-shared-gpu-${{ matrix.gpu }}-onnxruntime-${{ matrix.onnxruntime_version }}
227 path: sherpa-onnx-*linux-aarch64-shared*.tar.bz2 236 path: sherpa-onnx-*linux-aarch64-shared*.tar.bz2
228 237
229 # https://huggingface.co/docs/hub/spaces-github-actions 238 # https://huggingface.co/docs/hub/spaces-github-actions
@@ -61,7 +61,7 @@ jobs:
61 if: steps.cache-qemu.outputs.cache-hit != 'true' 61 if: steps.cache-qemu.outputs.cache-hit != 'true'
62 run: | 62 run: |
63 sudo apt-get update 63 sudo apt-get update
64 - sudo apt-get install autoconf automake autotools-dev ninja-build 64 + sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev
65 65
66 - name: checkout-qemu 66 - name: checkout-qemu
67 if: steps.cache-qemu.outputs.cache-hit != 'true' 67 if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -62,7 +62,7 @@ jobs:
62 if: steps.cache-qemu.outputs.cache-hit != 'true' 62 if: steps.cache-qemu.outputs.cache-hit != 'true'
63 run: | 63 run: |
64 sudo apt-get update 64 sudo apt-get update
65 - sudo apt-get install autoconf automake autotools-dev ninja-build 65 + sudo apt-get install autoconf automake autotools-dev ninja-build libglib2.0-dev
66 66
67 - name: checkout-qemu 67 - name: checkout-qemu
68 if: steps.cache-qemu.outputs.cache-hit != 'true' 68 if: steps.cache-qemu.outputs.cache-hit != 'true'
@@ -46,6 +46,9 @@ option(SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE "True to use pre-i
46 option(SHERPA_ONNX_ENABLE_SANITIZER "Whether to enable ubsan and asan" OFF) 46 option(SHERPA_ONNX_ENABLE_SANITIZER "Whether to enable ubsan and asan" OFF)
47 option(SHERPA_ONNX_BUILD_C_API_EXAMPLES "Whether to enable C API examples" ON) 47 option(SHERPA_ONNX_BUILD_C_API_EXAMPLES "Whether to enable C API examples" ON)
48 48
  49 +set(SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION "1.11.0" CACHE STRING "Used only for Linux ARM64 GPU. If you use Jetson nano b01, then please set it to 1.11.0. If you use Jetson Orin NX, then set it to 1.16.0")
  50 +
  51 +
49 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 52 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
50 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 53 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
51 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") 54 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
  2 +#
  3 +# Usage of this file
  4 +#
  5 +# (1) Build CPU version of sherpa-onnx
  6 +# ./build-aarch64-linux-gnu.sh
  7 +#
  8 +# (2) Build GPU version of sherpa-onnx
  9 +#
  10 +# (a) Make sure your board has NVIDIA GPU(s)
  11 +#
  12 +# (b) For Jetson Nano B01 (using CUDA 10.2)
  13 +#
  14 +# export SHERPA_ONNX_ENABLE_GPU=ON
  15 +# export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0
  16 +# ./build-aarch64-linux-gnu.sh
  17 +#
  18 +# (c) For Jetson Orin NX (using CUDA 11.4)
  19 +#
  20 +# export SHERPA_ONNX_ENABLE_GPU=ON
  21 +# export SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.16.0
  22 +# ./build-aarch64-linux-gnu.sh
2 23
3 if command -v aarch64-none-linux-gnu-gcc &> /dev/null; then 24 if command -v aarch64-none-linux-gnu-gcc &> /dev/null; then
4 ln -svf $(which aarch64-none-linux-gnu-gcc) ./aarch64-linux-gnu-gcc 25 ln -svf $(which aarch64-none-linux-gnu-gcc) ./aarch64-linux-gnu-gcc
@@ -47,11 +68,6 @@ fi
47 if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"" ]]; then 68 if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"" ]]; then
48 # By default, use CPU 69 # By default, use CPU
49 SHERPA_ONNX_ENABLE_GPU=OFF 70 SHERPA_ONNX_ENABLE_GPU=OFF
50 -  
51 - # If you use GPU, then please make sure you have NVIDIA GPUs on your board.  
52 - # It uses onnxruntime 1.11.0.  
53 - #  
54 - # Tested on Jetson Nano B01  
55 fi 71 fi
56 72
57 if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"ON" ]]; then 73 if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"ON" ]]; then
@@ -59,6 +75,11 @@ if [[ x"$SHERPA_ONNX_ENABLE_GPU" == x"ON" ]]; then
59 BUILD_SHARED_LIBS=ON 75 BUILD_SHARED_LIBS=ON
60 fi 76 fi
61 77
  78 +if [[ x"$SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION" == x"" ]]; then
  79 + # Used only when SHERPA_ONNX_ENABLE_GPU is ON
  80 + SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION="1.11.0"
  81 +fi
  82 +
62 cmake \ 83 cmake \
63 -DBUILD_PIPER_PHONMIZE_EXE=OFF \ 84 -DBUILD_PIPER_PHONMIZE_EXE=OFF \
64 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ 85 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
@@ -75,6 +96,7 @@ cmake \
75 -DSHERPA_ONNX_ENABLE_JNI=OFF \ 96 -DSHERPA_ONNX_ENABLE_JNI=OFF \
76 -DSHERPA_ONNX_ENABLE_C_API=ON \ 97 -DSHERPA_ONNX_ENABLE_C_API=ON \
77 -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON \ 98 -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON \
  99 + -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=$SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION \
78 -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \ 100 -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
79 .. 101 ..
80 102
@@ -18,19 +18,37 @@ if(NOT SHERPA_ONNX_ENABLE_GPU)
18 message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}") 18 message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}")
19 endif() 19 endif()
20 20
21 -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.11.0/onnxruntime-linux-aarch64-gpu-1.11.0.tar.bz2")  
22 -set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-gpu-1.11.0.tar.bz2")  
23 -set(onnxruntime_HASH "SHA256=36eded935551e23aead09d4173bdf0bd1e7b01fdec15d77f97d6e34029aa60d7") 21 +message(WARNING "\
  22 +SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION: ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION}
  23 +If you use Jetson nano b01, then please pass
  24 + -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.11.0
  25 +to cmake (You need to make sure CUDA 10.2 is available on your board).
  26 +
  27 +If you use Jetson Orin NX, then please pass
  28 + -DSHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION=1.16.0
  29 +to cmake (You need to make sure CUDA 11.4 is available on your board).
  30 +")
  31 +
  32 +set(v ${SHERPA_ONNX_LINUX_ARM64_GPU_ONNXRUNTIME_VERSION})
  33 +
  34 +set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${v}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")
  35 +set(onnxruntime_URL2 "https://hf-mirror.com/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2")
  36 +
  37 +if(v STREQUAL "1.11.0")
  38 + set(onnxruntime_HASH "SHA256=36eded935551e23aead09d4173bdf0bd1e7b01fdec15d77f97d6e34029aa60d7")
  39 +else()
  40 + set(onnxruntime_HASH "SHA256=4c09d5acf2c2682b4eab1dc2f1ad98fc1fde5f5f1960063e337983ba59379a4b")
  41 +endif()
24 42
25 # If you don't have access to the Internet, 43 # If you don't have access to the Internet,
26 # please download onnxruntime to one of the following locations. 44 # please download onnxruntime to one of the following locations.
27 # You can add more if you want. 45 # You can add more if you want.
28 set(possible_file_locations 46 set(possible_file_locations
29 - $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-gpu-1.11.0.tar.bz2  
30 - ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-gpu-1.11.0.tar.bz2  
31 - ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-gpu-1.11.0.tar.bz2  
32 - /tmp/onnxruntime-linux-aarch64-gpu-1.11.0.tar.bz2  
33 - /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-gpu-1.11.0.tar.bz2 47 + $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  48 + ${CMAKE_SOURCE_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  49 + ${CMAKE_BINARY_DIR}/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  50 + /tmp/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
  51 + /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-gpu-${v}.tar.bz2
34 ) 52 )
35 53
36 foreach(f IN LISTS possible_file_locations) 54 foreach(f IN LISTS possible_file_locations)