Fangjun Kuang
Committed by GitHub

Upload RKNN models for sense-voice (#2592)

# Exports SenseVoice models to ONNX, converts them to RKNN and uploads one
# tarball per (framework, platform, input length) combination to the
# "asr-models" release.
name: export-sense-voice-to-rknn

on:
  push:
    branches:
      - export-sense-voice-rknn-ci-2
  workflow_dispatch:

concurrency:
  group: export-sense-voice-to-rknn-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-sense-voice-to-rknn:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }} ${{ matrix.platform }} ${{ matrix.input_in_seconds }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      # 5 platforms x 5 input lengths x 2 frameworks = 50 jobs.
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.10"]
        platform: ["rk3562", "rk3566", "rk3568", "rk3576", "rk3588"]
        input_in_seconds: ["10", "15", "20", "25", "30"]
        framework: ["FunASR", "WSYue-ASR"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade \
            pip \
            "numpy<2" \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            onnx==1.17.0 \
            onnxruntime==1.17.1 \
            librosa \
            soundfile \
            onnxsim \
            sentencepiece \
            kaldi_native_fbank

          # --fail makes curl exit non-zero on HTTP errors instead of saving
          # the error page under the wheel's file name.
          curl -fSL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
          pip install ./*.whl "numpy<=1.26.4"

      - name: Run SenseVoice from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          cd scripts/sense-voice/rknn

          # Model files.
          model_base=https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main
          for f in am.mvn model.pt chn_jpn_yue_eng_ko_spectok.bpe.model; do
            curl -fSL -O "$model_base/$f"
          done

          # Test waves, README.md and LICENSE that get packaged with the model.
          pkg_base=https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main
          for w in en ja ko yue zh; do
            curl -fSL -O "$pkg_base/test_wavs/$w.wav"
          done

          rm -f README.md

          curl -fSL -O "$pkg_base/README.md"
          curl -fSL -O "$pkg_base/LICENSE"

          echo "export to onnx"
          t="${{ matrix.input_in_seconds }}"
          p="${{ matrix.platform }}"

          echo "----$t---"
          python3 ./export-onnx.py --input-len-in-seconds "$t"

          ls -lh *.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          for w in en ja ko yue zh; do
            python3 ./test_onnx.py --model "model-$t-seconds.onnx" --tokens ./tokens.txt --wave "./$w.wav"
          done

          echo "export to rknn"
          echo "----------$t----------"
          echo "----------$p----------"
          # Keep the converter's output out of the CI log, but show its tail
          # when the conversion fails so the failure can be diagnosed.
          if ! python3 export-rknn.py \
            --target-platform "$p" \
            --in-model "model-$t-seconds.onnx" \
            --out-model "model-$p-$t-seconds.rknn" > export-rknn.log 2>&1; then
            echo "export-rknn.py failed; last lines of its output:"
            tail -n 100 export-rknn.log
            exit 1
          fi
          rm -f export-rknn.log

          ls -lh *.rknn

          echo "collect results"
          d="sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17"

          mkdir -p "$d/test_wavs"

          cp -v README.md "$d"
          cp -v LICENSE "$d"
          cp -v "model-$p-$t-seconds.rknn" "$d/model.rknn"
          cp -v tokens.txt "$d"
          cp -v *.wav "$d/test_wavs"
          ls -lh "$d"
          tar cjfv "$d.tar.bz2" "$d"
          ls -lh *.tar.bz2
          # Remove the staging directory (the original removed a literal "d").
          rm -rf "$d"

          echo "----show---"
          ls -lh *.tar.bz2

          # The release steps below pick up ./*.tar.bz2 from the repo root.
          mv *.tar.bz2 ../../..

      - name: Run SenseVoice from WSYue-ASR
        if: matrix.framework == 'WSYue-ASR'
        shell: bash
        run: |
          cd scripts/sense-voice/rknn

          # Model weights come from WSYue-ASR; am.mvn and the BPE model are
          # taken from the FunAudioLLM SenseVoiceSmall repository.
          curl -fSL -O https://huggingface.co/ASLP-lab/WSYue-ASR/resolve/main/sensevoice_small_yue/model.pt

          model_base=https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main
          for f in am.mvn chn_jpn_yue_eng_ko_spectok.bpe.model; do
            curl -fSL -O "$model_base/$f"
          done

          wav_base=https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main
          for w in en yue zh; do
            curl -fSL -O "$wav_base/test_wavs/$w.wav"
          done

          pkg_base=https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main
          for i in $(seq 0 17); do
            curl -fSL -O "$pkg_base/test_wavs/yue-$i.wav"
          done

          rm -f README.md

          curl -fSL -O "$pkg_base/README.md"

          echo "export to onnx"
          t="${{ matrix.input_in_seconds }}"
          p="${{ matrix.platform }}"

          echo "----$t---"

          # NOTE(review): presumably read by export-onnx.py to fill in model
          # metadata -- confirm against the script.
          export model_author="ASLP-lab"
          export comment="ASLP-lab/WSYue-ASR"
          export url="https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/sensevoice_small_yue"

          python3 ./export-onnx.py --input-len-in-seconds "$t"

          ls -lh *.onnx

          echo "test exported onnx models"

          echo "----------$t----------"
          for w in en yue zh; do
            python3 ./test_onnx.py --model "model-$t-seconds.onnx" --tokens ./tokens.txt --wave "./$w.wav"
          done
          for i in $(seq 0 17); do
            echo "yue-$i.wav"
            python3 ./test_onnx.py --model "model-$t-seconds.onnx" --tokens ./tokens.txt --wave "./yue-$i.wav"
          done

          echo "export to rknn"
          echo "----------$t----------"
          echo "----------$p----------"
          # Keep the converter's output out of the CI log, but show its tail
          # when the conversion fails so the failure can be diagnosed.
          if ! python3 export-rknn.py \
            --target-platform "$p" \
            --in-model "model-$t-seconds.onnx" \
            --out-model "model-$p-$t-seconds.rknn" > export-rknn.log 2>&1; then
            echo "export-rknn.py failed; last lines of its output:"
            tail -n 100 export-rknn.log
            exit 1
          fi
          rm -f export-rknn.log

          ls -lh *.rknn

          echo "collect results"
          d="sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2025-09-09"

          mkdir -p "$d/test_wavs"

          cp -v README.md "$d"
          cp -v "model-$p-$t-seconds.rknn" "$d/model.rknn"
          cp -v tokens.txt "$d"
          cp -v *.wav "$d/test_wavs"
          ls -lh "$d"
          tar cjfv "$d.tar.bz2" "$d"
          ls -lh *.tar.bz2
          # Remove the staging directory (the original removed a literal "d").
          rm -rf "$d"

          echo "----show---"
          ls -lh *.tar.bz2

          # The release steps below pick up ./*.tar.bz2 from the repo root.
          mv *.tar.bz2 ../../..

      # When running in the csukuangfj fork, upload to k2-fsa/sherpa-onnx
      # using a dedicated token.
      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      # When running in k2-fsa, upload to the current repository; no
      # repo_name/repo_token is passed, so the action's defaults apply.
      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models
@@ -86,7 +86,7 @@ def main(): @@ -86,7 +86,7 @@ def main():
86 86
87 print("loading model") 87 print("loading model")
88 88
89 - state_dict = torch.load("./model.pt") 89 + state_dict = torch.load("./model.pt", map_location="cpu")
90 if "state_dict" in state_dict: 90 if "state_dict" in state_dict:
91 state_dict = state_dict["state_dict"] 91 state_dict = state_dict["state_dict"]
92 92
@@ -8,10 +8,8 @@ import torch.nn.functional as F @@ -8,10 +8,8 @@ import torch.nn.functional as F
8 8
9 9
10 class SinusoidalPositionEncoder(nn.Module): 10 class SinusoidalPositionEncoder(nn.Module):
11 - """ """  
12 -  
13 def __init__(self, d_model=80, dropout_rate=0.1): 11 def __init__(self, d_model=80, dropout_rate=0.1):
14 - pass 12 + super().__init__()
15 13
16 def encode( 14 def encode(
17 self, 15 self,