Committed by
GitHub
Upload RKNN models for sense-voice (#2592)
正在显示
3 个修改的文件
包含
215 行增加
和
4 行删除
| 1 | +name: export-sense-voice-to-rknn | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - export-sense-voice-rknn-ci-2 | ||
| 7 | + workflow_dispatch: | ||
| 8 | + | ||
| 9 | +concurrency: | ||
| 10 | + group: export-sense-voice-to-rknn-${{ github.ref }} | ||
| 11 | + cancel-in-progress: true | ||
| 12 | + | ||
| 13 | +jobs: | ||
| 14 | + export-sense-voice-to-rknn: | ||
| 15 | + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' | ||
| 16 | + name: ${{ matrix.framework }} ${{ matrix.platform }} ${{ matrix.input_in_seconds }} | ||
| 17 | + runs-on: ${{ matrix.os }} | ||
| 18 | + strategy: | ||
| 19 | + fail-fast: false | ||
| 20 | + matrix: | ||
| 21 | + os: [ubuntu-latest] | ||
| 22 | + python-version: ["3.10"] | ||
| 23 | + platform: ["rk3562", "rk3566", "rk3568", "rk3576", "rk3588"] | ||
| 24 | + input_in_seconds: ["10", "15", "20", "25", "30"] | ||
| 25 | + framework: ["FunASR", "WSYue-ASR"] | ||
| 26 | + | ||
| 27 | + steps: | ||
| 28 | + - uses: actions/checkout@v4 | ||
| 29 | + | ||
| 30 | + - name: Setup Python ${{ matrix.python-version }} | ||
| 31 | + uses: actions/setup-python@v5 | ||
| 32 | + with: | ||
| 33 | + python-version: ${{ matrix.python-version }} | ||
| 34 | + | ||
| 35 | + - name: Install Python dependencies | ||
| 36 | + shell: bash | ||
| 37 | + run: | | ||
| 38 | + python3 -m pip install --upgrade \ | ||
| 39 | + pip \ | ||
| 40 | + "numpy<2" \ | ||
| 41 | + torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \ | ||
| 42 | + onnx==1.17.0 \ | ||
| 43 | + onnxruntime==1.17.1 \ | ||
| 44 | + librosa \ | ||
| 45 | + soundfile \ | ||
| 46 | + onnxsim \ | ||
| 47 | + sentencepiece \ | ||
| 48 | + kaldi_native_fbank | ||
| 49 | + | ||
| 50 | + curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl | ||
| 51 | + pip install ./*.whl "numpy<=1.26.4" | ||
| 52 | + | ||
| 53 | + - name: Run SenseVoice from FunAsr | ||
| 54 | + if: matrix.framework == 'FunASR' | ||
| 55 | + shell: bash | ||
| 56 | + run: | | ||
| 57 | + cd scripts/sense-voice/rknn | ||
| 58 | + | ||
| 59 | + curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn | ||
| 60 | + curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/model.pt | ||
| 61 | + curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model | ||
| 62 | + | ||
| 63 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav | ||
| 64 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ja.wav | ||
| 65 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ko.wav | ||
| 66 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav | ||
| 67 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav | ||
| 68 | + | ||
| 69 | + rm -f README.md || true | ||
| 70 | + | ||
| 71 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/README.md | ||
| 72 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/LICENSE | ||
| 73 | + | ||
| 74 | + echo "export to onnx" | ||
| 75 | + t=${{ matrix.input_in_seconds }} | ||
| 76 | + p=${{ matrix.platform }} | ||
| 77 | + | ||
| 78 | + echo "----$t---" | ||
| 79 | + python3 ./export-onnx.py --input-len-in-seconds $t | ||
| 80 | + | ||
| 81 | + ls -lh *.onnx | ||
| 82 | + | ||
| 83 | + echo "test exported onnx models" | ||
| 84 | + | ||
| 85 | + echo "----------$t----------" | ||
| 86 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav | ||
| 87 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ja.wav | ||
| 88 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./ko.wav | ||
| 89 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav | ||
| 90 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav | ||
| 91 | + | ||
| 92 | + echo "export to rknn" | ||
| 93 | + echo "----------$t----------" | ||
| 94 | + echo "----------$p----------" | ||
| 95 | + python3 export-rknn.py --target-platform $p --in-model model-$t-seconds.onnx --out-model model-$p-$t-seconds.rknn >/dev/null 2>&1 | ||
| 96 | + | ||
| 97 | + ls -lh *.rknn | ||
| 98 | + | ||
| 99 | + echo "collect results" | ||
| 100 | + d=sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2024-07-17 | ||
| 101 | + | ||
| 102 | + mkdir -p $d | ||
| 103 | + mkdir -p $d/test_wavs | ||
| 104 | + | ||
| 105 | + cp -v README.md $d | ||
| 106 | + cp -v LICENSE $d | ||
| 107 | + cp -v model-$p-$t-seconds.rknn $d/model.rknn | ||
| 108 | + cp -v tokens.txt $d | ||
| 109 | + cp -v *.wav $d/test_wavs | ||
| 110 | + ls -lh $d | ||
| 111 | + tar cjfv $d.tar.bz2 $d | ||
| 112 | + ls -lh *.tar.bz2 | ||
| 113 | + rm -rf $d  # remove the staging directory (was the literal path "d", a no-op); the tarball is kept | ||
| 114 | + | ||
| 115 | + echo "----show---" | ||
| 116 | + ls -lh *.tar.bz2 | ||
| 117 | + | ||
| 118 | + mv *.tar.bz2 ../../.. | ||
| 119 | + | ||
| 120 | + - name: Run SenseVoice from WSYue-ASR | ||
| 121 | + if: matrix.framework == 'WSYue-ASR' | ||
| 122 | + shell: bash | ||
| 123 | + run: | | ||
| 124 | + cd scripts/sense-voice/rknn | ||
| 125 | + | ||
| 126 | + curl -SL -O https://huggingface.co/ASLP-lab/WSYue-ASR/resolve/main/sensevoice_small_yue/model.pt | ||
| 127 | + | ||
| 128 | + curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn | ||
| 129 | + curl -SL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model | ||
| 130 | + | ||
| 131 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav | ||
| 132 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav | ||
| 133 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav | ||
| 134 | + | ||
| 135 | + for i in $(seq 0 17); do | ||
| 136 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/test_wavs/yue-$i.wav | ||
| 137 | + done | ||
| 138 | + | ||
| 139 | + rm -f README.md || true | ||
| 140 | + | ||
| 141 | + curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/README.md | ||
| 142 | + | ||
| 143 | + echo "export to onnx" | ||
| 144 | + t=${{ matrix.input_in_seconds }} | ||
| 145 | + p=${{ matrix.platform }} | ||
| 146 | + | ||
| 147 | + echo "----$t---" | ||
| 148 | + | ||
| 149 | + export model_author="ASLP-lab" | ||
| 150 | + export comment="ASLP-lab/WSYue-ASR" | ||
| 151 | + export url="https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/sensevoice_small_yue" | ||
| 152 | + | ||
| 153 | + python3 ./export-onnx.py --input-len-in-seconds $t | ||
| 154 | + | ||
| 155 | + ls -lh *.onnx | ||
| 156 | + | ||
| 157 | + echo "test exported onnx models" | ||
| 158 | + | ||
| 159 | + echo "----------$t----------" | ||
| 160 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./en.wav | ||
| 161 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue.wav | ||
| 162 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./zh.wav | ||
| 163 | + for i in $(seq 0 17); do | ||
| 164 | + echo "yue-$i.wav" | ||
| 165 | + python3 ./test_onnx.py --model model-$t-seconds.onnx --tokens ./tokens.txt --wave ./yue-$i.wav | ||
| 166 | + done | ||
| 167 | + | ||
| 168 | + echo "export to rknn" | ||
| 169 | + echo "----------$t----------" | ||
| 170 | + echo "----------$p----------" | ||
| 171 | + python3 export-rknn.py --target-platform $p --in-model model-$t-seconds.onnx --out-model model-$p-$t-seconds.rknn >/dev/null 2>&1 | ||
| 172 | + | ||
| 173 | + ls -lh *.rknn | ||
| 174 | + | ||
| 175 | + echo "collect results" | ||
| 176 | + d=sherpa-onnx-$p-$t-seconds-sense-voice-zh-en-ja-ko-yue-2025-09-09 | ||
| 177 | + | ||
| 178 | + mkdir -p $d | ||
| 179 | + mkdir -p $d/test_wavs | ||
| 180 | + | ||
| 181 | + cp -v README.md $d | ||
| 182 | + cp -v model-$p-$t-seconds.rknn $d/model.rknn | ||
| 183 | + cp -v tokens.txt $d | ||
| 184 | + cp -v *.wav $d/test_wavs | ||
| 185 | + ls -lh $d | ||
| 186 | + tar cjfv $d.tar.bz2 $d | ||
| 187 | + ls -lh *.tar.bz2 | ||
| 188 | + rm -rf $d  # remove the staging directory (was the literal path "d", a no-op); the tarball is kept | ||
| 189 | + | ||
| 190 | + echo "----show---" | ||
| 191 | + ls -lh *.tar.bz2 | ||
| 192 | + | ||
| 193 | + mv *.tar.bz2 ../../.. | ||
| 194 | + | ||
| 195 | + - name: Release | ||
| 196 | + if: github.repository_owner == 'csukuangfj' | ||
| 197 | + uses: svenstaro/upload-release-action@v2 | ||
| 198 | + with: | ||
| 199 | + file_glob: true | ||
| 200 | + file: ./*.tar.bz2 | ||
| 201 | + overwrite: true | ||
| 202 | + repo_name: k2-fsa/sherpa-onnx | ||
| 203 | + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} | ||
| 204 | + tag: asr-models | ||
| 205 | + | ||
| 206 | + - name: Release | ||
| 207 | + if: github.repository_owner == 'k2-fsa' | ||
| 208 | + uses: svenstaro/upload-release-action@v2 | ||
| 209 | + with: | ||
| 210 | + file_glob: true | ||
| 211 | + file: ./*.tar.bz2 | ||
| 212 | + overwrite: true | ||
| 213 | + tag: asr-models |
| @@ -86,7 +86,7 @@ def main(): | @@ -86,7 +86,7 @@ def main(): | ||
| 86 | 86 | ||
| 87 | print("loading model") | 87 | print("loading model") |
| 88 | 88 | ||
| 89 | - state_dict = torch.load("./model.pt") | 89 | + state_dict = torch.load("./model.pt", map_location="cpu") |
| 90 | if "state_dict" in state_dict: | 90 | if "state_dict" in state_dict: |
| 91 | state_dict = state_dict["state_dict"] | 91 | state_dict = state_dict["state_dict"] |
| 92 | 92 |
| @@ -8,10 +8,8 @@ import torch.nn.functional as F | @@ -8,10 +8,8 @@ import torch.nn.functional as F | ||
| 8 | 8 | ||
| 9 | 9 | ||
| 10 | class SinusoidalPositionEncoder(nn.Module): | 10 | class SinusoidalPositionEncoder(nn.Module): |
| 11 | - """ """ | ||
| 12 | - | ||
| 13 | def __init__(self, d_model=80, dropout_rate=0.1): | 11 | def __init__(self, d_model=80, dropout_rate=0.1): |
| 14 | - pass | 12 | + super().__init__() |
| 15 | 13 | ||
| 16 | def encode( | 14 | def encode( |
| 17 | self, | 15 | self, |
-
请 注册 或 登录 后发表评论