正在显示
3 个修改的文件
包含
338 行增加
和
0 行删除
.github/workflows/upload-models.yaml
0 → 100644
# Workflow: package pre-exported ASR models into tarballs, mirror each model
# directory to its own Hugging Face repository, and attach the tarballs to the
# `asr-models` release of k2-fsa/sherpa-onnx.
name: upload-models

# Runs on every push to the `upload-models` branch, or when started manually.
on:
  push:
    branches:
      - upload-models
  workflow_dispatch:

# Only one run per ref at a time; a newer run cancels the one in progress.
concurrency:
  group: upload-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  upload-models:
    # Restrict the job to the two owners listed here.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: upload models
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # NOTE(review): no step in this workflow references
        # matrix.python-version; kept so the matrix stays unchanged.
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      # Build two Vietnamese zipformer packages (float32 and int8) under
      # ./models, create one .tar.bz2 per package, then move everything to the
      # workspace root where the publish and release steps expect it.
      - name: Vietnamese (zipformer)
        shell: bash
        run: |
          rm -rf models
          mkdir models
          cd models
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/zzasdf/viet_iter3_pseudo_label
          which are trained on about 70k hours of data.
          EOF

          git lfs install
          git clone https://huggingface.co/csukuangfj/viet_iter3_pseudo_label hf

          ls -lh

          # float32 encoder/joiner
          d=sherpa-onnx-zipformer-vi-2025-04-20
          mkdir -p "$d"
          cp -v hf/exp/encoder-epoch-12-avg-8.onnx "$d"/
          cp -v hf/exp/decoder-epoch-12-avg-8.onnx "$d"/
          cp -v hf/exp/joiner-epoch-12-avg-8.onnx "$d"/
          cp -v hf/data/Vietnam_bpe_2000_new/bpe.model "$d"/
          cp -v hf/data/Vietnam_bpe_2000_new/tokens.txt "$d"/
          cp -av hf/test_wavs "$d"
          cp -v README.md "$d"

          tar cjfv "$d.tar.bz2" "$d"

          # int8 encoder/joiner; the decoder is the same non-int8 file as above
          d=sherpa-onnx-zipformer-vi-int8-2025-04-20
          mkdir -p "$d"

          cp -v hf/exp/encoder-epoch-12-avg-8.int8.onnx "$d"/
          cp -v hf/exp/decoder-epoch-12-avg-8.onnx "$d"/
          cp -v hf/exp/joiner-epoch-12-avg-8.int8.onnx "$d"/
          cp -v hf/data/Vietnam_bpe_2000_new/bpe.model "$d"/
          cp -v hf/data/Vietnam_bpe_2000_new/tokens.txt "$d"/
          cp -av hf/test_wavs "$d"
          cp -v README.md "$d"

          tar cjfv "$d.tar.bz2" "$d"

          rm -rf hf

          ls -lh

          cd ..

          mv models/* .

      # Push each Vietnamese package directory to the Hugging Face repository
      # of the same name under the csukuangfj account.
      - name: Publish to huggingface (Vietnamese zipformer)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            # Stop at the first failing command so a failed clone/push is
            # reported as a failed attempt instead of being masked by the
            # commands that follow it.
            # NOTE(review): confirm how nick-fields/retry invokes bash; if it
            # already enables errexit this line is redundant but harmless.
            set -e

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            # Same for every model, so set once before the loop.
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            models=(
              sherpa-onnx-zipformer-vi-2025-04-20
              sherpa-onnx-zipformer-vi-int8-2025-04-20
            )
            for d in "${models[@]}"; do
              rm -rf huggingface
              git clone "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" huggingface
              cp -av "$d"/* huggingface

              pushd huggingface
              git lfs track "*.onnx"
              git lfs track "bpe.model"
              git lfs track "*.wav"
              git status
              git add .

              # `git commit` exits non-zero when nothing is staged, e.g. when a
              # previous attempt already pushed this model. Skip the commit in
              # that case so a retry can proceed to the next model.
              if git diff --cached --quiet; then
                echo "No changes to commit for $d"
              else
                git commit -m "add models"
              fi
              git push "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" main

              popd
            done

      # Disabled (`if: false`). Builds two Russian zipformer packages from
      # alphacep/vosk-model-ru, with two downloaded test wavs renamed to
      # 0.wav and 1.wav.
      - name: vosk-model-ru (zipformer)
        if: false
        shell: bash
        run: |
          rm -rf models
          mkdir models
          cd models
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-ru/tree/main
          EOF

          git lfs install
          git clone https://huggingface.co/alphacep/vosk-model-ru hf

          ls -lh

          mkdir test_wavs
          pushd test_wavs
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav

          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav
          popd

          # float32 encoder/joiner
          d=sherpa-onnx-zipformer-ru-2025-04-20
          mkdir "$d"
          cp -v hf/am-onnx/encoder.onnx "$d"
          cp -v hf/am-onnx/decoder.onnx "$d"
          cp -v hf/am-onnx/joiner.onnx "$d"
          cp -v hf/lang/bpe.model "$d"
          cp -v hf/lang/tokens.txt "$d"
          cp -av test_wavs "$d"/
          cp -v README.md "$d"

          tar cjfv "$d.tar.bz2" "$d"

          # int8 encoder/joiner; the decoder is the same non-int8 file as above
          d=sherpa-onnx-zipformer-ru-int8-2025-04-20
          mkdir "$d"
          cp -v hf/am-onnx/encoder.int8.onnx "$d"
          cp -v hf/am-onnx/decoder.onnx "$d"
          cp -v hf/am-onnx/joiner.int8.onnx "$d"
          cp -v hf/lang/bpe.model "$d"
          cp -v hf/lang/tokens.txt "$d"
          cp -av test_wavs "$d"
          cp -v README.md "$d"

          tar cjfv "$d.tar.bz2" "$d"

          rm -rf hf

          ls -lh

          cd ..

          mv models/* .

      # Disabled (`if: false`). Same publish procedure as the Vietnamese step,
      # applied to the two Russian package directories.
      - name: Publish to huggingface (Russian zipformer)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            # Stop at the first failing command so a failed clone/push is
            # reported as a failed attempt instead of being masked by the
            # commands that follow it.
            # NOTE(review): confirm how nick-fields/retry invokes bash; if it
            # already enables errexit this line is redundant but harmless.
            set -e

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            # Same for every model, so set once before the loop.
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            models=(
              sherpa-onnx-zipformer-ru-2025-04-20
              sherpa-onnx-zipformer-ru-int8-2025-04-20
            )
            for d in "${models[@]}"; do
              rm -rf huggingface
              git clone "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" huggingface
              cp -av "$d"/* huggingface

              pushd huggingface
              git lfs track "*.onnx"
              git lfs track "bpe.model"
              git lfs track "*.wav"
              git status
              git add .

              # `git commit` exits non-zero when nothing is staged, e.g. when a
              # previous attempt already pushed this model. Skip the commit in
              # that case so a retry can proceed to the next model.
              if git diff --cached --quiet; then
                echo "No changes to commit for $d"
              else
                git commit -m "add models"
              fi
              git push "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" main

              popd
            done

      # Upload every .tar.bz2 in the workspace root to the `asr-models` release
      # of k2-fsa/sherpa-onnx, replacing assets that already exist.
      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
| @@ -459,6 +459,79 @@ def get_models(): | @@ -459,6 +459,79 @@ def get_models(): | ||
| 459 | popd | 459 | popd |
| 460 | """, | 460 | """, |
| 461 | ), | 461 | ), |
| 462 | + Model( | ||
| 463 | + model_name="sherpa-onnx-zipformer-vi-int8-2025-04-20", | ||
| 464 | + idx=26, | ||
| 465 | + lang="vi", | ||
| 466 | + lang2="Vietnamese", | ||
| 467 | + short_name="zipformer", | ||
| 468 | + cmd=""" | ||
| 469 | + pushd $model_name | ||
| 470 | + | ||
| 471 | + rm -rfv test_wavs | ||
| 472 | + rm -fv bpe.model | ||
| 473 | + | ||
| 474 | + ls -lh | ||
| 475 | + | ||
| 476 | + popd | ||
| 477 | + """, | ||
| 478 | + ), | ||
| 479 | + Model( | ||
| 480 | + model_name="sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19", | ||
| 481 | + idx=27, | ||
| 482 | + lang="ru", | ||
| 483 | + lang2="Russian", | ||
| 484 | + short_name="nemo_ctc_giga_am_v2", | ||
| 485 | + cmd=""" | ||
| 486 | + pushd $model_name | ||
| 487 | + | ||
| 488 | + rm -rfv test_wavs | ||
| 489 | + | ||
| 490 | + rm -fv *.sh | ||
| 491 | + rm -fv *.py | ||
| 492 | + | ||
| 493 | + ls -lh | ||
| 494 | + | ||
| 495 | + popd | ||
| 496 | + """, | ||
| 497 | + ), | ||
| 498 | + Model( | ||
| 499 | + model_name="sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19", | ||
| 500 | + idx=28, | ||
| 501 | + lang="ru", | ||
| 502 | + lang2="Russian", | ||
| 503 | + short_name="nemo_transducer_giga_am", | ||
| 504 | + cmd=""" | ||
| 505 | + pushd $model_name | ||
| 506 | + | ||
| 507 | + rm -rfv test_wavs | ||
| 508 | + | ||
| 509 | + rm -fv *.sh | ||
| 510 | + rm -fv *.py | ||
| 511 | + | ||
| 512 | + ls -lh | ||
| 513 | + | ||
| 514 | + popd | ||
| 515 | + """, | ||
| 516 | + ), | ||
| 517 | + Model( | ||
| 518 | + model_name="sherpa-onnx-zipformer-ru-int8-2025-04-20", | ||
| 519 | + idx=29, | ||
| 520 | + lang="ru", | ||
| 521 | + lang2="Russian", | ||
| 522 | + short_name="v2_zipformer", | ||
| 523 | + cmd=""" | ||
| 524 | + pushd $model_name | ||
| 525 | + | ||
| 526 | + rm -rfv test_wavs | ||
| 527 | + | ||
| 528 | + rm -fv bpe.model | ||
| 529 | + | ||
| 530 | + ls -lh | ||
| 531 | + | ||
| 532 | + popd | ||
| 533 | + """, | ||
| 534 | + ), | ||
| 462 | ] | 535 | ] |
| 463 | return models | 536 | return models |
| 464 | 537 |
| @@ -496,6 +496,55 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | @@ -496,6 +496,55 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 496 | tokens = "$modelDir/tokens.txt", | 496 | tokens = "$modelDir/tokens.txt", |
| 497 | ) | 497 | ) |
| 498 | } | 498 | } |
| 499 | + | ||
| 500 | + 26 -> { | ||
| 501 | + val modelDir = "sherpa-onnx-zipformer-vi-int8-2025-04-20" | ||
| 502 | + return OfflineModelConfig( | ||
| 503 | + transducer = OfflineTransducerModelConfig( | ||
| 504 | + encoder = "$modelDir/encoder-epoch-12-avg-8.int8.onnx", | ||
| 505 | + decoder = "$modelDir/decoder-epoch-12-avg-8.onnx", | ||
| 506 | + joiner = "$modelDir/joiner-epoch-12-avg-8.int8.onnx", | ||
| 507 | + ), | ||
| 508 | + tokens = "$modelDir/tokens.txt", | ||
| 509 | + modelType = "transducer", | ||
| 510 | + ) | ||
| 511 | + } | ||
| 512 | + | ||
| 513 | + 27 -> { | ||
| 514 | + val modelDir = "sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19" | ||
| 515 | + return OfflineModelConfig( | ||
| 516 | + nemo = OfflineNemoEncDecCtcModelConfig( | ||
| 517 | + model = "$modelDir/model.int8.onnx", | ||
| 518 | + ), | ||
| 519 | + tokens = "$modelDir/tokens.txt", | ||
| 520 | + ) | ||
| 521 | + } | ||
| 522 | + | ||
| 523 | + 28 -> { | ||
| 524 | + val modelDir = "sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19" | ||
| 525 | + return OfflineModelConfig( | ||
| 526 | + transducer = OfflineTransducerModelConfig( | ||
| 527 | + encoder = "$modelDir/encoder.int8.onnx", | ||
| 528 | + decoder = "$modelDir/decoder.onnx", | ||
| 529 | + joiner = "$modelDir/joiner.onnx", | ||
| 530 | + ), | ||
| 531 | + tokens = "$modelDir/tokens.txt", | ||
| 532 | + modelType = "nemo_transducer", | ||
| 533 | + ) | ||
| 534 | + } | ||
| 535 | + | ||
| 536 | + 29 -> { | ||
| 537 | + val modelDir = "sherpa-onnx-zipformer-ru-int8-2025-04-20" | ||
| 538 | + return OfflineModelConfig( | ||
| 539 | + transducer = OfflineTransducerModelConfig( | ||
| 540 | + encoder = "$modelDir/encoder.int8.onnx", | ||
| 541 | + decoder = "$modelDir/decoder.onnx", | ||
| 542 | + joiner = "$modelDir/joiner.int8.onnx", | ||
| 543 | + ), | ||
| 544 | + tokens = "$modelDir/tokens.txt", | ||
| 545 | + modelType = "transducer", | ||
| 546 | + ) | ||
| 547 | + } | ||
| 499 | } | 548 | } |
| 500 | return null | 549 | return null |
| 501 | } | 550 | } |
-
请 注册 或 登录 后发表评论