Committed by
GitHub
Add VOSK streaming Russian ASR models and Kroko streaming German ASR models (#2502)
This PR adds support for new streaming ASR models from VOSK (Russian) and Kroko (German) to the sherpa-onnx project. It integrates three new models: one German Kroko streaming model and two Russian VOSK streaming models (regular and int8-quantized versions).

- Adds configuration for three new streaming ASR models (indices 24-26)
- Updates APK generation scripts to include the new models
- Modifies GitHub workflow for model upload and deployment
正在显示 3 个修改的文件，包含 170 行增加和 4 行删除
| @@ -4,7 +4,7 @@ on: | @@ -4,7 +4,7 @@ on: | ||
| 4 | push: | 4 | push: |
| 5 | branches: | 5 | branches: |
| 6 | - upload-models | 6 | - upload-models |
| 7 | - - more-models | 7 | + # - upload-more-models |
| 8 | workflow_dispatch: | 8 | workflow_dispatch: |
| 9 | 9 | ||
| 10 | concurrency: | 10 | concurrency: |
| @@ -32,15 +32,21 @@ jobs: | @@ -32,15 +32,21 @@ jobs: | ||
| 32 | git config --global user.name "Fangjun Kuang" | 32 | git config --global user.name "Fangjun Kuang" |
| 33 | 33 | ||
| 34 | - name: Streaming zipformer from Banafo/Kroko-ASR | 34 | - name: Streaming zipformer from Banafo/Kroko-ASR |
| 35 | - if: true | 35 | + if: false |
| 36 | shell: bash | 36 | shell: bash |
| 37 | env: | 37 | env: |
| 38 | HF_TOKEN: ${{ secrets.HF_TOKEN }} | 38 | HF_TOKEN: ${{ secrets.HF_TOKEN }} |
| 39 | run: | | 39 | run: | |
| 40 | git lfs install | 40 | git lfs install |
| 41 | git clone https://csukuangfj:$HF_TOKEN@huggingface.co/Banafo/Kroko-ASR src | 41 | git clone https://csukuangfj:$HF_TOKEN@huggingface.co/Banafo/Kroko-ASR src |
| 42 | + pushd src | ||
| 43 | + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_encoder.onnx | ||
| 44 | + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_decoder.onnx | ||
| 45 | + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_joiner.onnx | ||
| 46 | + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_tokens.txt | ||
| 47 | + popd | ||
| 42 | 48 | ||
| 43 | - for lang in en es fr; do | 49 | + for lang in en es fr de; do |
| 44 | repo=sherpa-onnx-streaming-zipformer-$lang-kroko-2025-08-06 | 50 | repo=sherpa-onnx-streaming-zipformer-$lang-kroko-2025-08-06 |
| 45 | git clone https://huggingface.co/csukuangfj/$repo | 51 | git clone https://huggingface.co/csukuangfj/$repo |
| 46 | cp src/${lang}_encoder.onnx $repo/encoder.onnx | 52 | cp src/${lang}_encoder.onnx $repo/encoder.onnx |
| @@ -67,7 +73,6 @@ jobs: | @@ -67,7 +73,6 @@ jobs: | ||
| 67 | 73 | ||
| 68 | popd | 74 | popd |
| 69 | 75 | ||
| 70 | - | ||
| 71 | rm -rf $repo/.git* | 76 | rm -rf $repo/.git* |
| 72 | 77 | ||
| 73 | tar cjfv $repo.tar.bz2 $repo | 78 | tar cjfv $repo.tar.bz2 $repo |
| @@ -239,6 +244,84 @@ jobs: | @@ -239,6 +244,84 @@ jobs: | ||
| 239 | popd | 244 | popd |
| 240 | done | 245 | done |
| 241 | 246 | ||
| 247 | + - name: vosk-model-ru (stream zipformer) | ||
| 248 | + if: false | ||
| 249 | + shell: bash | ||
| 250 | + env: | ||
| 251 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 252 | + run: | | ||
| 253 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 254 | + git config --global user.name "Fangjun Kuang" | ||
| 255 | + | ||
| 256 | + cat >README.md <<EOF | ||
| 257 | + # Introduction | ||
| 258 | + Models in this directory are from | ||
| 259 | + https://huggingface.co/alphacep/vosk-model-small-streaming-ru | ||
| 260 | + EOF | ||
| 261 | + | ||
| 262 | + git lfs install | ||
| 263 | + git clone https://huggingface.co/alphacep/vosk-model-small-streaming-ru hf | ||
| 264 | + | ||
| 265 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16 int8 | ||
| 266 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16 fp32 | ||
| 267 | + | ||
| 268 | + rm -fv int8/*.onnx | ||
| 269 | + rm -fv fp32/*.onnx | ||
| 270 | + | ||
| 271 | + mkdir -p int8/test_wavs | ||
| 272 | + mkdir -p fp32/test_wavs | ||
| 273 | + | ||
| 274 | + curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav | ||
| 275 | + curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav | ||
| 276 | + | ||
| 277 | + mv russian-i-love-you.wav 0.wav | ||
| 278 | + mv test.wav 1.wav | ||
| 279 | + | ||
| 280 | + cp -v README.md int8/ | ||
| 281 | + cp -v README.md fp32/ | ||
| 282 | + | ||
| 283 | + cp -v *.wav int8/test_wavs | ||
| 284 | + cp -v *.wav fp32/test_wavs | ||
| 285 | + | ||
| 286 | + cp -v hf/am-onnx/{encoder,decoder,joiner}.onnx fp32/ | ||
| 287 | + | ||
| 288 | + cp -v hf/am-onnx/{encoder,joiner}.int8.onnx int8/ | ||
| 289 | + cp -v hf/am-onnx/decoder.onnx int8/ | ||
| 290 | + | ||
| 291 | + cp -v hf/lang/tokens.txt int8/ | ||
| 292 | + cp -v hf/lang/bpe.model int8/ | ||
| 293 | + | ||
| 294 | + cp -v hf/lang/tokens.txt fp32/ | ||
| 295 | + cp -v hf/lang/bpe.model fp32/ | ||
| 296 | + | ||
| 297 | + mv int8 sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16 | ||
| 298 | + mv fp32 sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16 | ||
| 299 | + | ||
| 300 | + models=( | ||
| 301 | + sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16 | ||
| 302 | + sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16 | ||
| 303 | + ) | ||
| 304 | + | ||
| 305 | + for d in ${models[@]}; do | ||
| 306 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 307 | + export GIT_CLONE_PROTECTION_ACTIVE=false | ||
| 308 | + pushd $d | ||
| 309 | + git lfs track "*.onnx" | ||
| 310 | + git lfs track "bpe.model" | ||
| 311 | + git lfs track "*.wav" | ||
| 312 | + git status | ||
| 313 | + git add . | ||
| 314 | + | ||
| 315 | + git commit -m "add models" | ||
| 316 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main | ||
| 317 | + popd | ||
| 318 | + | ||
| 319 | + rm -rf $d/.git* | ||
| 320 | + | ||
| 321 | + tar cjfv $d.tar.bz2 $d | ||
| 322 | + done | ||
| 323 | + ls -lh *.tar.bz2 | ||
| 324 | + | ||
| 242 | - name: vosk-model-ru (zipformer) | 325 | - name: vosk-model-ru (zipformer) |
| 243 | if: false | 326 | if: false |
| 244 | shell: bash | 327 | shell: bash |
| @@ -425,6 +425,50 @@ def get_models(): | @@ -425,6 +425,50 @@ def get_models(): | ||
| 425 | popd | 425 | popd |
| 426 | """, | 426 | """, |
| 427 | ), | 427 | ), |
| 428 | + Model( | ||
| 429 | + model_name="sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06", | ||
| 430 | + idx=24, | ||
| 431 | + lang="de", | ||
| 432 | + short_name="zipformer_kroko_asr", | ||
| 433 | + cmd=""" | ||
| 434 | + pushd $model_name | ||
| 435 | + rm -rf test_wavs | ||
| 436 | + | ||
| 437 | + ls -lh | ||
| 438 | + | ||
| 439 | + popd | ||
| 440 | + """, | ||
| 441 | + ), | ||
| 442 | + Model( | ||
| 443 | + model_name="sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16", | ||
| 444 | + idx=25, | ||
| 445 | + lang="ru", | ||
| 446 | + short_name="small_zipformer_int8", | ||
| 447 | + cmd=""" | ||
| 448 | + pushd $model_name | ||
| 449 | + rm -rf test_wavs | ||
| 450 | + rm -fv bpe.model | ||
| 451 | + | ||
| 452 | + ls -lh | ||
| 453 | + | ||
| 454 | + popd | ||
| 455 | + """, | ||
| 456 | + ), | ||
| 457 | + Model( | ||
| 458 | + model_name="sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16", | ||
| 459 | + idx=26, | ||
| 460 | + lang="ru", | ||
| 461 | + short_name="small_zipformer", | ||
| 462 | + cmd=""" | ||
| 463 | + pushd $model_name | ||
| 464 | + rm -rf test_wavs | ||
| 465 | + rm -fv bpe.model | ||
| 466 | + | ||
| 467 | + ls -lh | ||
| 468 | + | ||
| 469 | + popd | ||
| 470 | + """, | ||
| 471 | + ), | ||
| 428 | ] | 472 | ] |
| 429 | 473 | ||
| 430 | return models | 474 | return models |
| @@ -479,6 +479,45 @@ fun getModelConfig(type: Int): OnlineModelConfig? { | @@ -479,6 +479,45 @@ fun getModelConfig(type: Int): OnlineModelConfig? { | ||
| 479 | ) | 479 | ) |
| 480 | } | 480 | } |
| 481 | 481 | ||
| 482 | + 24 -> { | ||
| 483 | + val modelDir = "sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06" | ||
| 484 | + return OnlineModelConfig( | ||
| 485 | + transducer = OnlineTransducerModelConfig( | ||
| 486 | + encoder = "$modelDir/encoder.onnx", | ||
| 487 | + decoder = "$modelDir/decoder.onnx", | ||
| 488 | + joiner = "$modelDir/joiner.onnx", | ||
| 489 | + ), | ||
| 490 | + tokens = "$modelDir/tokens.txt", | ||
| 491 | + modelType = "zipformer2", | ||
| 492 | + ) | ||
| 493 | + } | ||
| 494 | + | ||
| 495 | + 25 -> { | ||
| 496 | + val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16" | ||
| 497 | + return OnlineModelConfig( | ||
| 498 | + transducer = OnlineTransducerModelConfig( | ||
| 499 | + encoder = "$modelDir/encoder.int8.onnx", | ||
| 500 | + decoder = "$modelDir/decoder.onnx", | ||
| 501 | + joiner = "$modelDir/joiner.int8.onnx", | ||
| 502 | + ), | ||
| 503 | + tokens = "$modelDir/tokens.txt", | ||
| 504 | + modelType = "zipformer2", | ||
| 505 | + ) | ||
| 506 | + } | ||
| 507 | + | ||
| 508 | + 26 -> { | ||
| 509 | + val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16" | ||
| 510 | + return OnlineModelConfig( | ||
| 511 | + transducer = OnlineTransducerModelConfig( | ||
| 512 | + encoder = "$modelDir/encoder.onnx", | ||
| 513 | + decoder = "$modelDir/decoder.onnx", | ||
| 514 | + joiner = "$modelDir/joiner.onnx", | ||
| 515 | + ), | ||
| 516 | + tokens = "$modelDir/tokens.txt", | ||
| 517 | + modelType = "zipformer2", | ||
| 518 | + ) | ||
| 519 | + } | ||
| 520 | + | ||
| 482 | 1000 -> { | 521 | 1000 -> { |
| 483 | val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20" | 522 | val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20" |
| 484 | return OnlineModelConfig( | 523 | return OnlineModelConfig( |
-
请 注册 或 登录 后发表评论