Fangjun Kuang
Committed by GitHub

Add VOSK streaming Russian ASR models and Kroko streaming German ASR models (#2502)

This PR adds support for new streaming ASR models from VOSK (Russian) and Kroko (German) to the sherpa-onnx project. It integrates three new models: one German Kroko streaming model and two Russian VOSK streaming models (regular and int8 quantized versions).

- Adds configuration for three new streaming ASR models (indices 24-26)
- Updates APK generation scripts to include the new models
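- Modifies the GitHub workflow that uploads models: adds German (`de`) to the Kroko export loop and adds a step that packages the VOSK streaming Russian models (fp32 and int8)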
@@ -4,7 +4,7 @@ on: @@ -4,7 +4,7 @@ on:
4 push: 4 push:
5 branches: 5 branches:
6 - upload-models 6 - upload-models
7 - - more-models 7 + # - upload-more-models
8 workflow_dispatch: 8 workflow_dispatch:
9 9
10 concurrency: 10 concurrency:
@@ -32,15 +32,21 @@ jobs: @@ -32,15 +32,21 @@ jobs:
32 git config --global user.name "Fangjun Kuang" 32 git config --global user.name "Fangjun Kuang"
33 33
34 - name: Streaming zipformer from Banafo/Kroko-ASR 34 - name: Streaming zipformer from Banafo/Kroko-ASR
35 - if: true 35 + if: false
36 shell: bash 36 shell: bash
37 env: 37 env:
38 HF_TOKEN: ${{ secrets.HF_TOKEN }} 38 HF_TOKEN: ${{ secrets.HF_TOKEN }}
39 run: | 39 run: |
40 git lfs install 40 git lfs install
41 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/Banafo/Kroko-ASR src 41 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/Banafo/Kroko-ASR src
  42 + pushd src
  43 + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_encoder.onnx
  44 + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_decoder.onnx
  45 + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_joiner.onnx
  46 + curl -SL -O https://huggingface.co/spaces/Banafo/Kroko-Streaming-ASR-Python/resolve/main/de_tokens.txt
  47 + popd
42 48
43 - for lang in en es fr; do 49 + for lang in en es fr de; do
44 repo=sherpa-onnx-streaming-zipformer-$lang-kroko-2025-08-06 50 repo=sherpa-onnx-streaming-zipformer-$lang-kroko-2025-08-06
45 git clone https://huggingface.co/csukuangfj/$repo 51 git clone https://huggingface.co/csukuangfj/$repo
46 cp src/${lang}_encoder.onnx $repo/encoder.onnx 52 cp src/${lang}_encoder.onnx $repo/encoder.onnx
@@ -67,7 +73,6 @@ jobs: @@ -67,7 +73,6 @@ jobs:
67 73
68 popd 74 popd
69 75
70 -  
71 rm -rf $repo/.git* 76 rm -rf $repo/.git*
72 77
73 tar cjfv $repo.tar.bz2 $repo 78 tar cjfv $repo.tar.bz2 $repo
@@ -239,6 +244,84 @@ jobs: @@ -239,6 +244,84 @@ jobs:
239 popd 244 popd
240 done 245 done
241 246
  247 + - name: vosk-model-ru (stream zipformer)
  248 + if: false
  249 + shell: bash
  250 + env:
  251 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  252 + run: |
  253 + git config --global user.email "csukuangfj@gmail.com"
  254 + git config --global user.name "Fangjun Kuang"
  255 +
  256 + cat >README.md <<EOF
  257 + # Introduction
  258 + Models in this directory are from
  259 + https://huggingface.co/alphacep/vosk-model-small-streaming-ru
  260 + EOF
  261 +
  262 + git lfs install
  263 + git clone https://huggingface.co/alphacep/vosk-model-small-streaming-ru hf
  264 +
  265 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16 int8
  266 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16 fp32
  267 +
  268 + rm -fv int8/*.onnx
  269 + rm -fv fp32/*.onnx
  270 +
  271 + mkdir -p int8/test_wavs
  272 + mkdir -p fp32/test_wavs
  273 +
  274 + curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
  275 + curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav
  276 +
  277 + mv russian-i-love-you.wav 0.wav
  278 + mv test.wav 1.wav
  279 +
  280 + cp -v README.md int8/
  281 + cp -v README.md fp32/
  282 +
  283 + cp -v *.wav int8/test_wavs
  284 + cp -v *.wav fp32/test_wavs
  285 +
  286 + cp -v hf/am-onnx/{encoder,decoder,joiner}.onnx fp32/
  287 +
  288 + cp -v hf/am-onnx/{encoder,joiner}.int8.onnx int8/
  289 + cp -v hf/am-onnx/decoder.onnx int8/
  290 +
  291 + cp -v hf/lang/tokens.txt int8/
  292 + cp -v hf/lang/bpe.model int8/
  293 +
  294 + cp -v hf/lang/tokens.txt fp32/
  295 + cp -v hf/lang/bpe.model fp32/
  296 +
  297 + mv int8 sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16
  298 + mv fp32 sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16
  299 +
  300 + models=(
  301 + sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16
  302 + sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16
  303 + )
  304 +
  305 + for d in ${models[@]}; do
  306 + export GIT_LFS_SKIP_SMUDGE=1
  307 + export GIT_CLONE_PROTECTION_ACTIVE=false
  308 + pushd $d
  309 + git lfs track "*.onnx"
  310 + git lfs track "bpe.model"
  311 + git lfs track "*.wav"
  312 + git status
  313 + git add .
  314 +
  315 + git commit -m "add models"
  316 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
  317 + popd
  318 +
  319 + rm -rf $d/.git*
  320 +
  321 + tar cjfv $d.tar.bz2 $d
  322 + done
  323 + ls -lh *.tar.bz2
  324 +
242 - name: vosk-model-ru (zipformer) 325 - name: vosk-model-ru (zipformer)
243 if: false 326 if: false
244 shell: bash 327 shell: bash
@@ -425,6 +425,50 @@ def get_models(): @@ -425,6 +425,50 @@ def get_models():
425 popd 425 popd
426 """, 426 """,
427 ), 427 ),
  428 + Model(
  429 + model_name="sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06",
  430 + idx=24,
  431 + lang="de",
  432 + short_name="zipformer_kroko_asr",
  433 + cmd="""
  434 + pushd $model_name
  435 + rm -rf test_wavs
  436 +
  437 + ls -lh
  438 +
  439 + popd
  440 + """,
  441 + ),
  442 + Model(
  443 + model_name="sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16",
  444 + idx=25,
  445 + lang="ru",
  446 + short_name="small_zipformer_int8",
  447 + cmd="""
  448 + pushd $model_name
  449 + rm -rf test_wavs
  450 + rm -fv bpe.model
  451 +
  452 + ls -lh
  453 +
  454 + popd
  455 + """,
  456 + ),
  457 + Model(
  458 + model_name="sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16",
  459 + idx=26,
  460 + lang="ru",
  461 + short_name="small_zipformer",
  462 + cmd="""
  463 + pushd $model_name
  464 + rm -rf test_wavs
  465 + rm -fv bpe.model
  466 +
  467 + ls -lh
  468 +
  469 + popd
  470 + """,
  471 + ),
428 ] 472 ]
429 473
430 return models 474 return models
@@ -479,6 +479,45 @@ fun getModelConfig(type: Int): OnlineModelConfig? { @@ -479,6 +479,45 @@ fun getModelConfig(type: Int): OnlineModelConfig? {
479 ) 479 )
480 } 480 }
481 481
  482 + 24 -> {
  483 + val modelDir = "sherpa-onnx-streaming-zipformer-de-kroko-2025-08-06"
  484 + return OnlineModelConfig(
  485 + transducer = OnlineTransducerModelConfig(
  486 + encoder = "$modelDir/encoder.onnx",
  487 + decoder = "$modelDir/decoder.onnx",
  488 + joiner = "$modelDir/joiner.onnx",
  489 + ),
  490 + tokens = "$modelDir/tokens.txt",
  491 + modelType = "zipformer2",
  492 + )
  493 + }
  494 +
  495 + 25 -> {
  496 + val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-int8-2025-08-16"
  497 + return OnlineModelConfig(
  498 + transducer = OnlineTransducerModelConfig(
  499 + encoder = "$modelDir/encoder.int8.onnx",
  500 + decoder = "$modelDir/decoder.onnx",
  501 + joiner = "$modelDir/joiner.int8.onnx",
  502 + ),
  503 + tokens = "$modelDir/tokens.txt",
  504 + modelType = "zipformer2",
  505 + )
  506 + }
  507 +
  508 + 26 -> {
  509 + val modelDir = "sherpa-onnx-streaming-zipformer-small-ru-vosk-2025-08-16"
  510 + return OnlineModelConfig(
  511 + transducer = OnlineTransducerModelConfig(
  512 + encoder = "$modelDir/encoder.onnx",
  513 + decoder = "$modelDir/decoder.onnx",
  514 + joiner = "$modelDir/joiner.onnx",
  515 + ),
  516 + tokens = "$modelDir/tokens.txt",
  517 + modelType = "zipformer2",
  518 + )
  519 + }
  520 +
482 1000 -> { 521 1000 -> {
483 val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20" 522 val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20"
484 return OnlineModelConfig( 523 return OnlineModelConfig(