Fangjun Kuang
Committed by GitHub

Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across 
multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

Model doc is available at
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
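For orientation, here is a minimal sketch of the new config in use, condensed from the C++ (cxx-api) examples added in this PR. It assumes the test model used throughout the PR and the `Wave`/`ReadWave` helpers from `cxx-api.h`; it is a sketch, not one of the shipped examples:

```c++
// Minimal non-streaming Zipformer CTC decode (sketch condensed from the
// cxx-api examples in this PR).
#include <iostream>

#include "sherpa-onnx/c-api/cxx-api.h"

int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT

  OfflineRecognizerConfig config;
  // New in this PR: a zipformer_ctc config with a single `model` field.
  config.model_config.zipformer_ctc.model =
      "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt";

  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Failed to create recognizer. Please check your config\n";
    return -1;
  }

  Wave wave = ReadWave(
      "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav");

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());
  recognizer.Decode(&stream);
  std::cout << recognizer.GetResult(&stream).text << "\n";

  return 0;
}
```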
Showing 71 changed files with 2,111 additions and 58 deletions
... ... @@ -6,6 +6,10 @@ cd dart-api-examples
pushd non-streaming-asr
echo '----------Zipformer CTC----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo '----------SenseVoice----------'
./run-sense-voice-with-hr.sh
./run-sense-voice.sh
... ... @@ -114,6 +118,10 @@ popd
pushd vad-with-non-streaming-asr
echo '----------Zipformer CTC----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo '----------Dolphin CTC----------'
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-*
... ...
... ... @@ -6,43 +6,11 @@ cd ./version-test
./run.sh
ls -lh
cd ../speech-enhancement-gtcrn
./run.sh
ls -lh
cd ../kokoro-tts
./run-kokoro.sh
ls -lh
cd ../offline-tts
./run-matcha-zh.sh
ls -lh *.wav
./run-matcha-en.sh
ls -lh *.wav
./run-aishell3.sh
ls -lh *.wav
./run-piper.sh
ls -lh *.wav
./run-hf-fanchen.sh
ls -lh *.wav
ls -lh
pushd ../..
mkdir tts
cp -v dotnet-examples/kokoro-tts/*.wav ./tts
cp -v dotnet-examples/offline-tts/*.wav ./tts
popd
cd ../offline-speaker-diarization
./run.sh
rm -rfv *.onnx
rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*
cd ../offline-decode-files
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
... ... @@ -82,6 +50,41 @@ rm -rf sherpa-onnx-*
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-*
cd ../speech-enhancement-gtcrn
./run.sh
ls -lh
cd ../kokoro-tts
./run-kokoro.sh
ls -lh
cd ../offline-tts
./run-matcha-zh.sh
ls -lh *.wav
./run-matcha-en.sh
ls -lh *.wav
./run-aishell3.sh
ls -lh *.wav
./run-piper.sh
ls -lh *.wav
./run-hf-fanchen.sh
ls -lh *.wav
ls -lh
pushd ../..
mkdir tts
cp -v dotnet-examples/kokoro-tts/*.wav ./tts
cp -v dotnet-examples/offline-tts/*.wav ./tts
popd
cd ../offline-speaker-diarization
./run.sh
rm -rfv *.onnx
rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*
cd ../keyword-spotting-from-files
./run.sh
... ... @@ -115,5 +118,3 @@ rm -rf sherpa-onnx-*
cd ../spoken-language-identification
./run.sh
rm -rf sherpa-onnx-*
... ...
... ... @@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")
echo "----------non-streaming ASR Zipformer CTC----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
node ./test_asr_non_streaming_zipformer_ctc.js
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
echo "----------non-streaming ASR NeMo parakeet tdt----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
... ...
... ... @@ -9,6 +9,15 @@ git status
ls -lh
ls -lh node_modules
# asr with offline zipformer ctc
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
node ./test-offline-zipformer-ctc.js
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
# asr with offline dolphin ctc
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
... ...
... ... @@ -9,6 +9,9 @@ ls -lh
./run-test-version.sh
./run-zipformer-ctc-asr.sh
rm -rf sherpa-onnx-zipformer-*
./run-decode-file-sense-voice-with-hr.sh
rm -rf sherpa-onnx-sense-voice-*
rm -rf dict lexicon.txt replace.fst test-hr.wav
... ...
... ... @@ -89,6 +89,7 @@ jobs:
make -j4 install
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
rm -rf install/lib/pkgconfig
rm -fv install/lib/cargs.h
... ... @@ -135,6 +136,7 @@ jobs:
make -j4 install
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
rm -rf install/lib/pkgconfig
rm -fv install/lib/cargs.h
... ...
... ... @@ -90,6 +90,7 @@ jobs:
make install
cp bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
cp bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
ls -lh install/lib
... ...
... ... @@ -37,7 +37,7 @@ jobs:
strategy:
fail-fast: false
matrix:
-os: [ubuntu-latest, macos-latest, macos-13, windows-latest]
+os: [ubuntu-latest, macos-latest, macos-13, windows-latest, ubuntu-22.04-arm]
steps:
- uses: actions/checkout@v4
... ... @@ -56,7 +56,7 @@ jobs:
key: ${{ matrix.os }}
- name: Install Free pascal compiler (ubuntu)
-if: matrix.os == 'ubuntu-latest'
+if: matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-22.04-arm'
shell: bash
run: |
sudo apt-get update
... ... @@ -156,6 +156,10 @@ jobs:
pushd non-streaming-asr
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
... ... @@ -264,9 +268,12 @@ jobs:
cd ./pascal-api-examples
pushd vad-with-non-streaming-asr
time ./run-vad-with-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
time ./run-vad-with-dolphin-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
... ...
... ... @@ -165,6 +165,9 @@ jobs:
run: |
cd ./java-api-examples
./run-non-streaming-decode-file-zipformer-ctc.sh
rm -rf sherpa-onnx-zipformer-ctc-*
./run-non-streaming-decode-file-dolphin-ctc.sh
rm -rf sherpa-onnx-dolphin-*
... ...
... ... @@ -184,6 +184,10 @@ jobs:
go build
ls -lh
echo "Test Zipformer CTC"
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-zipformer-*
echo "Test SenseVoice ctc"
./run-sense-voice-small-with-hr.sh
./run-sense-voice-small.sh
... ...
... ... @@ -19,12 +19,36 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.8"]
python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
- name: Zipformer CTC (non-streaming)
shell: bash
run: |
git lfs install
names=(
sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
sherpa-onnx-zipformer-ctc-zh-2025-07-03
sherpa-onnx-zipformer-ctc-zh-fp16-2025-07-03
)
for name in ${names[@]}; do
git clone https://huggingface.co/csukuangfj/$name
pushd $name
git lfs pull
rm -rf .git
rm -rfv .gitattributes
ls -lh
popd
tar cjfv $name.tar.bz2 $name
rm -rf $name
ls -lh *.tar.bz2
done
- name: Vietnamese (zipformer)
if: false
shell: bash
run: |
rm -rf models
... ... @@ -76,6 +100,7 @@ jobs:
mv models/* .
- name: Publish to huggingface (Vietnamese zipformer)
if: false
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
... ...
... ... @@ -114,6 +114,7 @@ We also have spaces built using WebAssembly. They are listed below:
|Real-time speech recognition (Chinese + English) with Paraformer |[Click me][wasm-hf-streaming-asr-zh-en-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-paraformer]|
|Real-time speech recognition (Chinese + English + Cantonese) with [Paraformer-large][Paraformer-large]|[Click me][wasm-hf-streaming-asr-zh-en-yue-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-yue-paraformer]|
|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer] |[地址][wasm-ms-streaming-asr-en-zipformer]|
|VAD + speech recognition (Chinese) with [Zipformer CTC](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese)|[Click me][wasm-hf-vad-asr-zh-zipformer-ctc-07-03]| [地址][wasm-ms-vad-asr-zh-zipformer-ctc-07-03]|
|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]|
|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]|
|VAD + speech recognition (English) with [Moonshine tiny][Moonshine tiny]|[Click me][wasm-hf-vad-asr-en-moonshine-tiny-en]| [地址][wasm-ms-vad-asr-en-moonshine-tiny-en]|
... ... @@ -141,6 +142,7 @@ We also have spaces built using WebAssembly. They are listed below:
|----------------------------------------|------------------------------------|-----------------------------------|
| Speaker diarization | [Address][apk-speaker-diarization] | [点此][apk-speaker-diarization-cn]|
| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn] |
| Simulated-streaming speech recognition | [Address][apk-simula-streaming-asr]| [点此][apk-simula-streaming-asr-cn]|
| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] |
| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] |
| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] |
... ... @@ -250,8 +252,10 @@ for more models. The following table lists only **SOME** of them.
|Name | Supported Languages| Description|
|-----|-----|----|
|[sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english)| English | It is converted from <https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2>|
|[Whisper tiny.en](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2)|English| See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html)|
|[Moonshine tiny][Moonshine tiny]|English|See [also](https://github.com/usefulsensors/moonshine)|
|[sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese)|Chinese| A Zipformer CTC model|
|[sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17][sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17]|Chinese, Cantonese, English, Korean, Japanese| 支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html)|
|[sherpa-onnx-paraformer-zh-2024-03-09][sherpa-onnx-paraformer-zh-2024-03-09]|Chinese, English| 也支持多种中文方言. See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2024-03-09-chinese-english)|
|[sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01][sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01]|Japanese|See [also](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01-japanese)|
... ... @@ -413,6 +417,8 @@ It uses sherpa-onnx for speech-to-text and text-to-speech.
[wasm-hf-streaming-asr-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en
[wasm-ms-streaming-asr-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en
[SenseVoice]: https://github.com/FunAudioLLM/SenseVoice
[wasm-hf-vad-asr-zh-zipformer-ctc-07-03]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc
[wasm-ms-vad-asr-zh-zipformer-ctc-07-03]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc/summary
[wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice
[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice
[wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
... ... @@ -423,20 +429,20 @@ It uses sherpa-onnx for speech-to-text and text-to-speech.
[wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
[wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
[wasm-ms-vad-asr-zh-zipformer-wenetspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
-[ReazonSpeech]: https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf
+[reazonspeech]: https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf
[wasm-hf-vad-asr-ja-zipformer-reazonspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
[wasm-ms-vad-asr-ja-zipformer-reazonspeech]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
-[GigaSpeech2]: https://github.com/SpeechColab/GigaSpeech2
+[gigaspeech2]: https://github.com/speechcolab/gigaspeech2
[wasm-hf-vad-asr-th-zipformer-gigaspeech2]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer
[wasm-ms-vad-asr-th-zipformer-gigaspeech2]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer
-[TeleSpeech-ASR]: https://github.com/Tele-AI/TeleSpeech-ASR
+[telespeech-asr]: https://github.com/tele-ai/telespeech-asr
[wasm-hf-vad-asr-zh-telespeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
[wasm-ms-vad-asr-zh-telespeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
[wasm-hf-vad-asr-zh-en-paraformer-large]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
[wasm-ms-vad-asr-zh-en-paraformer-large]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
[wasm-hf-vad-asr-zh-en-paraformer-small]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
[wasm-ms-vad-asr-zh-en-paraformer-small]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
-[Dolphin]: https://github.com/DataoceanAI/Dolphin
+[dolphin]: https://github.com/dataoceanai/dolphin
[wasm-ms-vad-asr-multi-lang-dolphin-base]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-multi-lang-dophin-ctc
[wasm-hf-vad-asr-multi-lang-dolphin-base]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-multi-lang-dophin-ctc
... ... @@ -450,6 +456,8 @@ It uses sherpa-onnx for speech-to-text and text-to-speech.
[apk-speaker-diarization-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk-cn.html
[apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html
[apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html
[apk-simula-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk-simulate-streaming-asr.html
[apk-simula-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-simulate-streaming-asr-cn.html
[apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
[apk-tts-cn]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html
[apk-vad]: https://k2-fsa.github.io/sherpa/onnx/vad/apk.html
... ...
... ... @@ -45,6 +45,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
sherpa-onnx-cxx-api
portaudio_static
)
add_executable(zipformer-ctc-simulate-streaming-microphone-cxx-api
./zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
)
target_link_libraries(zipformer-ctc-simulate-streaming-microphone-cxx-api
sherpa-onnx-cxx-api
portaudio_static
)
endif()
if(SHERPA_ONNX_HAS_ALSA)
... ... @@ -57,10 +66,21 @@ if(SHERPA_ONNX_HAS_ALSA)
portaudio_static
)
add_executable(zipformer-ctc-simulate-streaming-alsa-cxx-api
./zipformer-ctc-simulate-streaming-alsa-cxx-api.cc
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/alsa.cc
)
target_link_libraries(zipformer-ctc-simulate-streaming-alsa-cxx-api
sherpa-onnx-cxx-api
portaudio_static
)
if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
target_link_libraries(sense-voice-simulate-streaming-alsa-cxx-api -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
target_link_libraries(zipformer-ctc-simulate-streaming-alsa-cxx-api -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
else()
target_link_libraries(sense-voice-simulate-streaming-alsa-cxx-api asound)
target_link_libraries(zipformer-ctc-simulate-streaming-alsa-cxx-api asound)
endif()
endif()
... ...
// cxx-api-examples/zipformer-ctc-simulate-streaming-alsa-cxx-api.cc
// Copyright (c) 2025 Xiaomi Corporation
//
// This file demonstrates how to use Zipformer CTC with sherpa-onnx's C++ API
// for simulated streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
//
// clang-format on
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <chrono> // NOLINT
#include <condition_variable> // NOLINT
#include <iostream>
#include <mutex> // NOLINT
#include <queue>
#include <thread> // NOLINT
#include <vector>
#include "sherpa-display.h" // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/alsa.h"
std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;
static void Handler(int32_t /*sig*/) {
stop = true;
condition_variable.notify_one();
fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}
static void RecordCallback(sherpa_onnx::Alsa *alsa) {
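  // Runs on a dedicated thread (started in main below): reads ~100 ms of
  // audio per alsa->Read() call and hands it to the main loop via the queue.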
int32_t chunk = 0.1 * alsa->GetActualSampleRate();
while (!stop) {
std::vector<float> samples = alsa->Read(chunk);
std::lock_guard<std::mutex> lock(mutex);
samples_queue.emplace(std::move(samples));
condition_variable.notify_one();
}
}
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
using namespace sherpa_onnx::cxx; // NOLINT
VadModelConfig config;
config.silero_vad.model = "./silero_vad.onnx";
config.silero_vad.threshold = 0.5;
config.silero_vad.min_silence_duration = 0.1;
config.silero_vad.min_speech_duration = 0.25;
config.silero_vad.max_speech_duration = 8;
config.sample_rate = 16000;
config.debug = false;
VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
if (!vad.Get()) {
std::cerr << "Failed to create VAD. Please check your config\n";
exit(-1);
}
return vad;
}
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
using namespace sherpa_onnx::cxx; // NOLINT
OfflineRecognizerConfig config;
config.model_config.zipformer_ctc.model =
"./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx";
config.model_config.tokens =
"./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt";
config.model_config.num_threads = 2;
config.model_config.debug = false;
std::cout << "Loading model\n";
OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
if (!recognizer.Get()) {
std::cerr << "Please check your config\n";
exit(-1);
}
std::cout << "Loading model done\n";
return recognizer;
}
int32_t main(int32_t argc, const char *argv[]) {
const char *kUsageMessage = R"usage(
Usage:
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
./zipformer-ctc-simulate-streaming-alsa-cxx-api device_name
The device name specifies which microphone to use in case there are several
on your system. You can use
arecord -l
to find all available microphones on your computer. For instance, if it outputs
**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0
and if you want to select card 3 and device 0 on that card, please use:
plughw:3,0
as the device_name.
)usage";
if (argc != 2) {
fprintf(stderr, "%s\n", kUsageMessage);
return -1;
}
signal(SIGINT, Handler);
using namespace sherpa_onnx::cxx; // NOLINT
auto vad = CreateVad();
auto recognizer = CreateOfflineRecognizer();
int32_t expected_sample_rate = 16000;
std::string device_name = argv[1];
sherpa_onnx::Alsa alsa(device_name.c_str());
fprintf(stderr, "Use recording device: %s\n", device_name.c_str());
if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
expected_sample_rate);
exit(-1);
}
int32_t window_size = 512; // samples, please don't change
int32_t offset = 0;
std::vector<float> buffer;
bool speech_started = false;
auto started_time = std::chrono::steady_clock::now();
SherpaDisplay display;
std::thread record_thread(RecordCallback, &alsa);
std::cout << "Started! Please speak\n";
while (!stop) {
{
std::unique_lock<std::mutex> lock(mutex);
while (samples_queue.empty() && !stop) {
condition_variable.wait(lock);
}
const auto &s = samples_queue.front();
buffer.insert(buffer.end(), s.begin(), s.end());
samples_queue.pop();
}
for (; offset + window_size < buffer.size(); offset += window_size) {
vad.AcceptWaveform(buffer.data() + offset, window_size);
if (!speech_started && vad.IsDetected()) {
speech_started = true;
started_time = std::chrono::steady_clock::now();
}
}
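    // No speech detected yet: keep only the most recent 10 windows of audio
    // so the buffer does not grow without bound.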
if (!speech_started) {
if (buffer.size() > 10 * window_size) {
offset -= buffer.size() - 10 * window_size;
buffer = {buffer.end() - 10 * window_size, buffer.end()};
}
}
auto current_time = std::chrono::steady_clock::now();
const float elapsed_seconds =
std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
started_time)
.count() /
1000.;
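    // While speech is in progress, re-decode the accumulated buffer roughly
    // every 0.2 s to show partial results, i.e., simulated streaming with a
    // non-streaming model.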
if (speech_started && elapsed_seconds > 0.2) {
OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(expected_sample_rate, buffer.data(), buffer.size());
recognizer.Decode(&stream);
OfflineRecognizerResult result = recognizer.GetResult(&stream);
display.UpdateText(result.text);
display.Display();
started_time = std::chrono::steady_clock::now();
}
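    // The VAD has emitted one or more complete speech segments: decode each
    // one, finalize the sentence, and reset the buffer for the next utterance.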
while (!vad.IsEmpty()) {
auto segment = vad.Front();
vad.Pop();
OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(expected_sample_rate, segment.samples.data(),
segment.samples.size());
recognizer.Decode(&stream);
OfflineRecognizerResult result = recognizer.GetResult(&stream);
display.UpdateText(result.text);
display.FinalizeCurrentSentence();
display.Display();
buffer.clear();
offset = 0;
speech_started = false;
}
}
record_thread.join();
return 0;
}
... ...
// cxx-api-examples/zipformer-ctc-simulate-streaming-microphone-cxx-api.cc
// Copyright (c) 2025 Xiaomi Corporation
//
// This file demonstrates how to use Zipformer CTC with sherpa-onnx's C++ API
// for simulated streaming speech recognition from a microphone.
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
//
// clang-format on
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <chrono> // NOLINT
#include <condition_variable> // NOLINT
#include <iostream>
#include <mutex> // NOLINT
#include <queue>
#include <vector>
#include "portaudio.h" // NOLINT
#include "sherpa-display.h" // NOLINT
#include "sherpa-onnx/c-api/cxx-api.h"
#include "sherpa-onnx/csrc/microphone.h"
std::queue<std::vector<float>> samples_queue;
std::condition_variable condition_variable;
std::mutex mutex;
bool stop = false;
static void Handler(int32_t /*sig*/) {
stop = true;
condition_variable.notify_one();
fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}
static int32_t RecordCallback(const void *input_buffer,
void * /*output_buffer*/,
unsigned long frames_per_buffer, // NOLINT
const PaStreamCallbackTimeInfo * /*time_info*/,
PaStreamCallbackFlags /*status_flags*/,
void * /*user_data*/) {
std::lock_guard<std::mutex> lock(mutex);
samples_queue.emplace(
reinterpret_cast<const float *>(input_buffer),
reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
condition_variable.notify_one();
return stop ? paComplete : paContinue;
}
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
using namespace sherpa_onnx::cxx; // NOLINT
VadModelConfig config;
config.silero_vad.model = "./silero_vad.onnx";
config.silero_vad.threshold = 0.5;
config.silero_vad.min_silence_duration = 0.1;
config.silero_vad.min_speech_duration = 0.25;
config.silero_vad.max_speech_duration = 8;
config.sample_rate = 16000;
config.debug = false;
VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
if (!vad.Get()) {
std::cerr << "Failed to create VAD. Please check your config\n";
exit(-1);
}
return vad;
}
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
using namespace sherpa_onnx::cxx; // NOLINT
OfflineRecognizerConfig config;
config.model_config.zipformer_ctc.model =
"./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx";
config.model_config.tokens =
"./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt";
config.model_config.num_threads = 2;
config.model_config.debug = false;
std::cout << "Loading model\n";
OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
if (!recognizer.Get()) {
std::cerr << "Please check your config\n";
exit(-1);
}
std::cout << "Loading model done\n";
return recognizer;
}
int32_t main() {
signal(SIGINT, Handler);
using namespace sherpa_onnx::cxx; // NOLINT
auto vad = CreateVad();
auto recognizer = CreateOfflineRecognizer();
sherpa_onnx::Microphone mic;
PaDeviceIndex num_devices = Pa_GetDeviceCount();
if (num_devices == 0) {
std::cerr << " If you are using Linux, please try "
"./build/bin/zipformer-ctc-simulate-streaming-alsa-cxx-api\n";
return -1;
}
int32_t device_index = Pa_GetDefaultInputDevice();
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
if (pDeviceIndex) {
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
device_index = atoi(pDeviceIndex);
}
mic.PrintDevices(device_index);
float mic_sample_rate = 16000;
const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
if (sample_rate_str) {
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
mic_sample_rate = atof(sample_rate_str);
}
float sample_rate = 16000;
LinearResampler resampler;
if (mic_sample_rate != sample_rate) {
float min_freq = std::min(mic_sample_rate, sample_rate);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32_t lowpass_filter_width = 6;
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
lowpass_cutoff, lowpass_filter_width);
}
if (mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
nullptr) == false) {
std::cerr << "Failed to open microphone device\n";
return -1;
}
int32_t window_size = 512; // samples, please don't change
int32_t offset = 0;
std::vector<float> buffer;
bool speech_started = false;
auto started_time = std::chrono::steady_clock::now();
SherpaDisplay display;
std::cout << "Started! Please speak\n";
while (!stop) {
{
std::unique_lock<std::mutex> lock(mutex);
while (samples_queue.empty() && !stop) {
condition_variable.wait(lock);
}
const auto &s = samples_queue.front();
if (!resampler.Get()) {
buffer.insert(buffer.end(), s.begin(), s.end());
} else {
auto resampled = resampler.Resample(s.data(), s.size(), false);
buffer.insert(buffer.end(), resampled.begin(), resampled.end());
}
samples_queue.pop();
}
for (; offset + window_size < buffer.size(); offset += window_size) {
vad.AcceptWaveform(buffer.data() + offset, window_size);
if (!speech_started && vad.IsDetected()) {
speech_started = true;
started_time = std::chrono::steady_clock::now();
}
}
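    // No speech detected yet: keep only the most recent 10 windows of audio
    // so the buffer does not grow without bound.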
if (!speech_started) {
if (buffer.size() > 10 * window_size) {
offset -= buffer.size() - 10 * window_size;
buffer = {buffer.end() - 10 * window_size, buffer.end()};
}
}
auto current_time = std::chrono::steady_clock::now();
const float elapsed_seconds =
std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
started_time)
.count() /
1000.;
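    // While speech is in progress, re-decode the accumulated buffer roughly
    // every 0.2 s to show partial results, i.e., simulated streaming with a
    // non-streaming model.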
if (speech_started && elapsed_seconds > 0.2) {
OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());
recognizer.Decode(&stream);
OfflineRecognizerResult result = recognizer.GetResult(&stream);
display.UpdateText(result.text);
display.Display();
started_time = std::chrono::steady_clock::now();
}
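    // The VAD has emitted one or more complete speech segments: decode each
    // one, finalize the sentence, and reset the buffer for the next utterance.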
while (!vad.IsEmpty()) {
auto segment = vad.Front();
vad.Pop();
OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(sample_rate, segment.samples.data(),
segment.samples.size());
recognizer.Decode(&stream);
OfflineRecognizerResult result = recognizer.GetResult(&stream);
display.UpdateText(result.text);
display.FinalizeCurrentSentence();
display.Display();
buffer.clear();
offset = 0;
speech_started = false;
}
}
return 0;
}
... ...
// Copyright (c) 2025 Xiaomi Corporation
import 'dart:io';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('model', help: 'Path to the Zipformer CTC model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['model'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final zipformerCtc = sherpa_onnx.OfflineZipformerCtcModelConfig(model: model);
final modelConfig = sherpa_onnx.OfflineModelConfig(
zipformerCtc: zipformerCtc,
tokens: tokens,
debug: true,
numThreads: 1,
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
final stream = recognizer.createStream();
stream.acceptWaveform(
samples: waveData.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
print(result.text);
stream.free();
recognizer.free();
}
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
dart run \
./bin/zipformer-ctc.dart \
--model ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
--tokens ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
--input-wav ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav
... ...
// Copyright (c) 2025 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('silero-vad', help: 'Path to silero_vad.onnx')
..addOption('model', help: 'Path to the Zipformer CTC model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['silero-vad'] == null ||
res['model'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
// create VAD
final sileroVad = res['silero-vad'] as String;
final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
model: sileroVad,
minSilenceDuration: 0.25,
minSpeechDuration: 0.5,
maxSpeechDuration: 5.0,
);
final vadConfig = sherpa_onnx.VadModelConfig(
sileroVad: sileroVadConfig,
numThreads: 1,
debug: true,
);
final vad = sherpa_onnx.VoiceActivityDetector(
config: vadConfig, bufferSizeInSeconds: 10);
// create offline recognizer
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final zipformerCtc = sherpa_onnx.OfflineZipformerCtcModelConfig(model: model);
final modelConfig = sherpa_onnx.OfflineModelConfig(
zipformerCtc: zipformerCtc,
tokens: tokens,
debug: true,
numThreads: 1,
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
if (waveData.sampleRate != 16000) {
print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
exit(1);
}
int numSamples = waveData.samples.length;
int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
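  // Feed the waveform to the VAD in chunks of exactly windowSize samples;
  // the VAD buffers audio internally and emits complete speech segments.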
for (int i = 0; i != numIter; ++i) {
int start = i * vadConfig.sileroVad.windowSize;
vad.acceptWaveform(Float32List.sublistView(
waveData.samples, start, start + vadConfig.sileroVad.windowSize));
while (!vad.isEmpty()) {
final samples = vad.front().samples;
final startTime = vad.front().start.toDouble() / waveData.sampleRate;
final endTime =
startTime + samples.length.toDouble() / waveData.sampleRate;
final stream = recognizer.createStream();
stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
stream.free();
print(
'${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
vad.pop();
}
}
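  // Flush the VAD so that speech still buffered at the end of the file is
  // also emitted, then drain the remaining segments.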
vad.flush();
while (!vad.isEmpty()) {
final samples = vad.front().samples;
final startTime = vad.front().start.toDouble() / waveData.sampleRate;
final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
final stream = recognizer.createStream();
stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
stream.free();
print(
'${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
vad.pop();
}
vad.free();
recognizer.free();
}
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
if [ ! -f ./lei-jun-test.wav ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi
if [[ ! -f ./silero_vad.onnx ]]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
dart run \
./bin/zipformer-ctc.dart \
--silero-vad ./silero_vad.onnx \
--model ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
--tokens ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
--input-wav ./lei-jun-test.wav
... ...
... ... @@ -75,6 +75,9 @@ class OfflineDecodeFiles
[Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
public string NeMoCtc { get; set; } = string.Empty;
[Option("zipformer-ctc", Required = false, HelpText = "Path to model.onnx. Used only for Zipformer CTC models")]
public string ZipformerCtc { get; set; } = string.Empty;
[Option("dolphin-model", Required = false, Default = "", HelpText = "Path to dolphin ctc model")]
public string DolphinModel { get; set; } = string.Empty;
... ... @@ -240,6 +243,10 @@ to download pre-trained Tdnn models.
{
config.ModelConfig.Dolphin.Model = options.DolphinModel;
}
else if (!string.IsNullOrEmpty(options.ZipformerCtc))
{
config.ModelConfig.ZipformerCtc.Model = options.ZipformerCtc;
}
else if (!string.IsNullOrEmpty(options.TeleSpeechCtc))
{
config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
... ...
#!/usr/bin/env bash
set -ex
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
dotnet run \
--tokens=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
--zipformer-ctc=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
--num-threads=1 \
--files ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav \
./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/1.wav \
./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/8k.wav
... ...
... ... @@ -104,6 +104,27 @@ class OfflineDolphinModelConfig {
final String model;
}
class OfflineZipformerCtcModelConfig {
const OfflineZipformerCtcModelConfig({this.model = ''});
factory OfflineZipformerCtcModelConfig.fromJson(Map<String, dynamic> json) {
return OfflineZipformerCtcModelConfig(
model: json['model'] as String? ?? '',
);
}
@override
String toString() {
return 'OfflineZipformerCtcModelConfig(model: $model)';
}
Map<String, dynamic> toJson() => {
'model': model,
};
final String model;
}
class OfflineWhisperModelConfig {
const OfflineWhisperModelConfig(
{this.encoder = '',
... ... @@ -288,6 +309,7 @@ class OfflineModelConfig {
this.moonshine = const OfflineMoonshineModelConfig(),
this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
this.dolphin = const OfflineDolphinModelConfig(),
this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
required this.tokens,
this.numThreads = 1,
this.debug = true,
... ... @@ -336,6 +358,10 @@ class OfflineModelConfig {
? OfflineDolphinModelConfig.fromJson(
json['dolphin'] as Map<String, dynamic>)
: const OfflineDolphinModelConfig(),
zipformerCtc: json['zipformerCtc'] != null
? OfflineZipformerCtcModelConfig.fromJson(
json['zipformerCtc'] as Map<String, dynamic>)
: const OfflineZipformerCtcModelConfig(),
tokens: json['tokens'] as String,
numThreads: json['numThreads'] as int? ?? 1,
debug: json['debug'] as bool? ?? true,
... ... @@ -349,7 +375,7 @@ class OfflineModelConfig {
@override
String toString() {
-return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
+return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
}
Map<String, dynamic> toJson() => {
... ... @@ -362,6 +388,7 @@ class OfflineModelConfig {
'moonshine': moonshine.toJson(),
'fireRedAsr': fireRedAsr.toJson(),
'dolphin': dolphin.toJson(),
'zipformerCtc': zipformerCtc.toJson(),
'tokens': tokens,
'numThreads': numThreads,
'debug': debug,
... ... @@ -381,6 +408,7 @@ class OfflineModelConfig {
final OfflineMoonshineModelConfig moonshine;
final OfflineFireRedAsrModelConfig fireRedAsr;
final OfflineDolphinModelConfig dolphin;
final OfflineZipformerCtcModelConfig zipformerCtc;
final String tokens;
final int numThreads;
... ... @@ -578,6 +606,8 @@ class OfflineRecognizer {
config.model.fireRedAsr.decoder.toNativeUtf8();
c.ref.model.dolphin.model = config.model.dolphin.model.toNativeUtf8();
c.ref.model.zipformerCtc.model =
config.model.zipformerCtc.model.toNativeUtf8();
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
... ... @@ -623,6 +653,7 @@ class OfflineRecognizer {
calloc.free(c.ref.model.modelType);
calloc.free(c.ref.model.provider);
calloc.free(c.ref.model.tokens);
calloc.free(c.ref.model.zipformerCtc.model);
calloc.free(c.ref.model.dolphin.model);
calloc.free(c.ref.model.fireRedAsr.decoder);
calloc.free(c.ref.model.fireRedAsr.encoder);
... ...
... ... @@ -266,6 +266,10 @@ final class SherpaOnnxOfflineDolphinModelConfig extends Struct {
external Pointer<Utf8> model;
}
final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct {
external Pointer<Utf8> model;
}
final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
external Pointer<Utf8> encoder;
external Pointer<Utf8> decoder;
... ... @@ -333,6 +337,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
external SherpaOnnxOfflineMoonshineModelConfig moonshine;
external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
external SherpaOnnxOfflineDolphinModelConfig dolphin;
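  // NOTE: appended last on purpose; Dart FFI struct fields must mirror the
  // field order of the corresponding C struct in the sherpa-onnx C API.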
external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
}
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
... ...
... ... @@ -28,6 +28,8 @@ func main() {
flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model")
flag.StringVar(&config.ModelConfig.ZipformerCtc.Model, "zipformer-ctc", "", "Path to the Zipformer CTC model")
flag.StringVar(&config.ModelConfig.Dolphin.Model, "dolphin-model", "", "Path to the Dolphin CTC model")
flag.StringVar(&config.ModelConfig.FireRedAsr.Encoder, "fire-red-asr-encoder", "", "Path to the FireRedAsr encoder model")
... ...
#!/usr/bin/env bash
set -ex
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
go mod tidy
go build
./non-streaming-decode-files \
--zipformer-ctc ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
--tokens ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
--debug 0 \
./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav
... ...
... ... @@ -15,6 +15,7 @@ export { Samples,
OfflineTdnnModelConfig,
OfflineSenseVoiceModelConfig,
OfflineMoonshineModelConfig,
OfflineZipformerCtcModelConfig,
OfflineModelConfig,
OfflineLMConfig,
OfflineRecognizerConfig,
... ...
... ... @@ -45,7 +45,23 @@ static SherpaOnnxOfflineParaformerModelConfig GetOfflineParaformerModelConfig(
return c;
}
-static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinfig(
static SherpaOnnxOfflineZipformerCtcModelConfig
GetOfflineZipformerCtcModelConfig(Napi::Object obj) {
SherpaOnnxOfflineZipformerCtcModelConfig c;
memset(&c, 0, sizeof(c));
if (!obj.Has("zipformerCtc") || !obj.Get("zipformerCtc").IsObject()) {
return c;
}
Napi::Object o = obj.Get("zipformerCtc").As<Napi::Object>();
SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
return c;
}
+static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig(
Napi::Object obj) {
SherpaOnnxOfflineDolphinModelConfig c;
memset(&c, 0, sizeof(c));
... ... @@ -185,7 +201,8 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
c.sense_voice = GetOfflineSenseVoiceModelConfig(o);
c.moonshine = GetOfflineMoonshineModelConfig(o);
c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o);
-c.dolphin = GetOfflineDolphinfig(o);
+c.dolphin = GetOfflineDolphinModelConfig(o);
c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o);
SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
... ... @@ -312,6 +329,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.decoder);
SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer_ctc.model);
SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
... ...
... ... @@ -55,6 +55,10 @@ export class OfflineDolphinModelConfig {
public model: string = '';
}
export class OfflineZipformerCtcModelConfig {
public model: string = '';
}
export class OfflineWhisperModelConfig {
public encoder: string = '';
public decoder: string = '';
... ... @@ -97,6 +101,7 @@ export class OfflineModelConfig {
public senseVoice: OfflineSenseVoiceModelConfig = new OfflineSenseVoiceModelConfig();
public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig();
public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig();
}
export class OfflineLMConfig {
... ...
// Copyright 2025 Xiaomi Corporation
// This file shows how to use an offline Zipformer CTC model,
// i.e., non-streaming Zipformer CTC model,
// to decode files.
import com.k2fsa.sherpa.onnx.*;
public class NonStreamingDecodeFileZipformerCtc {
public static void main(String[] args) {
// please refer to
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
// to download model files
String model = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx";
String tokens = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt";
String waveFilename = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav";
WaveReader reader = new WaveReader(waveFilename);
OfflineZipformerCtcModelConfig zipformerCtc =
OfflineZipformerCtcModelConfig.builder().setModel(model).build();
OfflineModelConfig modelConfig =
OfflineModelConfig.builder()
.setZipformerCtc(zipformerCtc)
.setTokens(tokens)
.setNumThreads(1)
.setDebug(true)
.build();
OfflineRecognizerConfig config =
OfflineRecognizerConfig.builder()
.setOfflineModelConfig(modelConfig)
.setDecodingMethod("greedy_search")
.build();
OfflineRecognizer recognizer = new OfflineRecognizer(config);
OfflineStream stream = recognizer.createStream();
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
recognizer.decode(stream);
String text = recognizer.getResult(stream).getText();
System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
stream.release();
recognizer.release();
}
}
... ...
#!/usr/bin/env bash
set -ex
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
mkdir -p ../build
pushd ../build
cmake \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_JNI=ON \
..
make -j4
ls -lh lib
popd
fi
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
pushd ../sherpa-onnx/java-api
make
popd
fi
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
java \
-Djava.library.path=$PWD/../build/lib \
-cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
NonStreamingDecodeFileZipformerCtc.java
... ...
... ... @@ -253,6 +253,13 @@ function testOfflineAsr() {
rm sherpa-onnx-zipformer-multi-zh-hans-2023-9-2.tar.bz2
fi
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
out_filename=test_offline_asr.jar
kotlinc-jvm -include-runtime -d $out_filename \
test_offline_asr.kt \
... ...
package com.k2fsa.sherpa.onnx
fun main() {
-val types = arrayOf(0, 2, 5, 6, 15, 21, 24, 25)
+val types = arrayOf(0, 2, 5, 6, 15, 21, 24, 25, 31)
for (type in types) {
test(type)
}
... ... @@ -19,6 +19,7 @@ fun test(type: Int) {
21 -> "./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav"
24 -> "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"
25 -> "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
31 -> "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav"
else -> null
}
... ...
... ... @@ -123,6 +123,7 @@ The following tables list the examples in this folder.
|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
|[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
|[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search|
|[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
... ... @@ -137,6 +138,7 @@ The following tables list the examples in this folder.
|[./test_vad_asr_non_streaming_whisper_microphone.js](./test_vad_asr_non_streaming_whisper_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Whisper](https://github.com/openai/whisper)|
|[./test_vad_asr_non_streaming_moonshine_microphone.js](./test_vad_asr_non_streaming_moonshine_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Moonshine](https://github.com/usefulsensors/moonshine)|
|[./test_vad_asr_non_streaming_nemo_ctc_microphone.js](./test_vad_asr_non_streaming_nemo_ctc_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|[./test_vad_asr_non_streaming_zipformer_ctc_microphone.js](./test_vad_asr_non_streaming_zipformer_ctc_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a Zipformer CTC model with greedy search|
|[./test_vad_asr_non_streaming_paraformer_microphone.js](./test_vad_asr_non_streaming_paraformer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
|[./test_vad_asr_non_streaming_sense_voice_microphone.js](./test_vad_asr_non_streaming_sense_voice_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
... ... @@ -372,6 +374,21 @@ rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
node ./test_asr_non_streaming_nemo_parakeet_tdt_v2.js
```
### Non-streaming speech recognition with Zipformer CTC models
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
node ./test_asr_non_streaming_zipformer_ctc.js
# To run VAD + non-streaming ASR with Zipformer CTC using a microphone
npm install naudiodon2
node ./test_vad_asr_non_streaming_zipformer_ctc_microphone.js
```
### Non-streaming speech recognition with NeMo CTC models
```bash
... ...
// Copyright (c) 2025 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
'featConfig': {
'sampleRate': 16000,
'featureDim': 80,
},
'modelConfig': {
'zipformerCtc': {
'model': './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx',
},
'tokens': './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt',
'numThreads': 2,
'provider': 'cpu',
'debug': 1,
}
};
const waveFilename =
'./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav';
const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started')
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done')
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
real_time_factor.toFixed(3))
console.log(waveFilename)
console.log('result\n', result)
... ...
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());
const sherpa_onnx = require('sherpa-onnx-node');
function createRecognizer() {
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
'featConfig': {
'sampleRate': 16000,
'featureDim': 80,
},
'modelConfig': {
'zipformerCtc': {
'model':
'./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx',
},
'tokens': './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt',
'numThreads': 2,
'provider': 'cpu',
'debug': 1,
}
};
return new sherpa_onnx.OfflineRecognizer(config);
}
function createVad() {
// please download silero_vad.onnx from
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
const config = {
sileroVad: {
model: './silero_vad.onnx',
threshold: 0.5,
minSpeechDuration: 0.25,
minSilenceDuration: 0.5,
windowSize: 512,
},
sampleRate: 16000,
debug: true,
numThreads: 1,
};
const bufferSizeInSeconds = 60;
return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
}
const recognizer = createRecognizer();
const vad = createVad();
const bufferSizeInSeconds = 30;
const buffer =
new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
const ai = new portAudio.AudioIO({
inOptions: {
channelCount: 1,
closeOnError: true, // Close the stream if an audio error is detected, if
// set false then just log the error
deviceId: -1, // Use -1 or omit the deviceId to select the default device
sampleFormat: portAudio.SampleFormatFloat32,
sampleRate: vad.config.sampleRate
}
});
let printed = false;
let index = 0;
ai.on('data', data => {
const windowSize = vad.config.sileroVad.windowSize;
buffer.push(new Float32Array(data.buffer));
while (buffer.size() > windowSize) {
const samples = buffer.get(buffer.head(), windowSize);
buffer.pop(windowSize);
vad.acceptWaveform(samples);
}
while (!vad.isEmpty()) {
const segment = vad.front();
vad.pop();
const stream = recognizer.createStream();
stream.acceptWaveform({
samples: segment.samples,
sampleRate: recognizer.config.featConfig.sampleRate
});
recognizer.decode(stream);
const r = recognizer.getResult(stream);
if (r.text.length > 0) {
const text = r.text.toLowerCase().trim();
console.log(`${index}: ${text}`);
const filename = `${index}-${text}-${
new Date()
.toLocaleTimeString('en-US', {hour12: false})
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate});
index += 1;
}
}
});
ai.start();
console.log('Started! Please speak')
... ...
... ... @@ -154,6 +154,23 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
node ./test-offline-dolphin-ctc.js
```
## ./test-offline-zipformer-ctc.js
[./test-offline-zipformer-ctc.js](./test-offline-zipformer-ctc.js) demonstrates
how to decode a file with a non-streaming Zipformer CTC model. It uses
[sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese).
You can run it with the following commands:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
node ./test-offline-zipformer-ctc.js
```
## ./test-offline-nemo-ctc.js
[./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates
... ...
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createOfflineRecognizer() {
let config = {
modelConfig: {
zipformerCtc: {
model: './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx',
},
tokens: './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt',
}
};
return sherpa_onnx.createOfflineRecognizer(config);
}
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename =
'./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);
recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);
stream.free();
recognizer.free();
... ...
... ... @@ -9,3 +9,4 @@ sense_voice
telespeech_ctc
moonshine
dolphin_ctc
zipformer_ctc
... ...
#!/usr/bin/env bash
set -ex
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
mkdir -p ../../build
pushd ../../build
cmake \
-DCMAKE_INSTALL_PREFIX=./install \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
..
cmake --build . --target install --config Release
ls -lh lib
popd
fi
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
fpc \
-dSHERPA_ONNX_USE_SHARED_LIBS \
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
-Fl$SHERPA_ONNX_DIR/build/install/lib \
./zipformer_ctc.pas
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
./zipformer_ctc
... ...
{ Copyright (c) 2025 Xiaomi Corporation }
{
This file shows how to use a non-streaming Zipformer CTC model
to decode files.
You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program zipformer_ctc;
{$mode objfpc}
uses
sherpa_onnx,
DateUtils,
SysUtils;
var
Wave: TSherpaOnnxWave;
WaveFilename: AnsiString;
Config: TSherpaOnnxOfflineRecognizerConfig;
Recognizer: TSherpaOnnxOfflineRecognizer;
Stream: TSherpaOnnxOfflineStream;
RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
Start: TDateTime;
Stop: TDateTime;
Elapsed: Single;
Duration: Single;
RealTimeFactor: Single;
begin
Initialize(Config);
Config.ModelConfig.ZipformerCtc.Model := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx';
Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt';
Config.ModelConfig.Provider := 'cpu';
Config.ModelConfig.NumThreads := 1;
Config.ModelConfig.Debug := False;
WaveFilename := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav';
Wave := SherpaOnnxReadWave(WaveFilename);
Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
Stream := Recognizer.CreateStream();
Start := Now;
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
Recognizer.Decode(Stream);
RecognitionResult := Recognizer.GetResult(Stream);
Stop := Now;
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
Duration := Length(Wave.Samples) / Wave.SampleRate;
RealTimeFactor := Elapsed / Duration;
WriteLn(RecognitionResult.ToString);
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
WriteLn(Format('Wave duration %.3f s', [Duration]));
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
{Free resources to avoid memory leaks.
Note: strictly speaking, you don't need to free them in this short
script, since everything is reclaimed when the process exits; in a
larger, long-running project you do have to free them explicitly.
}
FreeAndNil(Stream);
FreeAndNil(Recognizer);
end.
... ...
... ... @@ -2,3 +2,5 @@
vad_with_whisper
vad_with_sense_voice
vad_with_moonshine
vad_with_zipformer_ctc
vad_with_dolphin
... ...
#!/usr/bin/env bash
set -ex
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
mkdir -p ../../build
pushd ../../build
cmake \
-DCMAKE_INSTALL_PREFIX=./install \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
..
cmake --build . --target install --config Release
popd
fi
if [[ ! -f ./silero_vad.onnx ]]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
if [ ! -f ./lei-jun-test.wav ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
fi
fpc \
-dSHERPA_ONNX_USE_SHARED_LIBS \
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
-Fl$SHERPA_ONNX_DIR/build/install/lib \
./vad_with_zipformer_ctc.pas
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
./vad_with_zipformer_ctc
... ...
{ Copyright (c) 2025 Xiaomi Corporation }
{
This file shows how to use a non-streaming Zipformer CTC model
with silero VAD to decode files.
You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program vad_with_zipformer_ctc;
{$mode objfpc}
uses
sherpa_onnx,
SysUtils;
function CreateVad(): TSherpaOnnxVoiceActivityDetector;
var
Config: TSherpaOnnxVadModelConfig;
SampleRate: Integer;
WindowSize: Integer;
begin
Initialize(Config);
  SampleRate := 16000; {Please don't change it unless you know what you are doing}
  WindowSize := 512; {Please don't change it unless you know what you are doing}
Config.SileroVad.Model := './silero_vad.onnx';
Config.SileroVad.MinSpeechDuration := 0.5;
Config.SileroVad.MinSilenceDuration := 0.5;
Config.SileroVad.Threshold := 0.5;
Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads := 1;
  Config.Debug := True;
  Config.Provider := 'cpu';
Config.SampleRate := SampleRate;
Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;
function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
Config: TSherpaOnnxOfflineRecognizerConfig;
begin
Initialize(Config);
Config.ModelConfig.ZipformerCtc.Model := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx';
Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt';
Config.ModelConfig.Provider := 'cpu';
Config.ModelConfig.NumThreads := 1;
Config.ModelConfig.Debug := False;
Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;
var
Wave: TSherpaOnnxWave;
Recognizer: TSherpaOnnxOfflineRecognizer;
Vad: TSherpaOnnxVoiceActivityDetector;
Offset: Integer;
WindowSize: Integer;
SpeechSegment: TSherpaOnnxSpeechSegment;
Start: Single;
Duration: Single;
Stream: TSherpaOnnxOfflineStream;
RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
begin
Vad := CreateVad();
Recognizer := CreateOfflineRecognizer();
Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
if Wave.SampleRate <> Vad.Config.SampleRate then
begin
WriteLn(Format('Expected sample rate: %d. Given: %d',
[Vad.Config.SampleRate, Wave.SampleRate]));
Exit;
end;
WindowSize := Vad.Config.SileroVad.WindowSize;
Offset := 0;
while Offset + WindowSize <= Length(Wave.Samples) do
begin
Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
Offset += WindowSize;
while not Vad.IsEmpty do
begin
SpeechSegment := Vad.Front();
Vad.Pop();
Stream := Recognizer.CreateStream();
Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
Recognizer.Decode(Stream);
RecognitionResult := Recognizer.GetResult(Stream);
Start := SpeechSegment.Start / Wave.SampleRate;
Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
WriteLn(Format('%.3f -- %.3f %s',
[Start, Start + Duration, RecognitionResult.Text]));
FreeAndNil(Stream);
end;
end;
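  { Flush the VAD so that any speech still buffered when the input ends
    is emitted as a final segment and decoded by the loop below. }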
Vad.Flush;
while not Vad.IsEmpty do
begin
SpeechSegment := Vad.Front();
Vad.Pop();
Stream := Recognizer.CreateStream();
Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate);
Recognizer.Decode(Stream);
RecognitionResult := Recognizer.GetResult(Stream);
Start := SpeechSegment.Start / Wave.SampleRate;
Duration := Length(SpeechSegment.Samples) / Wave.SampleRate;
WriteLn(Format('%.3f -- %.3f %s',
[Start, Start + Duration, RecognitionResult.Text]));
FreeAndNil(Stream);
end;
FreeAndNil(Recognizer);
FreeAndNil(Vad);
end.
... ...
#!/usr/bin/env python3
"""
This file shows how to use a non-streaming zipformer CTC model from icefall
to decode files.
Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""
from pathlib import Path
import sherpa_onnx
import soundfile as sf
def create_recognizer():
model = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx"
tokens = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt"
test_wav = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav"
if not Path(model).is_file() or not Path(test_wav).is_file():
raise ValueError(
"""Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""
)
return (
sherpa_onnx.OfflineRecognizer.from_zipformer_ctc(
model=model,
tokens=tokens,
debug=True,
),
test_wav,
)
def main():
recognizer, wave_filename = create_recognizer()
audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
audio = audio[:, 0] # only use the first channel
# audio is a 1-D float32 numpy array normalized to the range [-1, 1]
# sample_rate does not need to be 16000 Hz
stream = recognizer.create_stream()
stream.accept_waveform(sample_rate, audio)
recognizer.decode_stream(stream)
print(wave_filename)
print(stream.result)
if __name__ == "__main__":
main()
... ...
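For reference, the Pascal VAD example above has a direct Python counterpart. Below is a minimal sketch (not part of this PR) that combines the new `from_zipformer_ctc` helper with the existing Python VAD API (`VadModelConfig`, `VoiceActivityDetector`); it assumes `silero_vad.onnx`, `lei-jun-test.wav`, and the model directory have been downloaded as in the run scripts above:

```python
# A minimal sketch, not part of this PR: VAD + non-streaming Zipformer CTC
# in Python, mirroring vad_with_zipformer_ctc.pas above.
import sherpa_onnx
import soundfile as sf

recognizer = sherpa_onnx.OfflineRecognizer.from_zipformer_ctc(
    model="./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx",
    tokens="./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt",
)

vad_config = sherpa_onnx.VadModelConfig()
vad_config.silero_vad.model = "./silero_vad.onnx"
vad_config.sample_rate = 16000
vad = sherpa_onnx.VoiceActivityDetector(vad_config, buffer_size_in_seconds=30)

samples, sample_rate = sf.read("./lei-jun-test.wav", dtype="float32")
window_size = vad_config.silero_vad.window_size  # 512 samples by default

# Feed the VAD in fixed-size windows; decode each detected segment
# independently with the offline recognizer.
for i in range(0, len(samples) - window_size + 1, window_size):
    vad.accept_waveform(samples[i : i + window_size])
    while not vad.empty():
        segment = vad.front
        vad.pop()
        stream = recognizer.create_stream()
        stream.accept_waveform(sample_rate, segment.samples)
        recognizer.decode_stream(stream)
        start = segment.start / sample_rate
        duration = len(segment.samples) / sample_rate
        print(f"{start:.3f} -- {start + duration:.3f} {stream.result.text}")
```

As in the Pascal version, calling `vad.flush()` at the end (where your version provides it) drains any trailing speech segment.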
... ... @@ -344,7 +344,7 @@ def get_models():
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-ctc-fp16-zh-2025-06-30",
model_name="sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30",
idx=19,
lang="zh",
short_name="large_zipformer_fp16",
... ... @@ -363,6 +363,26 @@ def get_models():
popd
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30",
idx=20,
lang="zh",
short_name="large_zipformer_int8",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv bpe.model
rm -rf test_wavs
ls -lh
popd
""",
),
]
return models
... ...
... ... @@ -551,6 +551,23 @@ def get_models():
popd
""",
),
Model(
model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
idx=31,
lang="zh",
lang2="Chinese",
short_name="zipformer_2025_07_03",
cmd="""
pushd $model_name
rm -rfv test_wavs
rm -rfv bbpe.model
ls -lh
popd
""",
),
]
return models
... ...
... ... @@ -27,6 +27,7 @@ namespace SherpaOnnx
Moonshine = new OfflineMoonshineModelConfig();
FireRedAsr = new OfflineFireRedAsrModelConfig();
Dolphin = new OfflineDolphinModelConfig();
ZipformerCtc = new OfflineZipformerCtcModelConfig();
}
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
... ... @@ -60,5 +61,6 @@ namespace SherpaOnnx
public OfflineMoonshineModelConfig Moonshine;
public OfflineFireRedAsrModelConfig FireRedAsr;
public OfflineDolphinModelConfig Dolphin;
public OfflineZipformerCtcModelConfig ZipformerCtc;
}
}
... ...
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
[StructLayout(LayoutKind.Sequential)]
public struct OfflineZipformerCtcModelConfig
{
public OfflineZipformerCtcModelConfig()
{
Model = "";
}
[MarshalAs(UnmanagedType.LPStr)]
public string Model;
}
}
... ...
../../../../go-api-examples/non-streaming-decode-files/run-zipformer-ctc.sh
\ No newline at end of file
... ...
... ... @@ -398,6 +398,10 @@ type OfflineNemoEncDecCtcModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
type OfflineZipformerCtcModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
type OfflineDolphinModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
... ... @@ -448,6 +452,7 @@ type OfflineModelConfig struct {
Moonshine OfflineMoonshineModelConfig
FireRedAsr OfflineFireRedAsrModelConfig
Dolphin OfflineDolphinModelConfig
ZipformerCtc OfflineZipformerCtcModelConfig
Tokens string // Path to tokens.txt
// Number of threads to use for neural network computation
... ... @@ -540,6 +545,7 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
c.model_config.fire_red_asr.decoder = C.CString(config.ModelConfig.FireRedAsr.Decoder)
c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
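	// Each C.CString above allocates memory on the C heap; the matching
	// C.free calls are in freeCOfflineRecognizerConfig below.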
... ... @@ -653,11 +659,22 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
C.free(unsafe.Pointer(c.model_config.fire_red_asr.encoder))
c.model_config.fire_red_asr.encoder = nil
}
if c.model_config.fire_red_asr.decoder != nil {
C.free(unsafe.Pointer(c.model_config.fire_red_asr.decoder))
c.model_config.fire_red_asr.decoder = nil
}
if c.model_config.dolphin.model != nil {
C.free(unsafe.Pointer(c.model_config.dolphin.model))
c.model_config.dolphin.model = nil
}
if c.model_config.zipformer_ctc.model != nil {
C.free(unsafe.Pointer(c.model_config.zipformer_ctc.model))
c.model_config.zipformer_ctc.model = nil
}
if c.model_config.tokens != nil {
C.free(unsafe.Pointer(c.model_config.tokens))
c.model_config.tokens = nil
... ...
... ... @@ -212,6 +212,21 @@ def get_models():
git diff
""",
),
Model(
model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
short_name="vad-asr-zh-zipformer-ctc",
cmd="""
pushd $model_name
mv model.int8.onnx ../zipformer-ctc.onnx
mv tokens.txt ../
popd
rm -rf $model_name
sed -i.bak 's/Zipformer/Zipformer CTC supporting Chinese 中文/g' ../index.html
git diff
""",
),
]
return models
... ...
... ... @@ -484,6 +484,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
recognizer_config.model_config.dolphin.model =
SHERPA_ONNX_OR(config->model_config.dolphin.model, "");
recognizer_config.model_config.zipformer_ctc.model =
SHERPA_ONNX_OR(config->model_config.zipformer_ctc.model, "");
recognizer_config.lm_config.model =
SHERPA_ONNX_OR(config->lm_config.model, "");
recognizer_config.lm_config.scale =
... ...
... ... @@ -451,6 +451,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineDolphinModelConfig {
const char *model;
} SherpaOnnxOfflineDolphinModelConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineZipformerCtcModelConfig {
const char *model;
} SherpaOnnxOfflineZipformerCtcModelConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
SherpaOnnxOfflineTransducerModelConfig transducer;
SherpaOnnxOfflineParaformerModelConfig paraformer;
... ... @@ -474,6 +478,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
SherpaOnnxOfflineMoonshineModelConfig moonshine;
SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr;
SherpaOnnxOfflineDolphinModelConfig dolphin;
SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc;
} SherpaOnnxOfflineModelConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
... ...
... ... @@ -252,6 +252,9 @@ OfflineRecognizer OfflineRecognizer::Create(
c.model_config.dolphin.model = config.model_config.dolphin.model.c_str();
c.model_config.zipformer_ctc.model =
config.model_config.zipformer_ctc.model.c_str();
c.lm_config.model = config.lm_config.model.c_str();
c.lm_config.scale = config.lm_config.scale;
... ...
... ... @@ -241,6 +241,10 @@ struct SHERPA_ONNX_API OfflineDolphinModelConfig {
std::string model;
};
struct SHERPA_ONNX_API OfflineZipformerCtcModelConfig {
std::string model;
};
struct SHERPA_ONNX_API OfflineMoonshineModelConfig {
std::string preprocessor;
std::string encoder;
... ... @@ -267,6 +271,7 @@ struct SHERPA_ONNX_API OfflineModelConfig {
OfflineMoonshineModelConfig moonshine;
OfflineFireRedAsrModelConfig fire_red_asr;
OfflineDolphinModelConfig dolphin;
OfflineZipformerCtcModelConfig zipformer_ctc;
};
struct SHERPA_ONNX_API OfflineLMConfig {
... ...
... ... @@ -113,6 +113,16 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(config);
} else if (!config.nemo_ctc.model.empty()) {
return std::make_unique<OfflineNemoEncDecCtcModel>(config);
} else if (!config.tdnn.model.empty()) {
return std::make_unique<OfflineTdnnCtcModel>(config);
} else if (!config.zipformer_ctc.model.empty()) {
return std::make_unique<OfflineZipformerCtcModel>(config);
} else if (!config.wenet_ctc.model.empty()) {
return std::make_unique<OfflineWenetCtcModel>(config);
} else if (!config.telespeech_ctc.empty()) {
return std::make_unique<OfflineTeleSpeechCtcModel>(config);
}
// TODO(fangjun): Refactor it. We don't need to use model_type here
... ... @@ -167,6 +177,16 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
Manager *mgr, const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(mgr, config);
} else if (!config.nemo_ctc.model.empty()) {
return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
} else if (!config.tdnn.model.empty()) {
return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
} else if (!config.zipformer_ctc.model.empty()) {
return std::make_unique<OfflineZipformerCtcModel>(mgr, config);
} else if (!config.wenet_ctc.model.empty()) {
return std::make_unique<OfflineWenetCtcModel>(mgr, config);
} else if (!config.telespeech_ctc.empty()) {
return std::make_unique<OfflineTeleSpeechCtcModel>(mgr, config);
}
// TODO(fangjun): Refactor it. We don't need to use model_type here
... ...
... ... @@ -33,6 +33,7 @@ java_files += OfflineWhisperModelConfig.java
java_files += OfflineFireRedAsrModelConfig.java
java_files += OfflineMoonshineModelConfig.java
java_files += OfflineNemoEncDecCtcModelConfig.java
java_files += OfflineZipformerCtcModelConfig.java
java_files += OfflineSenseVoiceModelConfig.java
java_files += OfflineDolphinModelConfig.java
java_files += OfflineModelConfig.java
... ...
... ... @@ -11,6 +11,7 @@ public class OfflineModelConfig {
private final OfflineNemoEncDecCtcModelConfig nemo;
private final OfflineSenseVoiceModelConfig senseVoice;
private final OfflineDolphinModelConfig dolphin;
private final OfflineZipformerCtcModelConfig zipformerCtc;
private final String teleSpeech;
private final String tokens;
private final int numThreads;
... ... @@ -28,6 +29,7 @@ public class OfflineModelConfig {
this.fireRedAsr = builder.fireRedAsr;
this.moonshine = builder.moonshine;
this.nemo = builder.nemo;
this.zipformerCtc = builder.zipformerCtc;
this.senseVoice = builder.senseVoice;
this.dolphin = builder.dolphin;
this.teleSpeech = builder.teleSpeech;
... ... @@ -52,7 +54,7 @@ public class OfflineModelConfig {
return transducer;
}
public OfflineWhisperModelConfig getZipformer2Ctc() {
public OfflineWhisperModelConfig getWhisper() {
return whisper;
}
... ... @@ -68,6 +70,14 @@ public class OfflineModelConfig {
return dolphin;
}
public OfflineNemoEncDecCtcModelConfig getNemo() {
return nemo;
}
public OfflineZipformerCtcModelConfig getZipformerCtc() {
return zipformerCtc;
}
public String getTokens() {
return tokens;
}
... ... @@ -109,6 +119,7 @@ public class OfflineModelConfig {
private OfflineNemoEncDecCtcModelConfig nemo = OfflineNemoEncDecCtcModelConfig.builder().build();
private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build();
private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build();
private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build();
private String teleSpeech = "";
private String tokens = "";
private int numThreads = 1;
... ... @@ -142,6 +153,11 @@ public class OfflineModelConfig {
return this;
}
public Builder setZipformerCtc(OfflineZipformerCtcModelConfig zipformerCtc) {
this.zipformerCtc = zipformerCtc;
return this;
}
public Builder setTeleSpeech(String teleSpeech) {
this.teleSpeech = teleSpeech;
return this;
... ...
// Copyright 2025 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
public class OfflineZipformerCtcModelConfig {
private final String model;
private OfflineZipformerCtcModelConfig(Builder builder) {
this.model = builder.model;
}
public static Builder builder() {
return new Builder();
}
public String getModel() {
return model;
}
public static class Builder {
private String model = "";
public OfflineZipformerCtcModelConfig build() {
return new OfflineZipformerCtcModelConfig(this);
}
public Builder setModel(String model) {
this.model = model;
return this;
}
}
}
... ...
... ... @@ -269,6 +269,21 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
ans.model_config.nemo_ctc.model = p;
env->ReleaseStringUTFChars(s, p);
// zipformer ctc
fid =
env->GetFieldID(model_config_cls, "zipformerCtc",
"Lcom/k2fsa/sherpa/onnx/OfflineZipformerCtcModelConfig;");
jobject zipformer_ctc_config = env->GetObjectField(model_config, fid);
jclass zipformer_ctc_config_cls = env->GetObjectClass(zipformer_ctc_config);
fid =
env->GetFieldID(zipformer_ctc_config_cls, "model", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(zipformer_ctc_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.zipformer_ctc.model = p;
env->ReleaseStringUTFChars(s, p);
// dolphin
fid = env->GetFieldID(model_config_cls, "dolphin",
"Lcom/k2fsa/sherpa/onnx/OfflineDolphinModelConfig;");
... ...
... ... @@ -29,6 +29,10 @@ data class OfflineDolphinModelConfig(
var model: String = "",
)
data class OfflineZipformerCtcModelConfig(
var model: String = "",
)
data class OfflineWhisperModelConfig(
var encoder: String = "",
var decoder: String = "",
... ... @@ -64,6 +68,7 @@ data class OfflineModelConfig(
var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(),
var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(),
var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(),
var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(),
var teleSpeech: String = "",
var numThreads: Int = 1,
var debug: Boolean = false,
... ... @@ -559,6 +564,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
modelType = "nemo_transducer",
)
}
31 -> {
val modelDir = "sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03"
return OfflineModelConfig(
zipformerCtc = OfflineZipformerCtcModelConfig(
model = "$modelDir/model.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
)
}
}
return null
}
... ...
... ... @@ -412,6 +412,7 @@ fun getModelConfig(type: Int): OnlineModelConfig? {
model = "$modelDir/model.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "zipformer2",
)
}
... ... @@ -422,6 +423,7 @@ fun getModelConfig(type: Int): OnlineModelConfig? {
model = "$modelDir/model.fp16.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "zipformer2",
)
}
... ...
... ... @@ -284,6 +284,11 @@ type
function ToString: AnsiString;
end;
TSherpaOnnxOfflineZipformerCtcModelConfig = record
Model: AnsiString;
function ToString: AnsiString;
end;
TSherpaOnnxOfflineWhisperModelConfig = record
Encoder: AnsiString;
Decoder: AnsiString;
... ... @@ -346,6 +351,7 @@ type
Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
function ToString: AnsiString;
end;
... ... @@ -726,6 +732,9 @@ type
SherpaOnnxOfflineDolphinModelConfig = record
Model: PAnsiChar;
end;
SherpaOnnxOfflineZipformerCtcModelConfig = record
Model: PAnsiChar;
end;
SherpaOnnxOfflineWhisperModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
... ... @@ -773,6 +782,7 @@ type
Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
Dolphin: SherpaOnnxOfflineDolphinModelConfig;
ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
end;
SherpaOnnxOfflineRecognizerConfig = record
... ... @@ -1536,6 +1546,12 @@ begin
[Self.Model]);
end;
function TSherpaOnnxOfflineZipformerCtcModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineZipformerCtcModelConfig(Model := %s)',
[Self.Model]);
end;
function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
... ... @@ -1610,14 +1626,15 @@ begin
'SenseVoice := %s, ' +
'Moonshine := %s, ' +
'FireRedAsr := %s, ' +
'Dolphin := %s' +
'Dolphin := %s, ' +
'ZipformerCtc := %s' +
')',
[Self.Transducer.ToString, Self.Paraformer.ToString,
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
Self.FireRedAsr.ToString, Self.Dolphin.ToString
Self.FireRedAsr.ToString, Self.Dolphin.ToString, Self.ZipformerCtc.ToString
]);
end;
... ... @@ -1688,6 +1705,7 @@ begin
C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model);
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
C.LMConfig.Scale := Config.LMConfig.Scale;
... ...
... ... @@ -528,6 +528,87 @@ class OfflineRecognizer(object):
return self
@classmethod
def from_zipformer_ctc(
cls,
model: str,
tokens: str,
num_threads: int = 1,
sample_rate: int = 16000,
feature_dim: int = 80,
decoding_method: str = "greedy_search",
debug: bool = False,
provider: str = "cpu",
rule_fsts: str = "",
rule_fars: str = "",
hr_dict_dir: str = "",
hr_rule_fsts: str = "",
hr_lexicon: str = "",
):
"""
Please refer to
`<https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/index.html>`_
to download pre-trained models for different languages, e.g., Chinese,
English, etc.
Args:
model:
Path to ``model.onnx``.
tokens:
Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
columns::
symbol integer_id
num_threads:
Number of threads for neural network computation.
sample_rate:
Sample rate of the training data used to train the model.
feature_dim:
Dimension of the feature used to train the model.
decoding_method:
The only valid value is ``greedy_search``.
debug:
True to show debug messages.
provider:
onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
rule_fsts:
If not empty, it specifies fsts for inverse text normalization.
If there are multiple fsts, they are separated by a comma.
rule_fars:
If not empty, it specifies fst archives for inverse text normalization.
If there are multiple archives, they are separated by a comma.
hr_dict_dir:
If not empty, it specifies the dictionary directory for the
homophone replacer.
hr_rule_fsts:
If not empty, it specifies rule fsts for the homophone replacer.
hr_lexicon:
If not empty, it specifies the lexicon for the homophone replacer.
"""
self = cls.__new__(cls)
model_config = OfflineModelConfig(
zipformer_ctc=OfflineZipformerCtcModelConfig(model=model),
tokens=tokens,
num_threads=num_threads,
debug=debug,
provider=provider,
)
feat_config = FeatureExtractorConfig(
sampling_rate=sample_rate,
feature_dim=feature_dim,
)
recognizer_config = OfflineRecognizerConfig(
feat_config=feat_config,
model_config=model_config,
decoding_method=decoding_method,
rule_fsts=rule_fsts,
rule_fars=rule_fars,
hr=HomophoneReplacerConfig(
dict_dir=hr_dict_dir,
lexicon=hr_lexicon,
rule_fsts=hr_rule_fsts,
),
)
self.recognizer = _Recognizer(recognizer_config)
self.config = recognizer_config
return self
@classmethod
def from_nemo_ctc(
cls,
model: str,
... ...
... ... @@ -16,3 +16,6 @@ tts-kokoro-en
tts-kokoro-zh-en
speech-enhancement-gtcrn
decode-file-sense-voice-with-hr
test-version
zipformer-ctc-asr
dolphin-ctc-asr
... ...
... ... @@ -346,6 +346,14 @@ func sherpaOnnxOfflineParaformerModelConfig(
)
}
func sherpaOnnxOfflineZipformerCtcModelConfig(
model: String = ""
) -> SherpaOnnxOfflineZipformerCtcModelConfig {
return SherpaOnnxOfflineZipformerCtcModelConfig(
model: toCPointer(model)
)
}
func sherpaOnnxOfflineNemoEncDecCtcModelConfig(
model: String = ""
) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig {
... ... @@ -449,7 +457,9 @@ func sherpaOnnxOfflineModelConfig(
senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(),
moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(),
fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig(),
dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig()
dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(),
zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig =
sherpaOnnxOfflineZipformerCtcModelConfig()
) -> SherpaOnnxOfflineModelConfig {
return SherpaOnnxOfflineModelConfig(
transducer: transducer,
... ... @@ -468,7 +478,8 @@ func sherpaOnnxOfflineModelConfig(
sense_voice: senseVoice,
moonshine: moonshine,
fire_red_asr: fireRedAsr,
dolphin: dolphin
dolphin: dolphin,
zipformer_ctc: zipformerCtc
)
}
... ...
#!/usr/bin/env bash
set -ex
if [ ! -d ../build-swift-macos ]; then
echo "Please run ../build-swift-macos.sh first!"
exit 1
fi
if [ ! -f ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx ]; then
echo "Please download the pre-trained model for testing."
echo "You can refer to"
echo ""
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html#sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03-chinese"
echo ""
echo "for help"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
ls -lh sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
fi
if [ ! -e ./zipformer-ctc-asr ]; then
# Note: We use -lc++ to link against libc++ instead of libstdc++
swiftc \
-lc++ \
-I ../build-swift-macos/install/include \
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
./zipformer-ctc-asr.swift ./SherpaOnnx.swift \
-L ../build-swift-macos/install/lib/ \
-l sherpa-onnx \
-l onnxruntime \
-o zipformer-ctc-asr
strip zipformer-ctc-asr
else
echo "./zipformer-ctc-asr exists - skip building"
fi
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./zipformer-ctc-asr
... ...
import AVFoundation
extension AudioBuffer {
func array() -> [Float] {
return Array(UnsafeBufferPointer(self))
}
}
extension AVAudioPCMBuffer {
func array() -> [Float] {
return self.audioBufferList.pointee.mBuffers.array()
}
}
func run() {
let model = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx"
let tokens = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt"
let zipformerCtc = sherpaOnnxOfflineZipformerCtcModelConfig(
model: model
)
let modelConfig = sherpaOnnxOfflineModelConfig(
tokens: tokens,
debug: 0,
zipformerCtc: zipformerCtc
)
let featConfig = sherpaOnnxFeatureConfig(
sampleRate: 16000,
featureDim: 80
)
var config = sherpaOnnxOfflineRecognizerConfig(
featConfig: featConfig,
modelConfig: modelConfig
)
let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
let filePath = "./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav"
let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
let audioFile = try! AVAudioFile(forReading: fileURL as URL)
let audioFormat = audioFile.processingFormat
assert(audioFormat.channelCount == 1)
assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
let audioFrameCount = UInt32(audioFile.length)
let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
try! audioFile.read(into: audioFileBuffer!)
let array: [Float]! = audioFileBuffer?.array()
let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
print("\nresult is:\n\(result.text)")
if result.timestamps.count != 0 {
print("\ntimestamps is:\n\(result.timestamps)")
}
}
@main
struct App {
static func main() {
run()
}
}
... ...
... ... @@ -43,6 +43,10 @@ function freeConfig(config, Module) {
freeConfig(config.dolphin, Module)
}
if ('zipformerCtc' in config) {
freeConfig(config.zipformerCtc, Module)
}
if ('moonshine' in config) {
freeConfig(config.moonshine, Module)
}
... ... @@ -627,6 +631,23 @@ function initSherpaOnnxOfflineDolphinModelConfig(config, Module) {
}
}
function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
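  // The corresponding C struct holds a single `const char *model` field,
  // i.e. one 4-byte pointer on wasm32. Copy the model path to the heap
  // and write its address into the struct memory at `ptr`.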
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
const len = 1 * 4; // 1 pointer
const ptr = Module._malloc(len);
Module.stringToUTF8(config.model || '', buffer, n);
Module.setValue(ptr, buffer, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
... ... @@ -840,6 +861,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
};
}
if (!('zipformerCtc' in config)) {
config.zipformerCtc = {
model: '',
};
}
if (!('whisper' in config)) {
config.whisper = {
encoder: '',
... ... @@ -906,9 +933,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
const dolphin =
initSherpaOnnxOfflineDolphinModelConfig(config.dolphin, Module);
const zipformerCtc =
initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module);
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
dolphin.len;
dolphin.len + zipformerCtc.len;
const ptr = Module._malloc(len);
... ... @@ -1010,11 +1040,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
Module._CopyHeap(dolphin.ptr, dolphin.len, ptr + offset);
offset += dolphin.len;
Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset);
offset += zipformerCtc.len;
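  // zipformer_ctc is copied last because it is the last member of
  // SherpaOnnxOfflineModelConfig in the C header; the copy order here
  // must match the struct layout exactly.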
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
dolphin: dolphin
dolphin: dolphin, zipformerCtc: zipformerCtc
}
}
... ...
... ... @@ -13,6 +13,7 @@ extern "C" {
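// Note: the sizes below assume a 32-bit (wasm32) build, where a pointer
// occupies 4 bytes.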
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
... ... @@ -31,7 +32,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
sizeof(SherpaOnnxOfflineDolphinModelConfig),
sizeof(SherpaOnnxOfflineDolphinModelConfig) +
sizeof(SherpaOnnxOfflineZipformerCtcModelConfig),
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
... ... @@ -77,6 +79,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
auto moonshine = &model_config->moonshine;
auto fire_red_asr = &model_config->fire_red_asr;
auto dolphin = &model_config->dolphin;
auto zipformer_ctc = &model_config->zipformer_ctc;
fprintf(stdout, "----------offline transducer model config----------\n");
fprintf(stdout, "encoder: %s\n", transducer->encoder);
... ... @@ -117,6 +120,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "----------offline Dolphin model config----------\n");
fprintf(stdout, "model: %s\n", dolphin->model);
fprintf(stdout, "----------offline zipformer ctc model config----------\n");
fprintf(stdout, "model: %s\n", zipformer_ctc->model);
fprintf(stdout, "tokens: %s\n", model_config->tokens);
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
fprintf(stdout, "provider: %s\n", model_config->provider);
... ...
... ... @@ -117,6 +117,10 @@ function initOfflineRecognizer() {
};
} else if (fileExists('dolphin.onnx')) {
config.modelConfig.dolphin = {model: './dolphin.onnx'};
} else if (fileExists('zipformer-ctc.onnx')) {
    // You need to rename model.int8.onnx from the zipformer CTC model
    // directory to zipformer-ctc.onnx
config.modelConfig.zipformerCtc = {model: './zipformer-ctc.onnx'};
} else {
console.log('Please specify a model.');
alert('Please specify a model.');
... ...