Committed by
GitHub
Add various language bindings for streaming T-one Russian ASR models (#2576)
This PR adds support for streaming T-one Russian ASR models across various language bindings in the sherpa-onnx library. The changes enable T-one CTC (Connectionist Temporal Classification) model integration by adding new configuration structures and example implementations. - Introduces OnlineToneCtcModelConfig structures across all language bindings (C, C++, Swift, Java, Kotlin, Go, etc.) - Adds T-one CTC model support to WASM implementations for both ASR and keyword spotting - Provides comprehensive example implementations demonstrating T-one model usage in multiple programming languages
Showing
62 changed files
with
1,351 additions
and
96 deletions
| @@ -4,6 +4,36 @@ set -ex | @@ -4,6 +4,36 @@ set -ex | ||
| 4 | 4 | ||
| 5 | cd dart-api-examples | 5 | cd dart-api-examples |
| 6 | 6 | ||
| 7 | +pushd streaming-asr | ||
| 8 | + | ||
| 9 | +echo '----------streaming T-one ctc----------' | ||
| 10 | +./run-t-one-ctc.sh | ||
| 11 | +rm -rf sherpa-onnx-* | ||
| 12 | + | ||
| 13 | +echo '----------streaming zipformer ctc HLG----------' | ||
| 14 | +./run-zipformer-ctc-hlg.sh | ||
| 15 | +rm -rf sherpa-onnx-* | ||
| 16 | + | ||
| 17 | +echo '----------streaming zipformer ctc----------' | ||
| 18 | +./run-zipformer-ctc.sh | ||
| 19 | +rm -rf sherpa-onnx-* | ||
| 20 | + | ||
| 21 | +echo '----------streaming zipformer transducer----------' | ||
| 22 | +./run-zipformer-transducer-itn.sh | ||
| 23 | +./run-zipformer-transducer.sh | ||
| 24 | +rm -f itn* | ||
| 25 | +rm -rf sherpa-onnx-* | ||
| 26 | + | ||
| 27 | +echo '----------streaming NeMo transducer----------' | ||
| 28 | +./run-nemo-transducer.sh | ||
| 29 | +rm -rf sherpa-onnx-* | ||
| 30 | + | ||
| 31 | +echo '----------streaming paraformer----------' | ||
| 32 | +./run-paraformer.sh | ||
| 33 | +rm -rf sherpa-onnx-* | ||
| 34 | + | ||
| 35 | +popd # streaming-asr | ||
| 36 | + | ||
| 7 | pushd tts | 37 | pushd tts |
| 8 | 38 | ||
| 9 | echo '----------matcha tts----------' | 39 | echo '----------matcha tts----------' |
| @@ -167,29 +197,3 @@ popd | @@ -167,29 +197,3 @@ popd | ||
| 167 | pushd keyword-spotter | 197 | pushd keyword-spotter |
| 168 | ./run-zh.sh | 198 | ./run-zh.sh |
| 169 | popd | 199 | popd |
| 170 | - | ||
| 171 | -pushd streaming-asr | ||
| 172 | - | ||
| 173 | -echo '----------streaming zipformer ctc HLG----------' | ||
| 174 | -./run-zipformer-ctc-hlg.sh | ||
| 175 | -rm -rf sherpa-onnx-* | ||
| 176 | - | ||
| 177 | -echo '----------streaming zipformer ctc----------' | ||
| 178 | -./run-zipformer-ctc.sh | ||
| 179 | -rm -rf sherpa-onnx-* | ||
| 180 | - | ||
| 181 | -echo '----------streaming zipformer transducer----------' | ||
| 182 | -./run-zipformer-transducer-itn.sh | ||
| 183 | -./run-zipformer-transducer.sh | ||
| 184 | -rm -f itn* | ||
| 185 | -rm -rf sherpa-onnx-* | ||
| 186 | - | ||
| 187 | -echo '----------streaming NeMo transducer----------' | ||
| 188 | -./run-nemo-transducer.sh | ||
| 189 | -rm -rf sherpa-onnx-* | ||
| 190 | - | ||
| 191 | -echo '----------streaming paraformer----------' | ||
| 192 | -./run-paraformer.sh | ||
| 193 | -rm -rf sherpa-onnx-* | ||
| 194 | - | ||
| 195 | -popd # streaming-asr |
| @@ -10,6 +10,17 @@ arch=$(node -p "require('os').arch()") | @@ -10,6 +10,17 @@ arch=$(node -p "require('os').arch()") | ||
| 10 | platform=$(node -p "require('os').platform()") | 10 | platform=$(node -p "require('os').platform()") |
| 11 | node_version=$(node -p "process.versions.node.split('.')[0]") | 11 | node_version=$(node -p "process.versions.node.split('.')[0]") |
| 12 | 12 | ||
| 13 | +echo "----------streaming ASR T-one----------" | ||
| 14 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 15 | +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 16 | +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 17 | + | ||
| 18 | +node ./test_asr_streaming_t_one_ctc.js | ||
| 19 | + | ||
| 20 | +rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08 | ||
| 21 | + | ||
| 22 | +echo "----------KittenTTS----------" | ||
| 23 | + | ||
| 13 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | 24 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 |
| 14 | tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | 25 | tar xf kitten-nano-en-v0_1-fp16.tar.bz2 |
| 15 | rm kitten-nano-en-v0_1-fp16.tar.bz2 | 26 | rm kitten-nano-en-v0_1-fp16.tar.bz2 |
| @@ -9,6 +9,13 @@ git status | @@ -9,6 +9,13 @@ git status | ||
| 9 | ls -lh | 9 | ls -lh |
| 10 | ls -lh node_modules | 10 | ls -lh node_modules |
| 11 | 11 | ||
| 12 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 13 | +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 14 | +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 15 | +node ./test-online-t-one-ctc.js | ||
| 16 | + | ||
| 17 | +rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08 | ||
| 18 | + | ||
| 12 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | 19 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 |
| 13 | tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | 20 | tar xf kitten-nano-en-v0_1-fp16.tar.bz2 |
| 14 | rm kitten-nano-en-v0_1-fp16.tar.bz2 | 21 | rm kitten-nano-en-v0_1-fp16.tar.bz2 |
| @@ -9,6 +9,9 @@ ls -lh | @@ -9,6 +9,9 @@ ls -lh | ||
| 9 | 9 | ||
| 10 | ./run-test-version.sh | 10 | ./run-test-version.sh |
| 11 | 11 | ||
| 12 | +./run-decode-file-t-one-streaming.sh | ||
| 13 | +rm -rf sherpa-onnx-streaming-* | ||
| 14 | + | ||
| 12 | ./run-compute-speaker-embeddings.sh | 15 | ./run-compute-speaker-embeddings.sh |
| 13 | rm -fv *.wav *.onnx | 16 | rm -fv *.wav *.onnx |
| 14 | 17 |
| @@ -75,6 +75,36 @@ jobs: | @@ -75,6 +75,36 @@ jobs: | ||
| 75 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib | 75 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib |
| 76 | fi | 76 | fi |
| 77 | 77 | ||
| 78 | + - name: Test T-one | ||
| 79 | + shell: bash | ||
| 80 | + run: | | ||
| 81 | + name=streaming-t-one-ctc-c-api | ||
| 82 | + gcc -o $name ./c-api-examples/$name.c \ | ||
| 83 | + -I ./build/install/include \ | ||
| 84 | + -L ./build/install/lib/ \ | ||
| 85 | + -l sherpa-onnx-c-api \ | ||
| 86 | + -l onnxruntime | ||
| 87 | + | ||
| 88 | + ls -lh $name | ||
| 89 | + | ||
| 90 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 91 | + ldd ./$name | ||
| 92 | + echo "----" | ||
| 93 | + readelf -d ./$name | ||
| 94 | + fi | ||
| 95 | + | ||
| 96 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 97 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 98 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 99 | + | ||
| 100 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 101 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 102 | + | ||
| 103 | + ./$name | ||
| 104 | + | ||
| 105 | + rm $name | ||
| 106 | + rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08 | ||
| 107 | + | ||
| 78 | - name: Test KittenTTS | 108 | - name: Test KittenTTS |
| 79 | shell: bash | 109 | shell: bash |
| 80 | run: | | 110 | run: | |
| @@ -530,7 +560,8 @@ jobs: | @@ -530,7 +560,8 @@ jobs: | ||
| 530 | rm -rf sherpa-onnx-* | 560 | rm -rf sherpa-onnx-* |
| 531 | 561 | ||
| 532 | - name: Test ffmpeg | 562 | - name: Test ffmpeg |
| 533 | - if: matrix.os == 'macos-latest' | 563 | + # if: matrix.os == 'macos-latest' |
| 564 | + if: false | ||
| 534 | shell: bash | 565 | shell: bash |
| 535 | run: | | 566 | run: | |
| 536 | brew install ffmpeg | 567 | brew install ffmpeg |
| @@ -78,6 +78,40 @@ jobs: | @@ -78,6 +78,40 @@ jobs: | ||
| 78 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib | 78 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib |
| 79 | fi | 79 | fi |
| 80 | 80 | ||
| 81 | + - name: Test T-one | ||
| 82 | + shell: bash | ||
| 83 | + run: | | ||
| 84 | + name=streaming-t-one-ctc-cxx-api | ||
| 85 | + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \ | ||
| 86 | + -I ./build/install/include \ | ||
| 87 | + -L ./build/install/lib/ \ | ||
| 88 | + -l sherpa-onnx-cxx-api \ | ||
| 89 | + -l sherpa-onnx-c-api \ | ||
| 90 | + -l onnxruntime | ||
| 91 | + | ||
| 92 | + ls -lh $name | ||
| 93 | + | ||
| 94 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 95 | + ls -lh ./$name | ||
| 96 | + ldd ./$name | ||
| 97 | + echo "----" | ||
| 98 | + readelf -d ./$name | ||
| 99 | + fi | ||
| 100 | + | ||
| 101 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 102 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 103 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 104 | + | ||
| 105 | + echo "---" | ||
| 106 | + | ||
| 107 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 108 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 109 | + | ||
| 110 | + ./$name | ||
| 111 | + | ||
| 112 | + rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08 | ||
| 113 | + rm -v ./$name | ||
| 114 | + | ||
| 81 | - name: Test KittenTTS | 115 | - name: Test KittenTTS |
| 82 | shell: bash | 116 | shell: bash |
| 83 | run: | | 117 | run: | |
| @@ -126,6 +126,43 @@ jobs: | @@ -126,6 +126,43 @@ jobs: | ||
| 126 | cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad-with-non-streaming-asr | 126 | cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad-with-non-streaming-asr |
| 127 | fi | 127 | fi |
| 128 | 128 | ||
| 129 | + - name: Run Pascal test (Streaming ASR) | ||
| 130 | + shell: bash | ||
| 131 | + run: | | ||
| 132 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 133 | + | ||
| 134 | + cd ./pascal-api-examples | ||
| 135 | + | ||
| 136 | + pushd streaming-asr | ||
| 137 | + | ||
| 138 | + ./run-t-one-ctc.sh | ||
| 139 | + rm -rf sherpa-onnx-* | ||
| 140 | + echo "---" | ||
| 141 | + | ||
| 142 | + ./run-zipformer-transducer.sh | ||
| 143 | + rm -rf sherpa-onnx-* | ||
| 144 | + echo "---" | ||
| 145 | + | ||
| 146 | + ./run-nemo-transducer.sh | ||
| 147 | + rm -rf sherpa-onnx-* | ||
| 148 | + echo "---" | ||
| 149 | + | ||
| 150 | + if [[ ${{ matrix.os }} != 'windows-latest' ]]; then | ||
| 151 | + ./run-paraformer.sh | ||
| 152 | + rm -rf sherpa-onnx-* | ||
| 153 | + echo "---" | ||
| 154 | + | ||
| 155 | + ./run-zipformer-ctc.sh | ||
| 156 | + echo "---" | ||
| 157 | + | ||
| 158 | + ./run-zipformer-ctc-hlg.sh | ||
| 159 | + rm -rf sherpa-onnx-* | ||
| 160 | + echo "---" | ||
| 161 | + fi | ||
| 162 | + | ||
| 163 | + ls -lh | ||
| 164 | + popd | ||
| 165 | + | ||
| 129 | - name: Run Pascal test (VAD test) | 166 | - name: Run Pascal test (VAD test) |
| 130 | shell: bash | 167 | shell: bash |
| 131 | run: | | 168 | run: | |
| @@ -321,36 +358,3 @@ jobs: | @@ -321,36 +358,3 @@ jobs: | ||
| 321 | echo "---" | 358 | echo "---" |
| 322 | ls -lh | 359 | ls -lh |
| 323 | popd | 360 | popd |
| 324 | - | ||
| 325 | - - name: Run Pascal test (Streaming ASR) | ||
| 326 | - shell: bash | ||
| 327 | - run: | | ||
| 328 | - export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 329 | - | ||
| 330 | - cd ./pascal-api-examples | ||
| 331 | - | ||
| 332 | - pushd streaming-asr | ||
| 333 | - | ||
| 334 | - ./run-zipformer-transducer.sh | ||
| 335 | - rm -rf sherpa-onnx-* | ||
| 336 | - echo "---" | ||
| 337 | - | ||
| 338 | - ./run-nemo-transducer.sh | ||
| 339 | - rm -rf sherpa-onnx-* | ||
| 340 | - echo "---" | ||
| 341 | - | ||
| 342 | - if [[ ${{ matrix.os }} != 'windows-latest' ]]; then | ||
| 343 | - ./run-paraformer.sh | ||
| 344 | - rm -rf sherpa-onnx-* | ||
| 345 | - echo "---" | ||
| 346 | - | ||
| 347 | - ./run-zipformer-ctc.sh | ||
| 348 | - echo "---" | ||
| 349 | - | ||
| 350 | - ./run-zipformer-ctc-hlg.sh | ||
| 351 | - rm -rf sherpa-onnx-* | ||
| 352 | - echo "---" | ||
| 353 | - fi | ||
| 354 | - | ||
| 355 | - ls -lh | ||
| 356 | - popd |
| @@ -108,6 +108,13 @@ jobs: | @@ -108,6 +108,13 @@ jobs: | ||
| 108 | cd ./java-api-examples | 108 | cd ./java-api-examples |
| 109 | ./run-version-test.sh | 109 | ./run-version-test.sh |
| 110 | 110 | ||
| 111 | + - name: Run java test (Streaming T-one) | ||
| 112 | + shell: bash | ||
| 113 | + run: | | ||
| 114 | + cd ./java-api-examples | ||
| 115 | + ./run-streaming-decode-file-tone-ctc.sh | ||
| 116 | + rm -rf sherpa-onnx-streaming-t-one-* | ||
| 117 | + | ||
| 111 | - name: Run java test (Nemo Canary) | 118 | - name: Run java test (Nemo Canary) |
| 112 | shell: bash | 119 | shell: bash |
| 113 | run: | | 120 | run: | |
| @@ -140,19 +140,6 @@ jobs: | @@ -140,19 +140,6 @@ jobs: | ||
| 140 | name: ${{ matrix.os }}-libs | 140 | name: ${{ matrix.os }}-libs |
| 141 | path: to-upload/ | 141 | path: to-upload/ |
| 142 | 142 | ||
| 143 | - - name: Test non-streaming decoding files with NeMo Canary | ||
| 144 | - shell: bash | ||
| 145 | - run: | | ||
| 146 | - cd scripts/go/_internal/non-streaming-canary-decode-files/ | ||
| 147 | - ls -lh | ||
| 148 | - go mod tidy | ||
| 149 | - cat go.mod | ||
| 150 | - go build | ||
| 151 | - ls -lh | ||
| 152 | - | ||
| 153 | - ./run.sh | ||
| 154 | - rm -rf sherpa-onnx-nemo-* | ||
| 155 | - | ||
| 156 | - name: Test streaming decoding files | 143 | - name: Test streaming decoding files |
| 157 | shell: bash | 144 | shell: bash |
| 158 | run: | | 145 | run: | |
| @@ -163,6 +150,9 @@ jobs: | @@ -163,6 +150,9 @@ jobs: | ||
| 163 | go build | 150 | go build |
| 164 | ls -lh | 151 | ls -lh |
| 165 | 152 | ||
| 153 | + echo "Test T-one CTC" | ||
| 154 | + ./run-t-one-ctc.sh | ||
| 155 | + | ||
| 166 | echo "Test zipformer2 CTC" | 156 | echo "Test zipformer2 CTC" |
| 167 | ./run-zipformer2-ctc-with-hr.sh | 157 | ./run-zipformer2-ctc-with-hr.sh |
| 168 | ./run-zipformer2-ctc.sh | 158 | ./run-zipformer2-ctc.sh |
| @@ -179,6 +169,21 @@ jobs: | @@ -179,6 +169,21 @@ jobs: | ||
| 179 | ./run-paraformer.sh | 169 | ./run-paraformer.sh |
| 180 | rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en | 170 | rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en |
| 181 | 171 | ||
| 172 | + - name: Test non-streaming decoding files with NeMo Canary | ||
| 173 | + shell: bash | ||
| 174 | + run: | | ||
| 175 | + cd scripts/go/_internal/non-streaming-canary-decode-files/ | ||
| 176 | + ls -lh | ||
| 177 | + go mod tidy | ||
| 178 | + cat go.mod | ||
| 179 | + go build | ||
| 180 | + ls -lh | ||
| 181 | + | ||
| 182 | + ./run.sh | ||
| 183 | + rm -rf sherpa-onnx-nemo-* | ||
| 184 | + | ||
| 185 | + | ||
| 186 | + | ||
| 182 | - name: Test non-streaming decoding files | 187 | - name: Test non-streaming decoding files |
| 183 | shell: bash | 188 | shell: bash |
| 184 | run: | | 189 | run: | |
| @@ -44,6 +44,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api) | @@ -44,6 +44,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api) | ||
| 44 | add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c) | 44 | add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c) |
| 45 | target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api) | 45 | target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api) |
| 46 | 46 | ||
| 47 | +add_executable(streaming-t-one-ctc-c-api streaming-t-one-ctc-c-api.c) | ||
| 48 | +target_link_libraries(streaming-t-one-ctc-c-api sherpa-onnx-c-api) | ||
| 49 | + | ||
| 47 | add_executable(audio-tagging-c-api audio-tagging-c-api.c) | 50 | add_executable(audio-tagging-c-api audio-tagging-c-api.c) |
| 48 | target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api) | 51 | target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api) |
| 49 | 52 |
c-api-examples/streaming-t-one-ctc-c-api.c
0 → 100644
| 1 | +// c-api-examples/streaming-t-one-ctc-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// | ||
| 6 | +// This file demonstrates how to use streaming T-one with sherpa-onnx's C | ||
| 7 | +// API. | ||
| 8 | +// clang-format off | ||
| 9 | +// | ||
| 10 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 11 | +// tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 12 | +// rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 13 | +// | ||
| 14 | +// clang-format on | ||
| 15 | + | ||
| 16 | +#include <stdio.h> | ||
| 17 | +#include <stdlib.h> | ||
| 18 | +#include <string.h> | ||
| 19 | + | ||
| 20 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 21 | + | ||
| 22 | +int32_t main() { | ||
| 23 | + const char *wav_filename = | ||
| 24 | + "sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"; | ||
| 25 | + const char *model = | ||
| 26 | + "sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx"; | ||
| 27 | + const char *tokens = | ||
| 28 | + "sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt"; | ||
| 29 | + const char *provider = "cpu"; | ||
| 30 | + | ||
| 31 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 32 | + if (wave == NULL) { | ||
| 33 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 34 | + return -1; | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + // Zipformer config | ||
| 38 | + SherpaOnnxOnlineToneCtcModelConfig t_one_ctc; | ||
| 39 | + memset(&t_one_ctc, 0, sizeof(t_one_ctc)); | ||
| 40 | + t_one_ctc.model = model; | ||
| 41 | + | ||
| 42 | + // Online model config | ||
| 43 | + SherpaOnnxOnlineModelConfig online_model_config; | ||
| 44 | + memset(&online_model_config, 0, sizeof(online_model_config)); | ||
| 45 | + online_model_config.debug = 1; | ||
| 46 | + online_model_config.num_threads = 1; | ||
| 47 | + online_model_config.provider = provider; | ||
| 48 | + online_model_config.tokens = tokens; | ||
| 49 | + online_model_config.t_one_ctc = t_one_ctc; | ||
| 50 | + | ||
| 51 | + // Recognizer config | ||
| 52 | + SherpaOnnxOnlineRecognizerConfig recognizer_config; | ||
| 53 | + memset(&recognizer_config, 0, sizeof(recognizer_config)); | ||
| 54 | + recognizer_config.decoding_method = "greedy_search"; | ||
| 55 | + recognizer_config.model_config = online_model_config; | ||
| 56 | + | ||
| 57 | + const SherpaOnnxOnlineRecognizer *recognizer = | ||
| 58 | + SherpaOnnxCreateOnlineRecognizer(&recognizer_config); | ||
| 59 | + | ||
| 60 | + if (recognizer == NULL) { | ||
| 61 | + fprintf(stderr, "Please check your config!\n"); | ||
| 62 | + SherpaOnnxFreeWave(wave); | ||
| 63 | + return -1; | ||
| 64 | + } | ||
| 65 | + | ||
| 66 | + const SherpaOnnxOnlineStream *stream = | ||
| 67 | + SherpaOnnxCreateOnlineStream(recognizer); | ||
| 68 | + | ||
| 69 | + const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50); | ||
| 70 | + int32_t segment_id = 0; | ||
| 71 | + | ||
| 72 | +// simulate streaming. You can choose an arbitrary N | ||
| 73 | +#define N 3200 | ||
| 74 | + | ||
| 75 | + fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n", | ||
| 76 | + wave->sample_rate, wave->num_samples, | ||
| 77 | + (float)wave->num_samples / wave->sample_rate); | ||
| 78 | + | ||
| 79 | + float left_paddings[2400] = {0}; // 0.3 seconds at 8 kHz sample rate | ||
| 80 | + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, left_paddings, | ||
| 81 | + 2400); | ||
| 82 | + | ||
| 83 | + int32_t k = 0; | ||
| 84 | + while (k < wave->num_samples) { | ||
| 85 | + int32_t start = k; | ||
| 86 | + int32_t end = | ||
| 87 | + (start + N > wave->num_samples) ? wave->num_samples : (start + N); | ||
| 88 | + k += N; | ||
| 89 | + | ||
| 90 | + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, | ||
| 91 | + wave->samples + start, end - start); | ||
| 92 | + while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) { | ||
| 93 | + SherpaOnnxDecodeOnlineStream(recognizer, stream); | ||
| 94 | + } | ||
| 95 | + | ||
| 96 | + const SherpaOnnxOnlineRecognizerResult *r = | ||
| 97 | + SherpaOnnxGetOnlineStreamResult(recognizer, stream); | ||
| 98 | + | ||
| 99 | + if (strlen(r->text)) { | ||
| 100 | + SherpaOnnxPrint(display, segment_id, r->text); | ||
| 101 | + } | ||
| 102 | + | ||
| 103 | + if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) { | ||
| 104 | + if (strlen(r->text)) { | ||
| 105 | + ++segment_id; | ||
| 106 | + } | ||
| 107 | + SherpaOnnxOnlineStreamReset(recognizer, stream); | ||
| 108 | + } | ||
| 109 | + | ||
| 110 | + SherpaOnnxDestroyOnlineRecognizerResult(r); | ||
| 111 | + } | ||
| 112 | + | ||
| 113 | + // add some tail padding | ||
| 114 | + float tail_paddings[4800] = {0}; // 0.6 seconds at 8 kHz sample rate | ||
| 115 | + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings, | ||
| 116 | + 4800); | ||
| 117 | + | ||
| 118 | + SherpaOnnxOnlineStreamInputFinished(stream); | ||
| 119 | + while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) { | ||
| 120 | + SherpaOnnxDecodeOnlineStream(recognizer, stream); | ||
| 121 | + } | ||
| 122 | + | ||
| 123 | + SherpaOnnxFreeWave(wave); | ||
| 124 | + | ||
| 125 | + const SherpaOnnxOnlineRecognizerResult *r = | ||
| 126 | + SherpaOnnxGetOnlineStreamResult(recognizer, stream); | ||
| 127 | + | ||
| 128 | + if (strlen(r->text)) { | ||
| 129 | + SherpaOnnxPrint(display, segment_id, r->text); | ||
| 130 | + } | ||
| 131 | + | ||
| 132 | + SherpaOnnxDestroyOnlineRecognizerResult(r); | ||
| 133 | + | ||
| 134 | + SherpaOnnxDestroyDisplay(display); | ||
| 135 | + SherpaOnnxDestroyOnlineStream(stream); | ||
| 136 | + SherpaOnnxDestroyOnlineRecognizer(recognizer); | ||
| 137 | + fprintf(stderr, "\n"); | ||
| 138 | + | ||
| 139 | + return 0; | ||
| 140 | +} |
| @@ -15,6 +15,9 @@ target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api) | @@ -15,6 +15,9 @@ target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api) | ||
| 15 | add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc) | 15 | add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc) |
| 16 | target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api) | 16 | target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api) |
| 17 | 17 | ||
| 18 | +add_executable(streaming-t-one-ctc-cxx-api streaming-t-one-ctc-cxx-api.cc) | ||
| 19 | +target_link_libraries(streaming-t-one-ctc-cxx-api sherpa-onnx-cxx-api) | ||
| 20 | + | ||
| 18 | add_executable(whisper-cxx-api ./whisper-cxx-api.cc) | 21 | add_executable(whisper-cxx-api ./whisper-cxx-api.cc) |
| 19 | target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api) | 22 | target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api) |
| 20 | 23 |
| 1 | +// cxx-api-examples/streaming-t-one-ctc-cxx-api.cc | ||
| 2 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// | ||
| 5 | +// This file demonstrates how to use streaming T-one | ||
| 6 | +// with sherpa-onnx's C++ API. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +// | ||
| 10 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 11 | +// tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 12 | +// rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 13 | +// | ||
| 14 | +// clang-format on | ||
| 15 | + | ||
#include <chrono>  // NOLINT
#include <iostream>
#include <string>
#include <vector>

#include "sherpa-onnx/c-api/cxx-api.h"
| 21 | + | ||
| 22 | +int32_t main() { | ||
| 23 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 24 | + OnlineRecognizerConfig config; | ||
| 25 | + | ||
| 26 | + // please see | ||
| 27 | + config.model_config.t_one_ctc.model = | ||
| 28 | + "sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx"; | ||
| 29 | + | ||
| 30 | + config.model_config.tokens = | ||
| 31 | + "sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt"; | ||
| 32 | + | ||
| 33 | + config.model_config.num_threads = 1; | ||
| 34 | + | ||
| 35 | + std::cout << "Loading model\n"; | ||
| 36 | + OnlineRecognizer recognizer = OnlineRecognizer::Create(config); | ||
| 37 | + if (!recognizer.Get()) { | ||
| 38 | + std::cerr << "Please check your config\n"; | ||
| 39 | + return -1; | ||
| 40 | + } | ||
| 41 | + std::cout << "Loading model done\n"; | ||
| 42 | + | ||
| 43 | + std::string wave_filename = | ||
| 44 | + "sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"; | ||
| 45 | + | ||
| 46 | + Wave wave = ReadWave(wave_filename); | ||
| 47 | + if (wave.samples.empty()) { | ||
| 48 | + std::cerr << "Failed to read: '" << wave_filename << "'\n"; | ||
| 49 | + return -1; | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + std::cout << "Start recognition\n"; | ||
| 53 | + const auto begin = std::chrono::steady_clock::now(); | ||
| 54 | + | ||
| 55 | + OnlineStream stream = recognizer.CreateStream(); | ||
| 56 | + std::vector<float> left_padding(2400); // 0.3 seconds at 8kHz | ||
| 57 | + std::vector<float> tail_padding(4800); // 0.6 seconds at 8kHz | ||
| 58 | + | ||
| 59 | + stream.AcceptWaveform(wave.sample_rate, left_padding.data(), | ||
| 60 | + left_padding.size()); | ||
| 61 | + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | ||
| 62 | + wave.samples.size()); | ||
| 63 | + stream.AcceptWaveform(wave.sample_rate, tail_padding.data(), | ||
| 64 | + tail_padding.size()); | ||
| 65 | + stream.InputFinished(); | ||
| 66 | + | ||
| 67 | + while (recognizer.IsReady(&stream)) { | ||
| 68 | + recognizer.Decode(&stream); | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + OnlineRecognizerResult result = recognizer.GetResult(&stream); | ||
| 72 | + | ||
| 73 | + const auto end = std::chrono::steady_clock::now(); | ||
| 74 | + const float elapsed_seconds = | ||
| 75 | + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin) | ||
| 76 | + .count() / | ||
| 77 | + 1000.; | ||
| 78 | + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate); | ||
| 79 | + float rtf = elapsed_seconds / duration; | ||
| 80 | + | ||
| 81 | + std::cout << "text: " << result.text << "\n"; | ||
| 82 | + printf("Number of threads: %d\n", config.model_config.num_threads); | ||
| 83 | + printf("Duration: %.3fs\n", duration); | ||
| 84 | + printf("Elapsed seconds: %.3fs\n", elapsed_seconds); | ||
| 85 | + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds, | ||
| 86 | + duration, rtf); | ||
| 87 | + | ||
| 88 | + return 0; | ||
| 89 | +} |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | +import 'dart:typed_data'; | ||
| 4 | + | ||
| 5 | +import 'package:args/args.dart'; | ||
| 6 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 7 | + | ||
| 8 | +import './init.dart'; | ||
| 9 | + | ||
/// Decodes `--input-wav` with a streaming T-one CTC model given by
/// `--model` and `--tokens`, printing each new partial result.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final ctc = sherpa_onnx.OnlineToneCtcModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    toneCtc: ctc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // 0.3 seconds of silence, assuming sampleRate is 8kHz
  final leftPaddings = Float32List(2400);
  stream.acceptWaveform(
    samples: leftPaddings,
    sampleRate: waveData.sampleRate,
  );

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.2 seconds at 8kHz
  final numChunks = waveData.samples.length ~/ chunkSize;

  var last = '';
  for (int i = 0; i != numChunks; ++i) {
    int start = i * chunkSize;
    stream.acceptWaveform(
      samples:
          Float32List.sublistView(waveData.samples, start, start + chunkSize),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // Feed the trailing samples that did not fill a whole chunk; without this
  // the last (samples.length % chunkSize) samples would be dropped.
  final remainderStart = numChunks * chunkSize;
  if (remainderStart < waveData.samples.length) {
    stream.acceptWaveform(
      samples: Float32List.sublistView(waveData.samples, remainderStart),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
  }

  // 0.6 seconds of silence, assuming sampleRate is 8kHz, so the final words
  // are flushed out of the model
  final tailPaddings = Float32List(4800);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  stream.free();
  recognizer.free();
}
#!/usr/bin/env bash
# Download the streaming T-one Russian ASR model (if needed) and run the
# Dart streaming-ASR example against its bundled test wave.

set -ex

dart pub get

model_dir=sherpa-onnx-streaming-t-one-russian-2025-09-08

# Fetch and unpack the model archive only when it is not already present.
if [ ! -f ./$model_dir/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2
  tar xvf $model_dir.tar.bz2
  rm $model_dir.tar.bz2
fi

dart run \
  ./bin/t-one-ctc.dart \
  --model ./$model_dir/model.onnx \
  --tokens ./$model_dir/tokens.txt \
  --input-wav ./$model_dir/0.wav
| @@ -38,6 +38,9 @@ class OnlineDecodeFiles | @@ -38,6 +38,9 @@ class OnlineDecodeFiles | ||
| 38 | [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] | 38 | [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] |
| 39 | public string Zipformer2Ctc { get; set; } = string.Empty; | 39 | public string Zipformer2Ctc { get; set; } = string.Empty; |
| 40 | 40 | ||
| 41 | + [Option("t-one-ctc", Required = false, HelpText = "Path to T-one CTC onnx model")] | ||
| 42 | + public string ToneCtc { get; set; } = string.Empty; | ||
| 43 | + | ||
| 41 | [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] | 44 | [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] |
| 42 | public int NumThreads { get; set; } = 1; | 45 | public int NumThreads { get; set; } = 1; |
| 43 | 46 | ||
| @@ -173,6 +176,7 @@ to download pre-trained streaming models. | @@ -173,6 +176,7 @@ to download pre-trained streaming models. | ||
| 173 | config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder; | 176 | config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder; |
| 174 | 177 | ||
| 175 | config.ModelConfig.Zipformer2Ctc.Model = options.Zipformer2Ctc; | 178 | config.ModelConfig.Zipformer2Ctc.Model = options.Zipformer2Ctc; |
| 179 | + config.ModelConfig.ToneCtc.Model = options.ToneCtc; | ||
| 176 | 180 | ||
| 177 | config.ModelConfig.Tokens = options.Tokens; | 181 | config.ModelConfig.Tokens = options.Tokens; |
| 178 | config.ModelConfig.Provider = options.Provider; | 182 | config.ModelConfig.Provider = options.Provider; |
| @@ -203,10 +207,15 @@ to download pre-trained streaming models. | @@ -203,10 +207,15 @@ to download pre-trained streaming models. | ||
| 203 | var s = recognizer.CreateStream(); | 207 | var s = recognizer.CreateStream(); |
| 204 | 208 | ||
| 205 | var waveReader = new WaveReader(files[i]); | 209 | var waveReader = new WaveReader(files[i]); |
| 210 | + | ||
| 211 | + var leftPadding = new float[(int)(waveReader.SampleRate * 0.3)]; | ||
| 212 | + s.AcceptWaveform(waveReader.SampleRate, leftPadding); | ||
| 213 | + | ||
| 206 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | 214 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); |
| 207 | 215 | ||
| 208 | - var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; | 216 | + var tailPadding = new float[(int)(waveReader.SampleRate * 0.6)]; |
| 209 | s.AcceptWaveform(waveReader.SampleRate, tailPadding); | 217 | s.AcceptWaveform(waveReader.SampleRate, tailPadding); |
| 218 | + | ||
| 210 | s.InputFinished(); | 219 | s.InputFinished(); |
| 211 | 220 | ||
| 212 | streams.Add(s); | 221 | streams.Add(s); |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 8 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +dotnet run -c Release \ | ||
| 12 | + --tokens ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt \ | ||
| 13 | + --t-one-ctc ./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx \ | ||
| 14 | + --files ./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav |
| @@ -107,12 +107,34 @@ class OnlineNemoCtcModelConfig { | @@ -107,12 +107,34 @@ class OnlineNemoCtcModelConfig { | ||
| 107 | final String model; | 107 | final String model; |
| 108 | } | 108 | } |
| 109 | 109 | ||
| 110 | +class OnlineToneCtcModelConfig { | ||
| 111 | + const OnlineToneCtcModelConfig({this.model = ''}); | ||
| 112 | + | ||
| 113 | + factory OnlineToneCtcModelConfig.fromJson(Map<String, dynamic> json) { | ||
| 114 | + return OnlineToneCtcModelConfig( | ||
| 115 | + model: json['model'] as String? ?? '', | ||
| 116 | + ); | ||
| 117 | + } | ||
| 118 | + | ||
| 119 | + @override | ||
| 120 | + String toString() { | ||
| 121 | + return 'OnlineToneCtcModelConfig(model: $model)'; | ||
| 122 | + } | ||
| 123 | + | ||
| 124 | + Map<String, dynamic> toJson() => { | ||
| 125 | + 'model': model, | ||
| 126 | + }; | ||
| 127 | + | ||
| 128 | + final String model; | ||
| 129 | +} | ||
| 130 | + | ||
| 110 | class OnlineModelConfig { | 131 | class OnlineModelConfig { |
| 111 | const OnlineModelConfig({ | 132 | const OnlineModelConfig({ |
| 112 | this.transducer = const OnlineTransducerModelConfig(), | 133 | this.transducer = const OnlineTransducerModelConfig(), |
| 113 | this.paraformer = const OnlineParaformerModelConfig(), | 134 | this.paraformer = const OnlineParaformerModelConfig(), |
| 114 | this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(), | 135 | this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(), |
| 115 | this.nemoCtc = const OnlineNemoCtcModelConfig(), | 136 | this.nemoCtc = const OnlineNemoCtcModelConfig(), |
| 137 | + this.toneCtc = const OnlineToneCtcModelConfig(), | ||
| 116 | required this.tokens, | 138 | required this.tokens, |
| 117 | this.numThreads = 1, | 139 | this.numThreads = 1, |
| 118 | this.provider = 'cpu', | 140 | this.provider = 'cpu', |
| @@ -132,6 +154,8 @@ class OnlineModelConfig { | @@ -132,6 +154,8 @@ class OnlineModelConfig { | ||
| 132 | json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}), | 154 | json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}), |
| 133 | nemoCtc: OnlineNemoCtcModelConfig.fromJson( | 155 | nemoCtc: OnlineNemoCtcModelConfig.fromJson( |
| 134 | json['nemoCtc'] as Map<String, dynamic>? ?? const {}), | 156 | json['nemoCtc'] as Map<String, dynamic>? ?? const {}), |
| 157 | + toneCtc: OnlineToneCtcModelConfig.fromJson( | ||
| 158 | + json['toneCtc'] as Map<String, dynamic>? ?? const {}), | ||
| 135 | tokens: json['tokens'] as String, | 159 | tokens: json['tokens'] as String, |
| 136 | numThreads: json['numThreads'] as int? ?? 1, | 160 | numThreads: json['numThreads'] as int? ?? 1, |
| 137 | provider: json['provider'] as String? ?? 'cpu', | 161 | provider: json['provider'] as String? ?? 'cpu', |
| @@ -144,7 +168,7 @@ class OnlineModelConfig { | @@ -144,7 +168,7 @@ class OnlineModelConfig { | ||
| 144 | 168 | ||
| 145 | @override | 169 | @override |
| 146 | String toString() { | 170 | String toString() { |
| 147 | - return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, nemoCtc: $nemoCtc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)'; | 171 | + return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, nemoCtc: $nemoCtc, toneCtc: $toneCtc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)'; |
| 148 | } | 172 | } |
| 149 | 173 | ||
| 150 | Map<String, dynamic> toJson() => { | 174 | Map<String, dynamic> toJson() => { |
| @@ -152,6 +176,7 @@ class OnlineModelConfig { | @@ -152,6 +176,7 @@ class OnlineModelConfig { | ||
| 152 | 'paraformer': paraformer.toJson(), | 176 | 'paraformer': paraformer.toJson(), |
| 153 | 'zipformer2Ctc': zipformer2Ctc.toJson(), | 177 | 'zipformer2Ctc': zipformer2Ctc.toJson(), |
| 154 | 'nemoCtc': nemoCtc.toJson(), | 178 | 'nemoCtc': nemoCtc.toJson(), |
| 179 | + 'toneCtc': toneCtc.toJson(), | ||
| 155 | 'tokens': tokens, | 180 | 'tokens': tokens, |
| 156 | 'numThreads': numThreads, | 181 | 'numThreads': numThreads, |
| 157 | 'provider': provider, | 182 | 'provider': provider, |
| @@ -165,6 +190,7 @@ class OnlineModelConfig { | @@ -165,6 +190,7 @@ class OnlineModelConfig { | ||
| 165 | final OnlineParaformerModelConfig paraformer; | 190 | final OnlineParaformerModelConfig paraformer; |
| 166 | final OnlineZipformer2CtcModelConfig zipformer2Ctc; | 191 | final OnlineZipformer2CtcModelConfig zipformer2Ctc; |
| 167 | final OnlineNemoCtcModelConfig nemoCtc; | 192 | final OnlineNemoCtcModelConfig nemoCtc; |
| 193 | + final OnlineToneCtcModelConfig toneCtc; | ||
| 168 | 194 | ||
| 169 | final String tokens; | 195 | final String tokens; |
| 170 | 196 | ||
| @@ -362,6 +388,9 @@ class OnlineRecognizer { | @@ -362,6 +388,9 @@ class OnlineRecognizer { | ||
| 362 | // nemoCtc | 388 | // nemoCtc |
| 363 | c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8(); | 389 | c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8(); |
| 364 | 390 | ||
| 391 | + // toneCtc | ||
| 392 | + c.ref.model.toneCtc.model = config.model.toneCtc.model.toNativeUtf8(); | ||
| 393 | + | ||
| 365 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); | 394 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); |
| 366 | c.ref.model.numThreads = config.model.numThreads; | 395 | c.ref.model.numThreads = config.model.numThreads; |
| 367 | c.ref.model.provider = config.model.provider.toNativeUtf8(); | 396 | c.ref.model.provider = config.model.provider.toNativeUtf8(); |
| @@ -415,6 +444,7 @@ class OnlineRecognizer { | @@ -415,6 +444,7 @@ class OnlineRecognizer { | ||
| 415 | calloc.free(c.ref.model.modelType); | 444 | calloc.free(c.ref.model.modelType); |
| 416 | calloc.free(c.ref.model.provider); | 445 | calloc.free(c.ref.model.provider); |
| 417 | calloc.free(c.ref.model.tokens); | 446 | calloc.free(c.ref.model.tokens); |
| 447 | + calloc.free(c.ref.model.toneCtc.model); | ||
| 418 | calloc.free(c.ref.model.nemoCtc.model); | 448 | calloc.free(c.ref.model.nemoCtc.model); |
| 419 | calloc.free(c.ref.model.zipformer2Ctc.model); | 449 | calloc.free(c.ref.model.zipformer2Ctc.model); |
| 420 | calloc.free(c.ref.model.paraformer.encoder); | 450 | calloc.free(c.ref.model.paraformer.encoder); |
| @@ -403,6 +403,10 @@ final class SherpaOnnxOnlineNemoCtcModelConfig extends Struct { | @@ -403,6 +403,10 @@ final class SherpaOnnxOnlineNemoCtcModelConfig extends Struct { | ||
| 403 | external Pointer<Utf8> model; | 403 | external Pointer<Utf8> model; |
| 404 | } | 404 | } |
| 405 | 405 | ||
| 406 | +final class SherpaOnnxOnlineToneCtcModelConfig extends Struct { | ||
| 407 | + external Pointer<Utf8> model; | ||
| 408 | +} | ||
| 409 | + | ||
| 406 | final class SherpaOnnxOnlineModelConfig extends Struct { | 410 | final class SherpaOnnxOnlineModelConfig extends Struct { |
| 407 | external SherpaOnnxOnlineTransducerModelConfig transducer; | 411 | external SherpaOnnxOnlineTransducerModelConfig transducer; |
| 408 | external SherpaOnnxOnlineParaformerModelConfig paraformer; | 412 | external SherpaOnnxOnlineParaformerModelConfig paraformer; |
| @@ -430,6 +434,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct { | @@ -430,6 +434,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct { | ||
| 430 | external int tokensBufSize; | 434 | external int tokensBufSize; |
| 431 | 435 | ||
| 432 | external SherpaOnnxOnlineNemoCtcModelConfig nemoCtc; | 436 | external SherpaOnnxOnlineNemoCtcModelConfig nemoCtc; |
| 437 | + | ||
| 438 | + external SherpaOnnxOnlineToneCtcModelConfig toneCtc; | ||
| 433 | } | 439 | } |
| 434 | 440 | ||
| 435 | final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct { | 441 | final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct { |
| @@ -27,6 +27,7 @@ func main() { | @@ -27,6 +27,7 @@ func main() { | ||
| 27 | flag.StringVar(&config.ModelConfig.Paraformer.Encoder, "paraformer-encoder", "", "Path to the paraformer encoder model") | 27 | flag.StringVar(&config.ModelConfig.Paraformer.Encoder, "paraformer-encoder", "", "Path to the paraformer encoder model") |
| 28 | flag.StringVar(&config.ModelConfig.Paraformer.Decoder, "paraformer-decoder", "", "Path to the paraformer decoder model") | 28 | flag.StringVar(&config.ModelConfig.Paraformer.Decoder, "paraformer-decoder", "", "Path to the paraformer decoder model") |
| 29 | flag.StringVar(&config.ModelConfig.Zipformer2Ctc.Model, "zipformer2-ctc", "", "Path to the zipformer2 CTC model") | 29 | flag.StringVar(&config.ModelConfig.Zipformer2Ctc.Model, "zipformer2-ctc", "", "Path to the zipformer2 CTC model") |
| 30 | + flag.StringVar(&config.ModelConfig.ToneCtc.Model, "t-one-ctc", "", "Path to the T-one CTC model") | ||
| 30 | flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") | 31 | flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") |
| 31 | flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") | 32 | flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") |
| 32 | flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") | 33 | flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") |
| @@ -59,9 +60,12 @@ func main() { | @@ -59,9 +60,12 @@ func main() { | ||
| 59 | stream := sherpa.NewOnlineStream(recognizer) | 60 | stream := sherpa.NewOnlineStream(recognizer) |
| 60 | defer sherpa.DeleteOnlineStream(stream) | 61 | defer sherpa.DeleteOnlineStream(stream) |
| 61 | 62 | ||
| 63 | + leftPadding := make([]float32, int(float32(sampleRate)*0.3)) | ||
| 64 | + stream.AcceptWaveform(sampleRate, leftPadding) | ||
| 65 | + | ||
| 62 | stream.AcceptWaveform(sampleRate, samples) | 66 | stream.AcceptWaveform(sampleRate, samples) |
| 63 | 67 | ||
| 64 | - tailPadding := make([]float32, int(float32(sampleRate)*0.3)) | 68 | + tailPadding := make([]float32, int(float32(sampleRate)*0.6)) |
| 65 | stream.AcceptWaveform(sampleRate, tailPadding) | 69 | stream.AcceptWaveform(sampleRate, tailPadding) |
| 66 | 70 | ||
| 67 | for recognizer.IsReady(stream) { | 71 | for recognizer.IsReady(stream) { |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | + | ||
| 6 | +if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then | ||
| 7 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 8 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 9 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 10 | +fi | ||
| 11 | + | ||
| 12 | +go mod tidy | ||
| 13 | +go build | ||
| 14 | + | ||
| 15 | +./streaming-decode-files \ | ||
| 16 | + --t-one-ctc ./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx \ | ||
| 17 | + --tokens ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt \ | ||
| 18 | + ./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav |
| @@ -26,8 +26,9 @@ export { Samples, | @@ -26,8 +26,9 @@ export { Samples, | ||
| 26 | 26 | ||
| 27 | export { OnlineStream, | 27 | export { OnlineStream, |
| 28 | OnlineNemoCtcModelConfig, | 28 | OnlineNemoCtcModelConfig, |
| 29 | - OnlineTransducerModelConfig, | ||
| 30 | OnlineParaformerModelConfig, | 29 | OnlineParaformerModelConfig, |
| 30 | + OnlineToneCtcModelConfig, | ||
| 31 | + OnlineTransducerModelConfig, | ||
| 31 | OnlineZipformer2CtcModelConfig, | 32 | OnlineZipformer2CtcModelConfig, |
| 32 | OnlineModelConfig, | 33 | OnlineModelConfig, |
| 33 | OnlineCtcFstDecoderConfig, | 34 | OnlineCtcFstDecoderConfig, |
| @@ -89,6 +89,22 @@ static SherpaOnnxOnlineNemoCtcModelConfig GetOnlineNemoCtcModelConfig( | @@ -89,6 +89,22 @@ static SherpaOnnxOnlineNemoCtcModelConfig GetOnlineNemoCtcModelConfig( | ||
| 89 | return c; | 89 | return c; |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | +static SherpaOnnxOnlineToneCtcModelConfig GetOnlineToneCtcModelConfig( | ||
| 93 | + Napi::Object obj) { | ||
| 94 | + SherpaOnnxOnlineToneCtcModelConfig c; | ||
| 95 | + memset(&c, 0, sizeof(c)); | ||
| 96 | + | ||
| 97 | + if (!obj.Has("toneCtc") || !obj.Get("toneCtc").IsObject()) { | ||
| 98 | + return c; | ||
| 99 | + } | ||
| 100 | + | ||
| 101 | + Napi::Object o = obj.Get("toneCtc").As<Napi::Object>(); | ||
| 102 | + | ||
| 103 | + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model); | ||
| 104 | + | ||
| 105 | + return c; | ||
| 106 | +} | ||
| 107 | + | ||
| 92 | static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( | 108 | static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( |
| 93 | Napi::Object obj) { | 109 | Napi::Object obj) { |
| 94 | SherpaOnnxOnlineParaformerModelConfig c; | 110 | SherpaOnnxOnlineParaformerModelConfig c; |
| @@ -120,6 +136,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { | @@ -120,6 +136,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { | ||
| 120 | c.paraformer = GetOnlineParaformerModelConfig(o); | 136 | c.paraformer = GetOnlineParaformerModelConfig(o); |
| 121 | c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o); | 137 | c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o); |
| 122 | c.nemo_ctc = GetOnlineNemoCtcModelConfig(o); | 138 | c.nemo_ctc = GetOnlineNemoCtcModelConfig(o); |
| 139 | + c.t_one_ctc = GetOnlineToneCtcModelConfig(o); | ||
| 123 | 140 | ||
| 124 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); | 141 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); |
| 125 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); | 142 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); |
| @@ -265,6 +282,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | @@ -265,6 +282,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | ||
| 265 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder); | 282 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder); |
| 266 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder); | 283 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder); |
| 267 | 284 | ||
| 285 | + SHERPA_ONNX_DELETE_C_STR(c.model_config.t_one_ctc.model); | ||
| 268 | SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model); | 286 | SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model); |
| 269 | SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model); | 287 | SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model); |
| 270 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); | 288 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); |
| @@ -50,11 +50,16 @@ export class OnlineNemoCtcModelConfig { | @@ -50,11 +50,16 @@ export class OnlineNemoCtcModelConfig { | ||
| 50 | public model: string = ''; | 50 | public model: string = ''; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | +export class OnlineToneCtcModelConfig { | ||
| 54 | + public model: string = ''; | ||
| 55 | +} | ||
| 56 | + | ||
| 53 | export class OnlineModelConfig { | 57 | export class OnlineModelConfig { |
| 54 | public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig(); | 58 | public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig(); |
| 55 | public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig(); | 59 | public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig(); |
| 56 | - public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig(); | ||
| 57 | - public nemo_ctc: OnlineNemoCtcModelConfig = new OnlineNemoCtcModelConfig(); | 60 | + public zipformer2Ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig(); |
| 61 | + public nemoCtc: OnlineNemoCtcModelConfig = new OnlineNemoCtcModelConfig(); | ||
| 62 | + public toneCtc: OnlineToneCtcModelConfig = new OnlineToneCtcModelConfig(); | ||
| 58 | public tokens: string = ''; | 63 | public tokens: string = ''; |
| 59 | public numThreads: number = 1; | 64 | public numThreads: number = 1; |
| 60 | public provider: string = 'cpu'; | 65 | public provider: string = 'cpu'; |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an online T-one CTC model, i.e., | ||
| 4 | +// streaming T-one CTC model, to decode files. | ||
| 5 | +import com.k2fsa.sherpa.onnx.*; | ||
| 6 | + | ||
| 7 | +public class StreamingDecodeFileToneCtc { | ||
| 8 | + public static void main(String[] args) { | ||
| 9 | + String model = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx"; | ||
| 10 | + String tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt"; | ||
| 11 | + String waveFilename = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"; | ||
| 12 | + | ||
| 13 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 14 | + | ||
| 15 | + OnlineToneCtcModelConfig ctc = OnlineToneCtcModelConfig.builder().setModel(model).build(); | ||
| 16 | + | ||
| 17 | + OnlineModelConfig modelConfig = | ||
| 18 | + OnlineModelConfig.builder() | ||
| 19 | + .setToneCtc(ctc) | ||
| 20 | + .setTokens(tokens) | ||
| 21 | + .setNumThreads(1) | ||
| 22 | + .setDebug(true) | ||
| 23 | + .build(); | ||
| 24 | + | ||
| 25 | + OnlineRecognizerConfig config = | ||
| 26 | + OnlineRecognizerConfig.builder() | ||
| 27 | + .setOnlineModelConfig(modelConfig) | ||
| 28 | + .setDecodingMethod("greedy_search") | ||
| 29 | + .build(); | ||
| 30 | + | ||
| 31 | + OnlineRecognizer recognizer = new OnlineRecognizer(config); | ||
| 32 | + OnlineStream stream = recognizer.createStream(); | ||
| 33 | + | ||
| 34 | + float[] leftPaddings = new float[(int) (0.3 * reader.getSampleRate())]; | ||
| 35 | + stream.acceptWaveform(leftPaddings, reader.getSampleRate()); | ||
| 36 | + | ||
| 37 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 38 | + | ||
| 39 | + float[] tailPaddings = new float[(int) (0.6 * reader.getSampleRate())]; | ||
| 40 | + stream.acceptWaveform(tailPaddings, reader.getSampleRate()); | ||
| 41 | + | ||
| 42 | + while (recognizer.isReady(stream)) { | ||
| 43 | + recognizer.decode(stream); | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + String text = recognizer.getResult(stream).getText(); | ||
| 47 | + | ||
| 48 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 49 | + | ||
| 50 | + stream.release(); | ||
| 51 | + recognizer.release(); | ||
| 52 | + } | ||
| 53 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +set -ex | ||
| 3 | + | ||
| 4 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 5 | + mkdir -p ../build | ||
| 6 | + pushd ../build | ||
| 7 | + cmake \ | ||
| 8 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 11 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 12 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 14 | + .. | ||
| 15 | + | ||
| 16 | + make -j4 | ||
| 17 | + ls -lh lib | ||
| 18 | + popd | ||
| 19 | +fi | ||
| 20 | + | ||
| 21 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 22 | + pushd ../sherpa-onnx/java-api | ||
| 23 | + make | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 30 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +java \ | ||
| 34 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 35 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 36 | + StreamingDecodeFileToneCtc.java |
| @@ -72,6 +72,12 @@ function testSpeakerEmbeddingExtractor() { | @@ -72,6 +72,12 @@ function testSpeakerEmbeddingExtractor() { | ||
| 72 | 72 | ||
| 73 | 73 | ||
| 74 | function testOnlineAsr() { | 74 | function testOnlineAsr() { |
| 75 | + if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then | ||
| 76 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 77 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 78 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 79 | + fi | ||
| 80 | + | ||
| 75 | if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then | 81 | if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then |
| 76 | git lfs install | 82 | git lfs install |
| 77 | GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 | 83 | GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 |
| @@ -5,6 +5,7 @@ fun main() { | @@ -5,6 +5,7 @@ fun main() { | ||
| 5 | testOnlineAsr("zipformer2-ctc") | 5 | testOnlineAsr("zipformer2-ctc") |
| 6 | testOnlineAsr("ctc-hlg") | 6 | testOnlineAsr("ctc-hlg") |
| 7 | testOnlineAsr("nemo-ctc") | 7 | testOnlineAsr("nemo-ctc") |
| 8 | + testOnlineAsr("tone-ctc") | ||
| 8 | } | 9 | } |
| 9 | 10 | ||
| 10 | fun testOnlineAsr(type: String) { | 11 | fun testOnlineAsr(type: String) { |
| @@ -54,6 +55,17 @@ fun testOnlineAsr(type: String) { | @@ -54,6 +55,17 @@ fun testOnlineAsr(type: String) { | ||
| 54 | debug = false, | 55 | debug = false, |
| 55 | ) | 56 | ) |
| 56 | } | 57 | } |
| 58 | + "tone-ctc" -> { | ||
| 59 | + waveFilename = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav" | ||
| 60 | + OnlineModelConfig( | ||
| 61 | + toneCtc = OnlineToneCtcModelConfig( | ||
| 62 | + model = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx", | ||
| 63 | + ), | ||
| 64 | + tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt", | ||
| 65 | + numThreads = 1, | ||
| 66 | + debug = false, | ||
| 67 | + ) | ||
| 68 | + } | ||
| 57 | "ctc-hlg" -> { | 69 | "ctc-hlg" -> { |
| 58 | waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav" | 70 | waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav" |
| 59 | ctcFstDecoderConfig.graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst" | 71 | ctcFstDecoderConfig.graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst" |
| @@ -95,12 +107,16 @@ fun testOnlineAsr(type: String) { | @@ -95,12 +107,16 @@ fun testOnlineAsr(type: String) { | ||
| 95 | val sampleRate: Int = objArray[1] as Int | 107 | val sampleRate: Int = objArray[1] as Int |
| 96 | 108 | ||
| 97 | val stream = recognizer.createStream() | 109 | val stream = recognizer.createStream() |
| 110 | + | ||
| 111 | + val leftPaddings = FloatArray((sampleRate * 0.3).toInt()) // 0.3 seconds | ||
| 112 | + stream.acceptWaveform(leftPaddings, sampleRate = sampleRate) | ||
| 113 | + | ||
| 98 | stream.acceptWaveform(samples, sampleRate = sampleRate) | 114 | stream.acceptWaveform(samples, sampleRate = sampleRate) |
| 99 | while (recognizer.isReady(stream)) { | 115 | while (recognizer.isReady(stream)) { |
| 100 | recognizer.decode(stream) | 116 | recognizer.decode(stream) |
| 101 | } | 117 | } |
| 102 | 118 | ||
| 103 | - val tailPaddings = FloatArray((sampleRate * 0.5).toInt()) // 0.5 seconds | 119 | + val tailPaddings = FloatArray((sampleRate * 0.6).toInt()) // 0.6 seconds |
| 104 | stream.acceptWaveform(tailPaddings, sampleRate = sampleRate) | 120 | stream.acceptWaveform(tailPaddings, sampleRate = sampleRate) |
| 105 | stream.inputFinished() | 121 | stream.inputFinished() |
| 106 | while (recognizer.isReady(stream)) { | 122 | while (recognizer.isReady(stream)) { |
| @@ -97,6 +97,7 @@ The following tables list the examples in this folder. | @@ -97,6 +97,7 @@ The following tables list the examples in this folder. | ||
| 97 | 97 | ||
| 98 | |File| Description| | 98 | |File| Description| |
| 99 | |---|---| | 99 | |---|---| |
| 100 | +|[./test_asr_streaming_t_one_ctc.js](./test_asr_streaming_t_one_ctc.js)| Streaming speech recognition from a file using a T-one CTC model| | ||
| 100 | |[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model| | 101 | |[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model| |
| 101 | |[./test_asr_streaming_transducer_with_hr.js](./test_asr_streaming_transducer_with_hr.js)| Streaming speech recognition from a file using a Zipformer transducer model with homophone replacer| | 102 | |[./test_asr_streaming_transducer_with_hr.js](./test_asr_streaming_transducer_with_hr.js)| Streaming speech recognition from a file using a Zipformer transducer model with homophone replacer| |
| 102 | |[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search| | 103 | |[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search| |
| @@ -230,6 +231,16 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lex | @@ -230,6 +231,16 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lex | ||
| 230 | node ./test_asr_streaming_transducer_with_hr.js | 231 | node ./test_asr_streaming_transducer_with_hr.js |
| 231 | ``` | 232 | ``` |
| 232 | 233 | ||
| 234 | +### Streaming speech recognition with T-one CTC | ||
| 235 | + | ||
| 236 | +```bash | ||
| 237 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 238 | +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 239 | +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 240 | + | ||
| 241 | +node ./test_asr_streaming_t_one_ctc.js | ||
| 242 | +``` | ||
| 243 | + | ||
| 233 | ### Streaming speech recognition with Zipformer transducer | 244 | ### Streaming speech recognition with Zipformer transducer |
| 234 | 245 | ||
| 235 | ```bash | 246 | ```bash |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +const sherpa_onnx = require('sherpa-onnx-node'); | ||
| 3 | + | ||
| 4 | +// Please download test files from | ||
| 5 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 6 | +const config = { | ||
| 7 | + 'modelConfig': { | ||
| 8 | + 'toneCtc': { | ||
| 9 | + 'model': './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx', | ||
| 10 | + }, | ||
| 11 | + 'tokens': './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt', | ||
| 12 | + 'numThreads': 2, | ||
| 13 | + 'provider': 'cpu', | ||
| 14 | + 'debug': 1, | ||
| 15 | + } | ||
| 16 | +}; | ||
| 17 | + | ||
| 18 | +const waveFilename = './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav'; | ||
| 19 | + | ||
| 20 | +const recognizer = new sherpa_onnx.OnlineRecognizer(config); | ||
| 21 | +console.log('Started') | ||
| 22 | +let start = Date.now(); | ||
| 23 | +const stream = recognizer.createStream(); | ||
| 24 | +const wave = sherpa_onnx.readWave(waveFilename); | ||
| 25 | + | ||
| 26 | +const leftPadding = new Float32Array(wave.sampleRate * 0.3); | ||
| 27 | +stream.acceptWaveform({samples: leftPadding, sampleRate: wave.sampleRate}); | ||
| 28 | + | ||
| 29 | +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples}); | ||
| 30 | + | ||
| 31 | +const tailPadding = new Float32Array(wave.sampleRate * 0.6); | ||
| 32 | +stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate}); | ||
| 33 | + | ||
| 34 | +while (recognizer.isReady(stream)) { | ||
| 35 | + recognizer.decode(stream); | ||
| 36 | +} | ||
| 37 | +result = recognizer.getResult(stream) | ||
| 38 | +let stop = Date.now(); | ||
| 39 | +console.log('Done') | ||
| 40 | + | ||
| 41 | +const elapsed_seconds = (stop - start) / 1000; | ||
| 42 | +const duration = wave.samples.length / wave.sampleRate; | ||
| 43 | +const real_time_factor = elapsed_seconds / duration; | ||
| 44 | +console.log('Wave duration', duration.toFixed(3), 'seconds') | ||
| 45 | +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') | ||
| 46 | +console.log( | ||
| 47 | + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, | ||
| 48 | + real_time_factor.toFixed(3)) | ||
| 49 | +console.log(waveFilename) | ||
| 50 | +console.log('result\n', result) |
| @@ -393,6 +393,18 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | @@ -393,6 +393,18 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 393 | node ./test-online-paraformer-microphone-mic.js | 393 | node ./test-online-paraformer-microphone-mic.js |
| 394 | ``` | 394 | ``` |
| 395 | 395 | ||
| 396 | +## ./test-online-t-one-ctc.js | ||
| 397 | +[./test-online-t-one-ctc.js](./test-online-t-one-ctc.js) demonstrates | ||
| 398 | +how to decode a file using a streaming T-one CTC model. | ||
| 399 | + | ||
| 400 | +You can use the following command to run it: | ||
| 401 | + | ||
| 402 | +```bash | ||
| 403 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 404 | +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 405 | +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 406 | +node ./test-online-t-one-ctc.js | ||
| 407 | +``` | ||
| 396 | 408 | ||
| 397 | ## ./test-online-paraformer.js | 409 | ## ./test-online-paraformer.js |
| 398 | [./test-online-paraformer.js](./test-online-paraformer.js) demonstrates | 410 | [./test-online-paraformer.js](./test-online-paraformer.js) demonstrates |
nodejs-examples/test-online-t-one-ctc.js
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const fs = require('fs'); | ||
| 4 | +const {Readable} = require('stream'); | ||
| 5 | +const wav = require('wav'); | ||
| 6 | + | ||
| 7 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 8 | + | ||
| 9 | +function createOnlineRecognizer() { | ||
| 10 | + let toneCtc = { | ||
| 11 | + model: './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx', | ||
| 12 | + }; | ||
| 13 | + | ||
| 14 | + let onlineModelConfig = { | ||
| 15 | + toneCtc: toneCtc, | ||
| 16 | + tokens: './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt', | ||
| 17 | + numThreads: 1, | ||
| 18 | + provider: 'cpu', | ||
| 19 | + debug: 1, | ||
| 20 | + }; | ||
| 21 | + | ||
| 22 | + | ||
| 23 | + let recognizerConfig = { | ||
| 24 | + modelConfig: onlineModelConfig, | ||
| 25 | + decodingMethod: 'greedy_search', | ||
| 26 | + maxActivePaths: 4, | ||
| 27 | + enableEndpoint: 1, | ||
| 28 | + rule1MinTrailingSilence: 2.4, | ||
| 29 | + rule2MinTrailingSilence: 1.2, | ||
| 30 | + rule3MinUtteranceLength: 20, | ||
| 31 | + }; | ||
| 32 | + | ||
| 33 | + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | ||
| 34 | +} | ||
| 35 | + | ||
| 36 | +const recognizer = createOnlineRecognizer(); | ||
| 37 | +const stream = recognizer.createStream(); | ||
| 38 | + | ||
| 39 | +const waveFilename = './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav'; | ||
| 40 | +const wave = sherpa_onnx.readWave(waveFilename); | ||
| 41 | + | ||
| 42 | +const leftPadding = new Float32Array(wave.sampleRate * 0.3); | ||
| 43 | +const tailPadding = new Float32Array(wave.sampleRate * 0.6); | ||
| 44 | + | ||
| 45 | +stream.acceptWaveform(wave.sampleRate, leftPadding); | ||
| 46 | +stream.acceptWaveform(wave.sampleRate, wave.samples); | ||
| 47 | +stream.acceptWaveform(wave.sampleRate, tailPadding); | ||
| 48 | + | ||
| 49 | +while (recognizer.isReady(stream)) { | ||
| 50 | + recognizer.decode(stream); | ||
| 51 | +} | ||
| 52 | +const text = recognizer.getResult(stream).text; | ||
| 53 | +console.log(text); | ||
| 54 | + | ||
| 55 | +stream.free(); | ||
| 56 | +recognizer.free(); |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 30 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +fpc \ | ||
| 34 | + -dSHERPA_ONNX_USE_SHARED_LIBS \ | ||
| 35 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 36 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 37 | + ./t_one_ctc.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 40 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 41 | + | ||
| 42 | +./t_one_ctc |
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a streaming T-one CTC model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program t_one_ctc; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxOnlineRecognizerConfig; | ||
| 22 | + Recognizer: TSherpaOnnxOnlineRecognizer; | ||
| 23 | + Stream: TSherpaOnnxOnlineStream; | ||
| 24 | + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; | ||
| 25 | + Wave: TSherpaOnnxWave; | ||
| 26 | + WaveFilename: AnsiString; | ||
| 27 | + LeftPaddings: array of Single; | ||
| 28 | + TailPaddings: array of Single; | ||
| 29 | + | ||
| 30 | + Start: TDateTime; | ||
| 31 | + Stop: TDateTime; | ||
| 32 | + | ||
| 33 | + Elapsed: Single; | ||
| 34 | + Duration: Single; | ||
| 35 | + RealTimeFactor: Single; | ||
| 36 | +begin | ||
| 37 | + Initialize(Config); | ||
| 38 | + | ||
| 39 | + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 40 | + to download model files used in this file.} | ||
| 41 | + Config.ModelConfig.ToneCtc.Model := './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx'; | ||
| 42 | + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt'; | ||
| 43 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 44 | + Config.ModelConfig.NumThreads := 1; | ||
| 45 | + Config.ModelConfig.Debug := False; | ||
| 46 | + | ||
| 47 | + WaveFilename := './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav'; | ||
| 48 | + | ||
| 49 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 50 | + | ||
| 51 | + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); | ||
| 52 | + | ||
| 53 | + Start := Now; | ||
| 54 | + | ||
| 55 | + Stream := Recognizer.CreateStream(); | ||
| 56 | + | ||
| 57 | + SetLength(LeftPaddings, Round(Wave.SampleRate * 0.3)); {0.3 seconds of padding} | ||
| 58 | + Stream.AcceptWaveform(LeftPaddings, Wave.SampleRate); | ||
| 59 | + | ||
| 60 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 61 | + | ||
| 62 | + SetLength(TailPaddings, Round(Wave.SampleRate * 0.6)); {0.6 seconds of padding} | ||
| 63 | + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); | ||
| 64 | + | ||
| 65 | + Stream.InputFinished(); | ||
| 66 | + | ||
| 67 | + while Recognizer.IsReady(Stream) do | ||
| 68 | + Recognizer.Decode(Stream); | ||
| 69 | + | ||
| 70 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 71 | + | ||
| 72 | + Stop := Now; | ||
| 73 | + | ||
| 74 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 75 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 76 | + RealTimeFactor := Elapsed / Duration; | ||
| 77 | + | ||
| 78 | + WriteLn(RecognitionResult.ToString); | ||
| 79 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 80 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 81 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 82 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 83 | + | ||
| 84 | + {Free resources to avoid memory leak. | ||
| 85 | + | ||
| 86 | + Note: You don't need to invoke them for this simple script. | ||
| 87 | + However, you have to invoke them in your own large/complex project. | ||
| 88 | + } | ||
| 89 | + FreeAndNil(Stream); | ||
| 90 | + FreeAndNil(Recognizer); | ||
| 91 | +end. |
| @@ -469,6 +469,21 @@ def get_models(): | @@ -469,6 +469,21 @@ def get_models(): | ||
| 469 | popd | 469 | popd |
| 470 | """, | 470 | """, |
| 471 | ), | 471 | ), |
| 472 | + Model( | ||
| 473 | + model_name="sherpa-onnx-streaming-t-one-russian-2025-09-08", | ||
| 474 | + idx=27, | ||
| 475 | + lang="ru", | ||
| 476 | + short_name="t_one_ctc_2025_09_08", | ||
| 477 | + cmd=""" | ||
| 478 | + pushd $model_name | ||
| 479 | + | ||
| 480 | + rm -v *.wav | ||
| 481 | + | ||
| 482 | + ls -lh | ||
| 483 | + | ||
| 484 | + popd | ||
| 485 | + """, | ||
| 486 | + ), | ||
| 472 | ] | 487 | ] |
| 473 | 488 | ||
| 474 | return models | 489 | return models |
| @@ -25,6 +25,7 @@ namespace SherpaOnnx | @@ -25,6 +25,7 @@ namespace SherpaOnnx | ||
| 25 | TokensBuf = ""; | 25 | TokensBuf = ""; |
| 26 | TokensBufSize = 0; | 26 | TokensBufSize = 0; |
| 27 | NemoCtc = new OnlineNemoCtcModelConfig(); | 27 | NemoCtc = new OnlineNemoCtcModelConfig(); |
| 28 | + ToneCtc = new OnlineToneCtcModelConfig(); | ||
| 28 | } | 29 | } |
| 29 | 30 | ||
| 30 | public OnlineTransducerModelConfig Transducer; | 31 | public OnlineTransducerModelConfig Transducer; |
| @@ -58,6 +59,8 @@ namespace SherpaOnnx | @@ -58,6 +59,8 @@ namespace SherpaOnnx | ||
| 58 | public int TokensBufSize; | 59 | public int TokensBufSize; |
| 59 | 60 | ||
| 60 | public OnlineNemoCtcModelConfig NemoCtc; | 61 | public OnlineNemoCtcModelConfig NemoCtc; |
| 62 | + | ||
| 63 | + public OnlineToneCtcModelConfig ToneCtc; | ||
| 61 | } | 64 | } |
| 62 | 65 | ||
| 63 | } | 66 | } |
scripts/dotnet/OnlineToneCtcModelConfig.cs
0 → 100644
| 1 | +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
| 5 | +namespace SherpaOnnx | ||
| 6 | +{ | ||
| 7 | + [StructLayout(LayoutKind.Sequential)] | ||
| 8 | + public struct OnlineToneCtcModelConfig | ||
| 9 | + { | ||
| 10 | + public OnlineToneCtcModelConfig() | ||
| 11 | + { | ||
| 12 | + Model = ""; | ||
| 13 | + } | ||
| 14 | + | ||
| 15 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 16 | + public string Model; | ||
| 17 | + } | ||
| 18 | +} |
| 1 | +../../../../go-api-examples/non-streaming-tts/run-kitten-en.sh |
| 1 | +../../../../go-api-examples/streaming-decode-files/run-t-one-ctc.sh |
| @@ -81,6 +81,10 @@ type OnlineNemoCtcModelConfig struct { | @@ -81,6 +81,10 @@ type OnlineNemoCtcModelConfig struct { | ||
| 81 | Model string // Path to the onnx model | 81 | Model string // Path to the onnx model |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | +type OnlineToneCtcModelConfig struct { | ||
| 85 | + Model string // Path to the onnx model | ||
| 86 | +} | ||
| 87 | + | ||
| 84 | // Configuration for online/streaming models | 88 | // Configuration for online/streaming models |
| 85 | // | 89 | // |
| 86 | // Please refer to | 90 | // Please refer to |
| @@ -92,6 +96,7 @@ type OnlineModelConfig struct { | @@ -92,6 +96,7 @@ type OnlineModelConfig struct { | ||
| 92 | Paraformer OnlineParaformerModelConfig | 96 | Paraformer OnlineParaformerModelConfig |
| 93 | Zipformer2Ctc OnlineZipformer2CtcModelConfig | 97 | Zipformer2Ctc OnlineZipformer2CtcModelConfig |
| 94 | NemoCtc OnlineNemoCtcModelConfig | 98 | NemoCtc OnlineNemoCtcModelConfig |
| 99 | + ToneCtc OnlineToneCtcModelConfig | ||
| 95 | Tokens string // Path to tokens.txt | 100 | Tokens string // Path to tokens.txt |
| 96 | NumThreads int // Number of threads to use for neural network computation | 101 | NumThreads int // Number of threads to use for neural network computation |
| 97 | Provider string // Optional. Valid values are: cpu, cuda, coreml | 102 | Provider string // Optional. Valid values are: cpu, cuda, coreml |
| @@ -205,6 +210,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | @@ -205,6 +210,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | ||
| 205 | c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model) | 210 | c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model) |
| 206 | defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) | 211 | defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) |
| 207 | 212 | ||
| 213 | + c.model_config.t_one_ctc.model = C.CString(config.ModelConfig.ToneCtc.Model) | ||
| 214 | + defer C.free(unsafe.Pointer(c.model_config.t_one_ctc.model)) | ||
| 215 | + | ||
| 208 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | 216 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) |
| 209 | defer C.free(unsafe.Pointer(c.model_config.tokens)) | 217 | defer C.free(unsafe.Pointer(c.model_config.tokens)) |
| 210 | 218 |
| @@ -100,6 +100,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | @@ -100,6 +100,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | ||
| 100 | recognizer_config.model_config.nemo_ctc.model = | 100 | recognizer_config.model_config.nemo_ctc.model = |
| 101 | SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, ""); | 101 | SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, ""); |
| 102 | 102 | ||
| 103 | + recognizer_config.model_config.t_one_ctc.model = | ||
| 104 | + SHERPA_ONNX_OR(config->model_config.t_one_ctc.model, ""); | ||
| 105 | + | ||
| 103 | recognizer_config.model_config.num_threads = | 106 | recognizer_config.model_config.num_threads = |
| 104 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); | 107 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); |
| 105 | recognizer_config.model_config.provider_config.provider = | 108 | recognizer_config.model_config.provider_config.provider = |
| @@ -691,8 +694,7 @@ const SherpaOnnxOfflineRecognizerResult *SherpaOnnxGetOfflineStreamResult( | @@ -691,8 +694,7 @@ const SherpaOnnxOfflineRecognizerResult *SherpaOnnxGetOfflineStreamResult( | ||
| 691 | 694 | ||
| 692 | if (!result.durations.empty() && result.durations.size() == r->count) { | 695 | if (!result.durations.empty() && result.durations.size() == r->count) { |
| 693 | r->durations = new float[r->count]; | 696 | r->durations = new float[r->count]; |
| 694 | - std::copy(result.durations.begin(), result.durations.end(), | ||
| 695 | - r->durations); | 697 | + std::copy(result.durations.begin(), result.durations.end(), r->durations); |
| 696 | } else { | 698 | } else { |
| 697 | r->durations = nullptr; | 699 | r->durations = nullptr; |
| 698 | } | 700 | } |
| @@ -104,6 +104,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineNemoCtcModelConfig { | @@ -104,6 +104,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineNemoCtcModelConfig { | ||
| 104 | const char *model; | 104 | const char *model; |
| 105 | } SherpaOnnxOnlineNemoCtcModelConfig; | 105 | } SherpaOnnxOnlineNemoCtcModelConfig; |
| 106 | 106 | ||
| 107 | +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineToneCtcModelConfig { | ||
| 108 | + const char *model; | ||
| 109 | +} SherpaOnnxOnlineToneCtcModelConfig; | ||
| 110 | + | ||
| 107 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { | 111 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { |
| 108 | SherpaOnnxOnlineTransducerModelConfig transducer; | 112 | SherpaOnnxOnlineTransducerModelConfig transducer; |
| 109 | SherpaOnnxOnlineParaformerModelConfig paraformer; | 113 | SherpaOnnxOnlineParaformerModelConfig paraformer; |
| @@ -125,6 +129,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { | @@ -125,6 +129,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { | ||
| 125 | /// byte size excluding the trailing '\0' | 129 | /// byte size excluding the trailing '\0' |
| 126 | int32_t tokens_buf_size; | 130 | int32_t tokens_buf_size; |
| 127 | SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc; | 131 | SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc; |
| 132 | + SherpaOnnxOnlineToneCtcModelConfig t_one_ctc; | ||
| 128 | } SherpaOnnxOnlineModelConfig; | 133 | } SherpaOnnxOnlineModelConfig; |
| 129 | 134 | ||
| 130 | /// It expects 16 kHz 16-bit single channel wave format. | 135 | /// It expects 16 kHz 16-bit single channel wave format. |
| @@ -70,6 +70,7 @@ OnlineRecognizer OnlineRecognizer::Create( | @@ -70,6 +70,7 @@ OnlineRecognizer OnlineRecognizer::Create( | ||
| 70 | config.model_config.zipformer2_ctc.model.c_str(); | 70 | config.model_config.zipformer2_ctc.model.c_str(); |
| 71 | 71 | ||
| 72 | c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str(); | 72 | c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str(); |
| 73 | + c.model_config.t_one_ctc.model = config.model_config.t_one_ctc.model.c_str(); | ||
| 73 | 74 | ||
| 74 | c.model_config.tokens = config.model_config.tokens.c_str(); | 75 | c.model_config.tokens = config.model_config.tokens.c_str(); |
| 75 | c.model_config.num_threads = config.model_config.num_threads; | 76 | c.model_config.num_threads = config.model_config.num_threads; |
| @@ -36,11 +36,16 @@ struct OnlineNemoCtcModelConfig { | @@ -36,11 +36,16 @@ struct OnlineNemoCtcModelConfig { | ||
| 36 | std::string model; | 36 | std::string model; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | +struct OnlineToneCtcModelConfig { | ||
| 40 | + std::string model; | ||
| 41 | +}; | ||
| 42 | + | ||
| 39 | struct OnlineModelConfig { | 43 | struct OnlineModelConfig { |
| 40 | OnlineTransducerModelConfig transducer; | 44 | OnlineTransducerModelConfig transducer; |
| 41 | OnlineParaformerModelConfig paraformer; | 45 | OnlineParaformerModelConfig paraformer; |
| 42 | OnlineZipformer2CtcModelConfig zipformer2_ctc; | 46 | OnlineZipformer2CtcModelConfig zipformer2_ctc; |
| 43 | OnlineNemoCtcModelConfig nemo_ctc; | 47 | OnlineNemoCtcModelConfig nemo_ctc; |
| 48 | + OnlineToneCtcModelConfig t_one_ctc; | ||
| 44 | std::string tokens; | 49 | std::string tokens; |
| 45 | int32_t num_threads = 1; | 50 | int32_t num_threads = 1; |
| 46 | std::string provider = "cpu"; | 51 | std::string provider = "cpu"; |
| @@ -19,6 +19,7 @@ java_files += HomophoneReplacerConfig.java | @@ -19,6 +19,7 @@ java_files += HomophoneReplacerConfig.java | ||
| 19 | java_files += OnlineLMConfig.java | 19 | java_files += OnlineLMConfig.java |
| 20 | java_files += OnlineParaformerModelConfig.java | 20 | java_files += OnlineParaformerModelConfig.java |
| 21 | java_files += OnlineZipformer2CtcModelConfig.java | 21 | java_files += OnlineZipformer2CtcModelConfig.java |
| 22 | +java_files += OnlineToneCtcModelConfig.java | ||
| 22 | java_files += OnlineNeMoCtcModelConfig.java | 23 | java_files += OnlineNeMoCtcModelConfig.java |
| 23 | java_files += OnlineTransducerModelConfig.java | 24 | java_files += OnlineTransducerModelConfig.java |
| 24 | java_files += OnlineModelConfig.java | 25 | java_files += OnlineModelConfig.java |
| @@ -237,6 +237,7 @@ public class LibraryUtils { | @@ -237,6 +237,7 @@ public class LibraryUtils { | ||
| 237 | dir.deleteOnExit(); // schedule the directory itself | 237 | dir.deleteOnExit(); // schedule the directory itself |
| 238 | } | 238 | } |
| 239 | 239 | ||
| 240 | + static boolean isAndroid() { | ||
| 240 | String vmName = System.getProperty("java.vm.name", "").toLowerCase(Locale.ROOT); | 241 | String vmName = System.getProperty("java.vm.name", "").toLowerCase(Locale.ROOT); |
| 241 | String specVendor = System.getProperty("java.specification.vendor", ""); | 242 | String specVendor = System.getProperty("java.specification.vendor", ""); |
| 242 | return vmName.contains("dalvik") || vmName.contains("art") || | 243 | return vmName.contains("dalvik") || vmName.contains("art") || |
| @@ -8,6 +8,7 @@ public class OnlineModelConfig { | @@ -8,6 +8,7 @@ public class OnlineModelConfig { | ||
| 8 | private final OnlineParaformerModelConfig paraformer; | 8 | private final OnlineParaformerModelConfig paraformer; |
| 9 | private final OnlineZipformer2CtcModelConfig zipformer2Ctc; | 9 | private final OnlineZipformer2CtcModelConfig zipformer2Ctc; |
| 10 | private final OnlineNeMoCtcModelConfig neMoCtc; | 10 | private final OnlineNeMoCtcModelConfig neMoCtc; |
| 11 | + private final OnlineToneCtcModelConfig toneCtc; | ||
| 11 | private final String tokens; | 12 | private final String tokens; |
| 12 | private final int numThreads; | 13 | private final int numThreads; |
| 13 | private final boolean debug; | 14 | private final boolean debug; |
| @@ -21,6 +22,7 @@ public class OnlineModelConfig { | @@ -21,6 +22,7 @@ public class OnlineModelConfig { | ||
| 21 | this.paraformer = builder.paraformer; | 22 | this.paraformer = builder.paraformer; |
| 22 | this.zipformer2Ctc = builder.zipformer2Ctc; | 23 | this.zipformer2Ctc = builder.zipformer2Ctc; |
| 23 | this.neMoCtc = builder.neMoCtc; | 24 | this.neMoCtc = builder.neMoCtc; |
| 25 | + this.toneCtc = builder.toneCtc; | ||
| 24 | this.tokens = builder.tokens; | 26 | this.tokens = builder.tokens; |
| 25 | this.numThreads = builder.numThreads; | 27 | this.numThreads = builder.numThreads; |
| 26 | this.debug = builder.debug; | 28 | this.debug = builder.debug; |
| @@ -50,6 +52,10 @@ public class OnlineModelConfig { | @@ -50,6 +52,10 @@ public class OnlineModelConfig { | ||
| 50 | return neMoCtc; | 52 | return neMoCtc; |
| 51 | } | 53 | } |
| 52 | 54 | ||
| 55 | + public OnlineToneCtcModelConfig getToneCtc() { | ||
| 56 | + return toneCtc; | ||
| 57 | + } | ||
| 58 | + | ||
| 53 | public String getTokens() { | 59 | public String getTokens() { |
| 54 | return tokens; | 60 | return tokens; |
| 55 | } | 61 | } |
| @@ -83,6 +89,7 @@ public class OnlineModelConfig { | @@ -83,6 +89,7 @@ public class OnlineModelConfig { | ||
| 83 | private OnlineTransducerModelConfig transducer = OnlineTransducerModelConfig.builder().build(); | 89 | private OnlineTransducerModelConfig transducer = OnlineTransducerModelConfig.builder().build(); |
| 84 | private OnlineZipformer2CtcModelConfig zipformer2Ctc = OnlineZipformer2CtcModelConfig.builder().build(); | 90 | private OnlineZipformer2CtcModelConfig zipformer2Ctc = OnlineZipformer2CtcModelConfig.builder().build(); |
| 85 | private OnlineNeMoCtcModelConfig neMoCtc = OnlineNeMoCtcModelConfig.builder().build(); | 91 | private OnlineNeMoCtcModelConfig neMoCtc = OnlineNeMoCtcModelConfig.builder().build(); |
| 92 | + private OnlineToneCtcModelConfig toneCtc = OnlineToneCtcModelConfig.builder().build(); | ||
| 86 | private String tokens = ""; | 93 | private String tokens = ""; |
| 87 | private int numThreads = 1; | 94 | private int numThreads = 1; |
| 88 | private boolean debug = true; | 95 | private boolean debug = true; |
| @@ -115,6 +122,11 @@ public class OnlineModelConfig { | @@ -115,6 +122,11 @@ public class OnlineModelConfig { | ||
| 115 | return this; | 122 | return this; |
| 116 | } | 123 | } |
| 117 | 124 | ||
| 125 | + public Builder setToneCtc(OnlineToneCtcModelConfig toneCtc) { | ||
| 126 | + this.toneCtc = toneCtc; | ||
| 127 | + return this; | ||
| 128 | + } | ||
| 129 | + | ||
| 118 | public Builder setTokens(String tokens) { | 130 | public Builder setTokens(String tokens) { |
| 119 | this.tokens = tokens; | 131 | this.tokens = tokens; |
| 120 | return this; | 132 | return this; |
| 1 | +package com.k2fsa.sherpa.onnx; | ||
| 2 | + | ||
| 3 | +public class OnlineToneCtcModelConfig { | ||
| 4 | + private final String model; | ||
| 5 | + | ||
| 6 | + private OnlineToneCtcModelConfig(Builder builder) { | ||
| 7 | + this.model = builder.model; | ||
| 8 | + } | ||
| 9 | + | ||
| 10 | + public static Builder builder() { | ||
| 11 | + return new Builder(); | ||
| 12 | + } | ||
| 13 | + | ||
| 14 | + public String getModel() { | ||
| 15 | + return model; | ||
| 16 | + } | ||
| 17 | + | ||
| 18 | + public static class Builder { | ||
| 19 | + private String model = ""; | ||
| 20 | + | ||
| 21 | + public OnlineToneCtcModelConfig build() { | ||
| 22 | + return new OnlineToneCtcModelConfig(this); | ||
| 23 | + } | ||
| 24 | + | ||
| 25 | + public Builder setModel(String model) { | ||
| 26 | + this.model = model; | ||
| 27 | + return this; | ||
| 28 | + } | ||
| 29 | + } | ||
| 30 | +} |
| @@ -82,6 +82,18 @@ OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls, | @@ -82,6 +82,18 @@ OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls, | ||
| 82 | ans.nemo_ctc.model = p; | 82 | ans.nemo_ctc.model = p; |
| 83 | env->ReleaseStringUTFChars(s, p); | 83 | env->ReleaseStringUTFChars(s, p); |
| 84 | 84 | ||
| 85 | + // streaming T-one CTC | ||
| 86 | + fid = env->GetFieldID(model_config_cls, "toneCtc", | ||
| 87 | + "Lcom/k2fsa/sherpa/onnx/OnlineToneCtcModelConfig;"); | ||
| 88 | + jobject t_one_ctc_config = env->GetObjectField(model_config, fid); | ||
| 89 | + jclass t_one_ctc_config_cls = env->GetObjectClass(t_one_ctc_config); | ||
| 90 | + | ||
| 91 | + fid = env->GetFieldID(t_one_ctc_config_cls, "model", "Ljava/lang/String;"); | ||
| 92 | + s = (jstring)env->GetObjectField(t_one_ctc_config, fid); | ||
| 93 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 94 | + ans.t_one_ctc.model = p; | ||
| 95 | + env->ReleaseStringUTFChars(s, p); | ||
| 96 | + | ||
| 85 | fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;"); | 97 | fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;"); |
| 86 | s = (jstring)env->GetObjectField(model_config, fid); | 98 | s = (jstring)env->GetObjectField(model_config, fid); |
| 87 | p = env->GetStringUTFChars(s, nullptr); | 99 | p = env->GetStringUTFChars(s, nullptr); |
| @@ -33,11 +33,16 @@ data class OnlineNeMoCtcModelConfig( | @@ -33,11 +33,16 @@ data class OnlineNeMoCtcModelConfig( | ||
| 33 | var model: String = "", | 33 | var model: String = "", |
| 34 | ) | 34 | ) |
| 35 | 35 | ||
| 36 | +data class OnlineToneCtcModelConfig( | ||
| 37 | + var model: String = "", | ||
| 38 | +) | ||
| 39 | + | ||
| 36 | data class OnlineModelConfig( | 40 | data class OnlineModelConfig( |
| 37 | var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(), | 41 | var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(), |
| 38 | var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(), | 42 | var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(), |
| 39 | var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(), | 43 | var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(), |
| 40 | var neMoCtc: OnlineNeMoCtcModelConfig = OnlineNeMoCtcModelConfig(), | 44 | var neMoCtc: OnlineNeMoCtcModelConfig = OnlineNeMoCtcModelConfig(), |
| 45 | + var toneCtc: OnlineToneCtcModelConfig = OnlineToneCtcModelConfig(), | ||
| 41 | var tokens: String = "", | 46 | var tokens: String = "", |
| 42 | var numThreads: Int = 1, | 47 | var numThreads: Int = 1, |
| 43 | var debug: Boolean = false, | 48 | var debug: Boolean = false, |
| @@ -518,6 +523,16 @@ fun getModelConfig(type: Int): OnlineModelConfig? { | @@ -518,6 +523,16 @@ fun getModelConfig(type: Int): OnlineModelConfig? { | ||
| 518 | ) | 523 | ) |
| 519 | } | 524 | } |
| 520 | 525 | ||
| 526 | + 27 -> { | ||
| 527 | + val modelDir = "sherpa-onnx-streaming-t-one-russian-2025-09-08" | ||
| 528 | + return OnlineModelConfig( | ||
| 529 | + toneCtc = OnlineToneCtcModelConfig( | ||
| 530 | + model = "$modelDir/model.onnx", | ||
| 531 | + ), | ||
| 532 | + tokens = "$modelDir/tokens.txt", | ||
| 533 | + ) | ||
| 534 | + } | ||
| 535 | + | ||
| 521 | 1000 -> { | 536 | 1000 -> { |
| 522 | val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20" | 537 | val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20" |
| 523 | return OnlineModelConfig( | 538 | return OnlineModelConfig( |
| @@ -182,6 +182,11 @@ type | @@ -182,6 +182,11 @@ type | ||
| 182 | function ToString: AnsiString; | 182 | function ToString: AnsiString; |
| 183 | end; | 183 | end; |
| 184 | 184 | ||
| 185 | + TSherpaOnnxOnlineToneCtcModelConfig = record | ||
| 186 | + Model: AnsiString; | ||
| 187 | + function ToString: AnsiString; | ||
| 188 | + end; | ||
| 189 | + | ||
| 185 | TSherpaOnnxOnlineModelConfig = record | 190 | TSherpaOnnxOnlineModelConfig = record |
| 186 | Transducer: TSherpaOnnxOnlineTransducerModelConfig; | 191 | Transducer: TSherpaOnnxOnlineTransducerModelConfig; |
| 187 | Paraformer: TSherpaOnnxOnlineParaformerModelConfig; | 192 | Paraformer: TSherpaOnnxOnlineParaformerModelConfig; |
| @@ -196,6 +201,7 @@ type | @@ -196,6 +201,7 @@ type | ||
| 196 | TokensBuf: AnsiString; | 201 | TokensBuf: AnsiString; |
| 197 | TokensBufSize: Integer; | 202 | TokensBufSize: Integer; |
| 198 | NemoCtc: TSherpaOnnxOnlineNemoCtcModelConfig; | 203 | NemoCtc: TSherpaOnnxOnlineNemoCtcModelConfig; |
| 204 | + ToneCtc: TSherpaOnnxOnlineToneCtcModelConfig; | ||
| 199 | function ToString: AnsiString; | 205 | function ToString: AnsiString; |
| 200 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig); | 206 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig); |
| 201 | end; | 207 | end; |
| @@ -714,6 +720,10 @@ type | @@ -714,6 +720,10 @@ type | ||
| 714 | Model: PAnsiChar; | 720 | Model: PAnsiChar; |
| 715 | end; | 721 | end; |
| 716 | 722 | ||
| 723 | + SherpaOnnxOnlineToneCtcModelConfig = record | ||
| 724 | + Model: PAnsiChar; | ||
| 725 | + end; | ||
| 726 | + | ||
| 717 | SherpaOnnxOnlineModelConfig= record | 727 | SherpaOnnxOnlineModelConfig= record |
| 718 | Transducer: SherpaOnnxOnlineTransducerModelConfig; | 728 | Transducer: SherpaOnnxOnlineTransducerModelConfig; |
| 719 | Paraformer: SherpaOnnxOnlineParaformerModelConfig; | 729 | Paraformer: SherpaOnnxOnlineParaformerModelConfig; |
| @@ -728,6 +738,7 @@ type | @@ -728,6 +738,7 @@ type | ||
| 728 | TokensBuf: PAnsiChar; | 738 | TokensBuf: PAnsiChar; |
| 729 | TokensBufSize: cint32; | 739 | TokensBufSize: cint32; |
| 730 | NemoCtc: SherpaOnnxOnlineNemoCtcModelConfig; | 740 | NemoCtc: SherpaOnnxOnlineNemoCtcModelConfig; |
| 741 | + ToneCtc: SherpaOnnxOnlineToneCtcModelConfig; | ||
| 731 | end; | 742 | end; |
| 732 | SherpaOnnxFeatureConfig = record | 743 | SherpaOnnxFeatureConfig = record |
| 733 | SampleRate: cint32; | 744 | SampleRate: cint32; |
| @@ -1350,6 +1361,12 @@ begin | @@ -1350,6 +1361,12 @@ begin | ||
| 1350 | [Self.Model]); | 1361 | [Self.Model]); |
| 1351 | end; | 1362 | end; |
| 1352 | 1363 | ||
| 1364 | +function TSherpaOnnxOnlineToneCtcModelConfig.ToString: AnsiString; | ||
| 1365 | +begin | ||
| 1366 | + Result := Format('TSherpaOnnxOnlineToneCtcModelConfig(Model := %s)', | ||
| 1367 | + [Self.Model]); | ||
| 1368 | +end; | ||
| 1369 | + | ||
| 1353 | function TSherpaOnnxOnlineModelConfig.ToString: AnsiString; | 1370 | function TSherpaOnnxOnlineModelConfig.ToString: AnsiString; |
| 1354 | begin | 1371 | begin |
| 1355 | Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' + | 1372 | Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' + |
| @@ -1362,12 +1379,13 @@ begin | @@ -1362,12 +1379,13 @@ begin | ||
| 1362 | 'ModelType := %s, ' + | 1379 | 'ModelType := %s, ' + |
| 1363 | 'ModelingUnit := %s, ' + | 1380 | 'ModelingUnit := %s, ' + |
| 1364 | 'BpeVocab := %s, ' + | 1381 | 'BpeVocab := %s, ' + |
| 1365 | - 'NemoCtc := %s)', | 1382 | + 'NemoCtc := %s, ' + |
| 1383 | + 'ToneCtc := %s)', | ||
| 1366 | [Self.Transducer.ToString, Self.Paraformer.ToString, | 1384 | [Self.Transducer.ToString, Self.Paraformer.ToString, |
| 1367 | Self.Zipformer2Ctc.ToString, Self.Tokens, | 1385 | Self.Zipformer2Ctc.ToString, Self.Tokens, |
| 1368 | Self.NumThreads, Self.Provider, Self.Debug.ToString, | 1386 | Self.NumThreads, Self.Provider, Self.Debug.ToString, |
| 1369 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, | 1387 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, |
| 1370 | - Self.NemoCtc.ToString | 1388 | + Self.NemoCtc.ToString, Self.ToneCtc.ToString |
| 1371 | ]); | 1389 | ]); |
| 1372 | end; | 1390 | end; |
| 1373 | 1391 | ||
| @@ -1467,6 +1485,7 @@ begin | @@ -1467,6 +1485,7 @@ begin | ||
| 1467 | 1485 | ||
| 1468 | C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model); | 1486 | C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model); |
| 1469 | C.ModelConfig.NemoCtc.Model := PAnsiChar(Config.ModelConfig.NemoCtc.Model); | 1487 | C.ModelConfig.NemoCtc.Model := PAnsiChar(Config.ModelConfig.NemoCtc.Model); |
| 1488 | + C.ModelConfig.ToneCtc.Model := PAnsiChar(Config.ModelConfig.ToneCtc.Model); | ||
| 1470 | 1489 | ||
| 1471 | C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens); | 1490 | C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens); |
| 1472 | C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads; | 1491 | C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads; |
| @@ -76,6 +76,14 @@ func sherpaOnnxOnlineNemoCtcModelConfig( | @@ -76,6 +76,14 @@ func sherpaOnnxOnlineNemoCtcModelConfig( | ||
| 76 | ) | 76 | ) |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | +func sherpaOnnxOnlineToneCtcModelConfig( | ||
| 80 | + model: String = "" | ||
| 81 | +) -> SherpaOnnxOnlineToneCtcModelConfig { | ||
| 82 | + return SherpaOnnxOnlineToneCtcModelConfig( | ||
| 83 | + model: toCPointer(model) | ||
| 84 | + ) | ||
| 85 | +} | ||
| 86 | + | ||
| 79 | /// Return an instance of SherpaOnnxOnlineModelConfig. | 87 | /// Return an instance of SherpaOnnxOnlineModelConfig. |
| 80 | /// | 88 | /// |
| 81 | /// Please refer to | 89 | /// Please refer to |
| @@ -101,7 +109,8 @@ func sherpaOnnxOnlineModelConfig( | @@ -101,7 +109,8 @@ func sherpaOnnxOnlineModelConfig( | ||
| 101 | bpeVocab: String = "", | 109 | bpeVocab: String = "", |
| 102 | tokensBuf: String = "", | 110 | tokensBuf: String = "", |
| 103 | tokensBufSize: Int = 0, | 111 | tokensBufSize: Int = 0, |
| 104 | - nemoCtc: SherpaOnnxOnlineNemoCtcModelConfig = sherpaOnnxOnlineNemoCtcModelConfig() | 112 | + nemoCtc: SherpaOnnxOnlineNemoCtcModelConfig = sherpaOnnxOnlineNemoCtcModelConfig(), |
| 113 | + toneCtc: SherpaOnnxOnlineToneCtcModelConfig = sherpaOnnxOnlineToneCtcModelConfig() | ||
| 105 | ) -> SherpaOnnxOnlineModelConfig { | 114 | ) -> SherpaOnnxOnlineModelConfig { |
| 106 | return SherpaOnnxOnlineModelConfig( | 115 | return SherpaOnnxOnlineModelConfig( |
| 107 | transducer: transducer, | 116 | transducer: transducer, |
| @@ -116,7 +125,8 @@ func sherpaOnnxOnlineModelConfig( | @@ -116,7 +125,8 @@ func sherpaOnnxOnlineModelConfig( | ||
| 116 | bpe_vocab: toCPointer(bpeVocab), | 125 | bpe_vocab: toCPointer(bpeVocab), |
| 117 | tokens_buf: toCPointer(tokensBuf), | 126 | tokens_buf: toCPointer(tokensBuf), |
| 118 | tokens_buf_size: Int32(tokensBufSize), | 127 | tokens_buf_size: Int32(tokensBufSize), |
| 119 | - nemo_ctc: nemoCtc | 128 | + nemo_ctc: nemoCtc, |
| 129 | + t_one_ctc: toneCtc | ||
| 120 | ) | 130 | ) |
| 121 | } | 131 | } |
| 122 | 132 |
| 1 | +import AVFoundation | ||
| 2 | + | ||
| 3 | +extension AudioBuffer { | ||
| 4 | + func array() -> [Float] { | ||
| 5 | + return Array(UnsafeBufferPointer(self)) | ||
| 6 | + } | ||
| 7 | +} | ||
| 8 | + | ||
| 9 | +extension AVAudioPCMBuffer { | ||
| 10 | + func array() -> [Float] { | ||
| 11 | + return self.audioBufferList.pointee.mBuffers.array() | ||
| 12 | + } | ||
| 13 | +} | ||
| 14 | + | ||
| 15 | +func run() { | ||
| 16 | + let filePath = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav" | ||
| 17 | + let model = | ||
| 18 | + "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx" | ||
| 19 | + let tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt" | ||
| 20 | + | ||
| 21 | + let toneCtcConfig = sherpaOnnxOnlineToneCtcModelConfig( | ||
| 22 | + model: model) | ||
| 23 | + | ||
| 24 | + let modelConfig = sherpaOnnxOnlineModelConfig( | ||
| 25 | + tokens: tokens, | ||
| 26 | + toneCtc: toneCtcConfig | ||
| 27 | + ) | ||
| 28 | + | ||
| 29 | + let featConfig = sherpaOnnxFeatureConfig( | ||
| 30 | + sampleRate: 8000, | ||
| 31 | + featureDim: 80 | ||
| 32 | + ) | ||
| 33 | + var config = sherpaOnnxOnlineRecognizerConfig( | ||
| 34 | + featConfig: featConfig, // not used | ||
| 35 | + modelConfig: modelConfig | ||
| 36 | + ) | ||
| 37 | + | ||
| 38 | + let recognizer = SherpaOnnxRecognizer(config: &config) | ||
| 39 | + | ||
| 40 | + let fileURL: NSURL = NSURL(fileURLWithPath: filePath) | ||
| 41 | + let audioFile = try! AVAudioFile(forReading: fileURL as URL) | ||
| 42 | + | ||
| 43 | + let audioFormat = audioFile.processingFormat | ||
| 44 | + assert(audioFormat.sampleRate == 8000) | ||
| 45 | + assert(audioFormat.channelCount == 1) | ||
| 46 | + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32) | ||
| 47 | + | ||
| 48 | + let audioFrameCount = UInt32(audioFile.length) | ||
| 49 | + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount) | ||
| 50 | + | ||
| 51 | + try! audioFile.read(into: audioFileBuffer!) | ||
| 52 | + let array: [Float]! = audioFileBuffer?.array() | ||
| 53 | + | ||
| 54 | + let leftPadding = [Float](repeating: 0.0, count: 2400) | ||
| 55 | + recognizer.acceptWaveform(samples: leftPadding, sampleRate: Int(audioFormat.sampleRate)) | ||
| 56 | + | ||
| 57 | + recognizer.acceptWaveform(samples: array, sampleRate: Int(audioFormat.sampleRate)) | ||
| 58 | + | ||
| 59 | + let tailPadding = [Float](repeating: 0.0, count: 4800) | ||
| 60 | + recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate)) | ||
| 61 | + | ||
| 62 | + recognizer.inputFinished() | ||
| 63 | + while recognizer.isReady() { | ||
| 64 | + recognizer.decode() | ||
| 65 | + } | ||
| 66 | + | ||
| 67 | + let result = recognizer.getResult() | ||
| 68 | + print("\nresult is:\n\(result.text)") | ||
| 69 | + print("\nresult is:\n\(result.timestamps)") | ||
| 70 | +} | ||
| 71 | + | ||
| 72 | +@main | ||
| 73 | +struct App { | ||
| 74 | + static func main() { | ||
| 75 | + run() | ||
| 76 | + } | ||
| 77 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d ../build-swift-macos ]; then | ||
| 6 | + echo "Please run ../build-swift-macos.sh first!" | ||
| 7 | + exit 1 | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +if [ ! -d ./sherpa-onnx-streaming-t-one-russian-2025-09-08 ]; then | ||
| 11 | + echo "Downloading the pre-trained model for testing." | ||
| 12 | + | ||
| 13 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 14 | + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 15 | + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | ||
| 16 | +fi | ||
| 17 | + | ||
| 18 | +if [ ! -e ./decode-file-t-one-streaming ]; then | ||
| 19 | + # Note: We use -lc++ to link against libc++ instead of libstdc++ | ||
| 20 | + swiftc \ | ||
| 21 | + -lc++ \ | ||
| 22 | + -I ../build-swift-macos/install/include \ | ||
| 23 | + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ | ||
| 24 | + ./decode-file-t-one-streaming.swift ./SherpaOnnx.swift \ | ||
| 25 | + -L ../build-swift-macos/install/lib/ \ | ||
| 26 | + -l sherpa-onnx \ | ||
| 27 | + -l onnxruntime \ | ||
| 28 | + -o decode-file-t-one-streaming | ||
| 29 | + | ||
| 30 | + strip decode-file-t-one-streaming | ||
| 31 | +else | ||
| 32 | + echo "./decode-file-t-one-streaming exists - skip building" | ||
| 33 | +fi | ||
| 34 | + | ||
| 35 | +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH | ||
| 36 | +./decode-file-t-one-streaming |
| @@ -31,6 +31,10 @@ function freeConfig(config, Module) { | @@ -31,6 +31,10 @@ function freeConfig(config, Module) { | ||
| 31 | freeConfig(config.nemoCtc, Module) | 31 | freeConfig(config.nemoCtc, Module) |
| 32 | } | 32 | } |
| 33 | 33 | ||
| 34 | + if ('toneCtc' in config) { | ||
| 35 | + freeConfig(config.toneCtc, Module) | ||
| 36 | + } | ||
| 37 | + | ||
| 34 | if ('whisper' in config) { | 38 | if ('whisper' in config) { |
| 35 | freeConfig(config.whisper, Module) | 39 | freeConfig(config.whisper, Module) |
| 36 | } | 40 | } |
| @@ -173,6 +177,22 @@ function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) { | @@ -173,6 +177,22 @@ function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) { | ||
| 173 | } | 177 | } |
| 174 | } | 178 | } |
| 175 | 179 | ||
| 180 | +function initSherpaOnnxOnlineToneCtcModelConfig(config, Module) { | ||
| 181 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; | ||
| 182 | + const buffer = Module._malloc(n); | ||
| 183 | + | ||
| 184 | + const len = 1 * 4; // 1 pointer | ||
| 185 | + const ptr = Module._malloc(len); | ||
| 186 | + | ||
| 187 | + Module.stringToUTF8(config.model || '', buffer, n); | ||
| 188 | + | ||
| 189 | + Module.setValue(ptr, buffer, 'i8*'); | ||
| 190 | + | ||
| 191 | + return { | ||
| 192 | + buffer: buffer, ptr: ptr, len: len, | ||
| 193 | + } | ||
| 194 | +} | ||
| 195 | + | ||
| 176 | function initSherpaOnnxOnlineModelConfig(config, Module) { | 196 | function initSherpaOnnxOnlineModelConfig(config, Module) { |
| 177 | if (!('transducer' in config)) { | 197 | if (!('transducer' in config)) { |
| 178 | config.transducer = { | 198 | config.transducer = { |
| @@ -201,6 +221,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -201,6 +221,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 201 | }; | 221 | }; |
| 202 | } | 222 | } |
| 203 | 223 | ||
| 224 | + if (!('toneCtc' in config)) { | ||
| 225 | + config.toneCtc = { | ||
| 226 | + model: '', | ||
| 227 | + }; | ||
| 228 | + } | ||
| 229 | + | ||
| 204 | if (!('tokensBuf' in config)) { | 230 | if (!('tokensBuf' in config)) { |
| 205 | config.tokensBuf = ''; | 231 | config.tokensBuf = ''; |
| 206 | } | 232 | } |
| @@ -221,8 +247,11 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -221,8 +247,11 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 221 | const nemoCtc = | 247 | const nemoCtc = |
| 222 | initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module); | 248 | initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module); |
| 223 | 249 | ||
| 224 | - const len = | ||
| 225 | - transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 + nemoCtc.len; | 250 | + const toneCtc = |
| 251 | + initSherpaOnnxOnlineToneCtcModelConfig(config.toneCtc, Module); | ||
| 252 | + | ||
| 253 | + const len = transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 + | ||
| 254 | + nemoCtc.len + toneCtc.len; | ||
| 226 | 255 | ||
| 227 | const ptr = Module._malloc(len); | 256 | const ptr = Module._malloc(len); |
| 228 | 257 | ||
| @@ -308,9 +337,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -308,9 +337,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 308 | Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset); | 337 | Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset); |
| 309 | offset += nemoCtc.len; | 338 | offset += nemoCtc.len; |
| 310 | 339 | ||
| 340 | + Module._CopyHeap(toneCtc.ptr, toneCtc.len, ptr + offset); | ||
| 341 | + offset += toneCtc.len; | ||
| 342 | + | ||
| 311 | return { | 343 | return { |
| 312 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, | 344 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, |
| 313 | - paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc | 345 | + paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc, |
| 346 | + toneCtc: toneCtc, | ||
| 314 | } | 347 | } |
| 315 | } | 348 | } |
| 316 | 349 | ||
| @@ -519,6 +552,10 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -519,6 +552,10 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 519 | model: '', | 552 | model: '', |
| 520 | }; | 553 | }; |
| 521 | 554 | ||
| 555 | + const onlineToneCtcModelConfig = { | ||
| 556 | + model: '', | ||
| 557 | + }; | ||
| 558 | + | ||
| 522 | let type = 0; | 559 | let type = 0; |
| 523 | 560 | ||
| 524 | switch (type) { | 561 | switch (type) { |
| @@ -541,6 +578,10 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -541,6 +578,10 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 541 | // nemoCtc | 578 | // nemoCtc |
| 542 | onlineNemoCtcModelConfig.model = './nemo-ctc.onnx'; | 579 | onlineNemoCtcModelConfig.model = './nemo-ctc.onnx'; |
| 543 | break; | 580 | break; |
| 581 | + case 4: | ||
| 582 | + // toneCtc | ||
| 583 | + onlineToneCtcModelConfig.model = './tone-ctc.onnx'; | ||
| 584 | + break; | ||
| 544 | } | 585 | } |
| 545 | 586 | ||
| 546 | 587 | ||
| @@ -549,6 +590,7 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -549,6 +590,7 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 549 | paraformer: onlineParaformerModelConfig, | 590 | paraformer: onlineParaformerModelConfig, |
| 550 | zipformer2Ctc: onlineZipformer2CtcModelConfig, | 591 | zipformer2Ctc: onlineZipformer2CtcModelConfig, |
| 551 | nemoCtc: onlineNemoCtcModelConfig, | 592 | nemoCtc: onlineNemoCtcModelConfig, |
| 593 | + toneCtc: onlineToneCtcModelConfig, | ||
| 552 | tokens: './tokens.txt', | 594 | tokens: './tokens.txt', |
| 553 | numThreads: 1, | 595 | numThreads: 1, |
| 554 | provider: 'cpu', | 596 | provider: 'cpu', |
| @@ -559,8 +601,8 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -559,8 +601,8 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 559 | }; | 601 | }; |
| 560 | 602 | ||
| 561 | const featureConfig = { | 603 | const featureConfig = { |
| 562 | - sampleRate: 16000, | ||
| 563 | - featureDim: 80, | 604 | + sampleRate: 16000, // it is ignored when toneCtc is used |
| 605 | + featureDim: 80, // it is ignored when toneCtc is used | ||
| 564 | }; | 606 | }; |
| 565 | 607 | ||
| 566 | let recognizerConfig = { | 608 | let recognizerConfig = { |
| @@ -21,7 +21,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == | @@ -21,7 +21,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == | ||
| 21 | sizeof(SherpaOnnxOnlineTransducerModelConfig) + | 21 | sizeof(SherpaOnnxOnlineTransducerModelConfig) + |
| 22 | sizeof(SherpaOnnxOnlineParaformerModelConfig) + | 22 | sizeof(SherpaOnnxOnlineParaformerModelConfig) + |
| 23 | sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 + | 23 | sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 + |
| 24 | - sizeof(SherpaOnnxOnlineNemoCtcModelConfig), | 24 | + sizeof(SherpaOnnxOnlineNemoCtcModelConfig) + |
| 25 | + sizeof(SherpaOnnxOnlineToneCtcModelConfig), | ||
| 25 | ""); | 26 | ""); |
| 26 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 27 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| 27 | static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); | 28 | static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); |
| @@ -39,6 +40,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | @@ -39,6 +40,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | ||
| 39 | auto paraformer_model_config = &model_config->paraformer; | 40 | auto paraformer_model_config = &model_config->paraformer; |
| 40 | auto ctc_model_config = &model_config->zipformer2_ctc; | 41 | auto ctc_model_config = &model_config->zipformer2_ctc; |
| 41 | auto nemo_ctc = &model_config->nemo_ctc; | 42 | auto nemo_ctc = &model_config->nemo_ctc; |
| 43 | + auto t_one_ctc = &model_config->t_one_ctc; | ||
| 42 | 44 | ||
| 43 | fprintf(stdout, "----------online transducer model config----------\n"); | 45 | fprintf(stdout, "----------online transducer model config----------\n"); |
| 44 | fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder); | 46 | fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder); |
| @@ -55,6 +57,9 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | @@ -55,6 +57,9 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | ||
| 55 | fprintf(stdout, "----------online nemo ctc model config----------\n"); | 57 | fprintf(stdout, "----------online nemo ctc model config----------\n"); |
| 56 | fprintf(stdout, "model: %s\n", nemo_ctc->model); | 58 | fprintf(stdout, "model: %s\n", nemo_ctc->model); |
| 57 | 59 | ||
| 60 | + fprintf(stdout, "----------online t-one ctc model config----------\n"); | ||
| 61 | + fprintf(stdout, "model: %s\n", t_one_ctc->model); | ||
| 62 | + | ||
| 58 | fprintf(stdout, "tokens: %s\n", model_config->tokens); | 63 | fprintf(stdout, "tokens: %s\n", model_config->tokens); |
| 59 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); | 64 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); |
| 60 | fprintf(stdout, "provider: %s\n", model_config->provider); | 65 | fprintf(stdout, "provider: %s\n", model_config->provider); |
| @@ -75,9 +75,10 @@ function initModelConfig(config, Module) { | @@ -75,9 +75,10 @@ function initModelConfig(config, Module) { | ||
| 75 | const paraformer_len = 2 * 4 | 75 | const paraformer_len = 2 * 4 |
| 76 | const zipfomer2_ctc_len = 1 * 4 | 76 | const zipfomer2_ctc_len = 1 * 4 |
| 77 | const nemo_ctc_len = 1 * 4 | 77 | const nemo_ctc_len = 1 * 4 |
| 78 | + const t_one_ctc_len = 1 * 4 | ||
| 78 | 79 | ||
| 79 | const len = transducer.len + paraformer_len + zipfomer2_ctc_len + 9 * 4 + | 80 | const len = transducer.len + paraformer_len + zipfomer2_ctc_len + 9 * 4 + |
| 80 | - nemo_ctc_len; | 81 | + nemo_ctc_len + t_one_ctc_len; |
| 81 | 82 | ||
| 82 | const ptr = Module._malloc(len); | 83 | const ptr = Module._malloc(len); |
| 83 | Module.HEAPU8.fill(0, ptr, ptr + len); | 84 | Module.HEAPU8.fill(0, ptr, ptr + len); |
| @@ -152,6 +153,7 @@ function initModelConfig(config, Module) { | @@ -152,6 +153,7 @@ function initModelConfig(config, Module) { | ||
| 152 | 153 | ||
| 153 | Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32'); | 154 | Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32'); |
| 154 | offset += 4; | 155 | offset += 4; |
| 156 | + // skip nemo_ctc and t_one_ctc | ||
| 155 | 157 | ||
| 156 | return { | 158 | return { |
| 157 | buffer: buffer, ptr: ptr, len: len, transducer: transducer | 159 | buffer: buffer, ptr: ptr, len: len, transducer: transducer |
| @@ -20,7 +20,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == | @@ -20,7 +20,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == | ||
| 20 | sizeof(SherpaOnnxOnlineTransducerModelConfig) + | 20 | sizeof(SherpaOnnxOnlineTransducerModelConfig) + |
| 21 | sizeof(SherpaOnnxOnlineParaformerModelConfig) + | 21 | sizeof(SherpaOnnxOnlineParaformerModelConfig) + |
| 22 | sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 + | 22 | sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 + |
| 23 | - sizeof(SherpaOnnxOnlineNemoCtcModelConfig), | 23 | + sizeof(SherpaOnnxOnlineNemoCtcModelConfig) + |
| 24 | + sizeof(SherpaOnnxOnlineToneCtcModelConfig), | ||
| 24 | ""); | 25 | ""); |
| 25 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 26 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| 26 | static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) == | 27 | static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) == |
-
Please register or sign in to post a comment.