Fangjun Kuang
Committed by GitHub

Add various language bindings for streaming T-one Russian ASR models (#2576)

This PR adds support for streaming T-one Russian ASR models across various language bindings in the sherpa-onnx library. The changes enable T-one CTC (Connectionist Temporal Classification) model integration by adding new configuration structures and example implementations.

- Introduces OnlineToneCtcModelConfig structures across all language bindings (C, C++, Swift, Java, Kotlin, Go, etc.)
- Adds T-one CTC model support to WASM implementations for both ASR and keyword spotting
- Provides comprehensive example implementations demonstrating T-one model usage in multiple programming languages
Showing 62 changed files with 1,351 additions and 96 deletions.
@@ -4,6 +4,36 @@ set -ex @@ -4,6 +4,36 @@ set -ex
4 4
5 cd dart-api-examples 5 cd dart-api-examples
6 6
  7 +pushd streaming-asr
  8 +
  9 +echo '----------streaming T-one ctc----------'
  10 +./run-t-one-ctc.sh
  11 +rm -rf sherpa-onnx-*
  12 +
  13 +echo '----------streaming zipformer ctc HLG----------'
  14 +./run-zipformer-ctc-hlg.sh
  15 +rm -rf sherpa-onnx-*
  16 +
  17 +echo '----------streaming zipformer ctc----------'
  18 +./run-zipformer-ctc.sh
  19 +rm -rf sherpa-onnx-*
  20 +
  21 +echo '----------streaming zipformer transducer----------'
  22 +./run-zipformer-transducer-itn.sh
  23 +./run-zipformer-transducer.sh
  24 +rm -f itn*
  25 +rm -rf sherpa-onnx-*
  26 +
  27 +echo '----------streaming NeMo transducer----------'
  28 +./run-nemo-transducer.sh
  29 +rm -rf sherpa-onnx-*
  30 +
  31 +echo '----------streaming paraformer----------'
  32 +./run-paraformer.sh
  33 +rm -rf sherpa-onnx-*
  34 +
  35 +popd # streaming-asr
  36 +
7 pushd tts 37 pushd tts
8 38
9 echo '----------matcha tts----------' 39 echo '----------matcha tts----------'
@@ -167,29 +197,3 @@ popd @@ -167,29 +197,3 @@ popd
167 pushd keyword-spotter 197 pushd keyword-spotter
168 ./run-zh.sh 198 ./run-zh.sh
169 popd 199 popd
170 -  
171 -pushd streaming-asr  
172 -  
173 -echo '----------streaming zipformer ctc HLG----------'  
174 -./run-zipformer-ctc-hlg.sh  
175 -rm -rf sherpa-onnx-*  
176 -  
177 -echo '----------streaming zipformer ctc----------'  
178 -./run-zipformer-ctc.sh  
179 -rm -rf sherpa-onnx-*  
180 -  
181 -echo '----------streaming zipformer transducer----------'  
182 -./run-zipformer-transducer-itn.sh  
183 -./run-zipformer-transducer.sh  
184 -rm -f itn*  
185 -rm -rf sherpa-onnx-*  
186 -  
187 -echo '----------streaming NeMo transducer----------'  
188 -./run-nemo-transducer.sh  
189 -rm -rf sherpa-onnx-*  
190 -  
191 -echo '----------streaming paraformer----------'  
192 -./run-paraformer.sh  
193 -rm -rf sherpa-onnx-*  
194 -  
195 -popd # streaming-asr  
@@ -10,6 +10,17 @@ arch=$(node -p "require('os').arch()") @@ -10,6 +10,17 @@ arch=$(node -p "require('os').arch()")
10 platform=$(node -p "require('os').platform()") 10 platform=$(node -p "require('os').platform()")
11 node_version=$(node -p "process.versions.node.split('.')[0]") 11 node_version=$(node -p "process.versions.node.split('.')[0]")
12 12
  13 +echo "----------streaming ASR T-one----------"
  14 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  15 +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  16 +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  17 +
  18 +node ./test_asr_streaming_t_one_ctc.js
  19 +
  20 +rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08
  21 +
  22 +echo "----------KittenTTS----------"
  23 +
13 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 24 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
14 tar xf kitten-nano-en-v0_1-fp16.tar.bz2 25 tar xf kitten-nano-en-v0_1-fp16.tar.bz2
15 rm kitten-nano-en-v0_1-fp16.tar.bz2 26 rm kitten-nano-en-v0_1-fp16.tar.bz2
@@ -9,6 +9,13 @@ git status @@ -9,6 +9,13 @@ git status
9 ls -lh 9 ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
  12 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  13 +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  14 +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  15 +node ./test-online-t-one-ctc.js
  16 +
  17 +rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08
  18 +
12 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 19 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
13 tar xf kitten-nano-en-v0_1-fp16.tar.bz2 20 tar xf kitten-nano-en-v0_1-fp16.tar.bz2
14 rm kitten-nano-en-v0_1-fp16.tar.bz2 21 rm kitten-nano-en-v0_1-fp16.tar.bz2
@@ -9,6 +9,9 @@ ls -lh @@ -9,6 +9,9 @@ ls -lh
9 9
10 ./run-test-version.sh 10 ./run-test-version.sh
11 11
  12 +./run-decode-file-t-one-streaming.sh
  13 +rm -rf sherpa-onnx-streaming-*
  14 +
12 ./run-compute-speaker-embeddings.sh 15 ./run-compute-speaker-embeddings.sh
13 rm -fv *.wav *.onnx 16 rm -fv *.wav *.onnx
14 17
@@ -75,6 +75,36 @@ jobs: @@ -75,6 +75,36 @@ jobs:
75 otool -L ./install/lib/libsherpa-onnx-c-api.dylib 75 otool -L ./install/lib/libsherpa-onnx-c-api.dylib
76 fi 76 fi
77 77
  78 + - name: Test T-one
  79 + shell: bash
  80 + run: |
  81 + name=streaming-t-one-ctc-c-api
  82 + gcc -o $name ./c-api-examples/$name.c \
  83 + -I ./build/install/include \
  84 + -L ./build/install/lib/ \
  85 + -l sherpa-onnx-c-api \
  86 + -l onnxruntime
  87 +
  88 + ls -lh $name
  89 +
  90 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  91 + ldd ./$name
  92 + echo "----"
  93 + readelf -d ./$name
  94 + fi
  95 +
  96 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  97 + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  98 + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  99 +
  100 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  101 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  102 +
  103 + ./$name
  104 +
  105 + rm $name
  106 + rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08
  107 +
78 - name: Test KittenTTS 108 - name: Test KittenTTS
79 shell: bash 109 shell: bash
80 run: | 110 run: |
@@ -530,7 +560,8 @@ jobs: @@ -530,7 +560,8 @@ jobs:
530 rm -rf sherpa-onnx-* 560 rm -rf sherpa-onnx-*
531 561
532 - name: Test ffmpeg 562 - name: Test ffmpeg
533 - if: matrix.os == 'macos-latest' 563 + # if: matrix.os == 'macos-latest'
  564 + if: false
534 shell: bash 565 shell: bash
535 run: | 566 run: |
536 brew install ffmpeg 567 brew install ffmpeg
@@ -78,6 +78,40 @@ jobs: @@ -78,6 +78,40 @@ jobs:
78 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib 78 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
79 fi 79 fi
80 80
  81 + - name: Test T-one
  82 + shell: bash
  83 + run: |
  84 + name=streaming-t-one-ctc-cxx-api
  85 + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
  86 + -I ./build/install/include \
  87 + -L ./build/install/lib/ \
  88 + -l sherpa-onnx-cxx-api \
  89 + -l sherpa-onnx-c-api \
  90 + -l onnxruntime
  91 +
  92 + ls -lh $name
  93 +
  94 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  95 + ls -lh ./$name
  96 + ldd ./$name
  97 + echo "----"
  98 + readelf -d ./$name
  99 + fi
  100 +
  101 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  102 + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  103 + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  104 +
  105 + echo "---"
  106 +
  107 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  108 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  109 +
  110 + ./$name
  111 +
  112 + rm -rf sherpa-onnx-streaming-t-one-russian-2025-09-08
  113 + rm -v ./$name
  114 +
81 - name: Test KittenTTS 115 - name: Test KittenTTS
82 shell: bash 116 shell: bash
83 run: | 117 run: |
@@ -126,6 +126,43 @@ jobs: @@ -126,6 +126,43 @@ jobs:
126 cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad-with-non-streaming-asr 126 cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad-with-non-streaming-asr
127 fi 127 fi
128 128
  129 + - name: Run Pascal test (Streaming ASR)
  130 + shell: bash
  131 + run: |
  132 + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
  133 +
  134 + cd ./pascal-api-examples
  135 +
  136 + pushd streaming-asr
  137 +
  138 + ./run-t-one-ctc.sh
  139 + rm -rf sherpa-onnx-*
  140 + echo "---"
  141 +
  142 + ./run-zipformer-transducer.sh
  143 + rm -rf sherpa-onnx-*
  144 + echo "---"
  145 +
  146 + ./run-nemo-transducer.sh
  147 + rm -rf sherpa-onnx-*
  148 + echo "---"
  149 +
  150 + if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
  151 + ./run-paraformer.sh
  152 + rm -rf sherpa-onnx-*
  153 + echo "---"
  154 +
  155 + ./run-zipformer-ctc.sh
  156 + echo "---"
  157 +
  158 + ./run-zipformer-ctc-hlg.sh
  159 + rm -rf sherpa-onnx-*
  160 + echo "---"
  161 + fi
  162 +
  163 + ls -lh
  164 + popd
  165 +
129 - name: Run Pascal test (VAD test) 166 - name: Run Pascal test (VAD test)
130 shell: bash 167 shell: bash
131 run: | 168 run: |
@@ -321,36 +358,3 @@ jobs: @@ -321,36 +358,3 @@ jobs:
321 echo "---" 358 echo "---"
322 ls -lh 359 ls -lh
323 popd 360 popd
324 -  
325 - - name: Run Pascal test (Streaming ASR)  
326 - shell: bash  
327 - run: |  
328 - export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH  
329 -  
330 - cd ./pascal-api-examples  
331 -  
332 - pushd streaming-asr  
333 -  
334 - ./run-zipformer-transducer.sh  
335 - rm -rf sherpa-onnx-*  
336 - echo "---"  
337 -  
338 - ./run-nemo-transducer.sh  
339 - rm -rf sherpa-onnx-*  
340 - echo "---"  
341 -  
342 - if [[ ${{ matrix.os }} != 'windows-latest' ]]; then  
343 - ./run-paraformer.sh  
344 - rm -rf sherpa-onnx-*  
345 - echo "---"  
346 -  
347 - ./run-zipformer-ctc.sh  
348 - echo "---"  
349 -  
350 - ./run-zipformer-ctc-hlg.sh  
351 - rm -rf sherpa-onnx-*  
352 - echo "---"  
353 - fi  
354 -  
355 - ls -lh  
356 - popd  
@@ -108,6 +108,13 @@ jobs: @@ -108,6 +108,13 @@ jobs:
108 cd ./java-api-examples 108 cd ./java-api-examples
109 ./run-version-test.sh 109 ./run-version-test.sh
110 110
  111 + - name: Run java test (Streaming T-one)
  112 + shell: bash
  113 + run: |
  114 + cd ./java-api-examples
  115 + ./run-streaming-decode-file-tone-ctc.sh
  116 + rm -rf sherpa-onnx-streaming-t-one-*
  117 +
111 - name: Run java test (Nemo Canary) 118 - name: Run java test (Nemo Canary)
112 shell: bash 119 shell: bash
113 run: | 120 run: |
@@ -140,19 +140,6 @@ jobs: @@ -140,19 +140,6 @@ jobs:
140 name: ${{ matrix.os }}-libs 140 name: ${{ matrix.os }}-libs
141 path: to-upload/ 141 path: to-upload/
142 142
143 - - name: Test non-streaming decoding files with NeMo Canary  
144 - shell: bash  
145 - run: |  
146 - cd scripts/go/_internal/non-streaming-canary-decode-files/  
147 - ls -lh  
148 - go mod tidy  
149 - cat go.mod  
150 - go build  
151 - ls -lh  
152 -  
153 - ./run.sh  
154 - rm -rf sherpa-onnx-nemo-*  
155 -  
156 - name: Test streaming decoding files 143 - name: Test streaming decoding files
157 shell: bash 144 shell: bash
158 run: | 145 run: |
@@ -163,6 +150,9 @@ jobs: @@ -163,6 +150,9 @@ jobs:
163 go build 150 go build
164 ls -lh 151 ls -lh
165 152
  153 + echo "Test T-one CTC"
  154 + ./run-t-one-ctc.sh
  155 +
166 echo "Test zipformer2 CTC" 156 echo "Test zipformer2 CTC"
167 ./run-zipformer2-ctc-with-hr.sh 157 ./run-zipformer2-ctc-with-hr.sh
168 ./run-zipformer2-ctc.sh 158 ./run-zipformer2-ctc.sh
@@ -179,6 +169,21 @@ jobs: @@ -179,6 +169,21 @@ jobs:
179 ./run-paraformer.sh 169 ./run-paraformer.sh
180 rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en 170 rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
181 171
  172 + - name: Test non-streaming decoding files with NeMo Canary
  173 + shell: bash
  174 + run: |
  175 + cd scripts/go/_internal/non-streaming-canary-decode-files/
  176 + ls -lh
  177 + go mod tidy
  178 + cat go.mod
  179 + go build
  180 + ls -lh
  181 +
  182 + ./run.sh
  183 + rm -rf sherpa-onnx-nemo-*
  184 +
  185 +
  186 +
182 - name: Test non-streaming decoding files 187 - name: Test non-streaming decoding files
183 shell: bash 188 shell: bash
184 run: | 189 run: |
@@ -150,3 +150,4 @@ kitten-nano-en-v0_1-fp16 @@ -150,3 +150,4 @@ kitten-nano-en-v0_1-fp16
150 *.jar 150 *.jar
151 vocab.json 151 vocab.json
152 *.so 152 *.so
  153 +sherpa-onnx-streaming-t-one-russian-2025-09-08
@@ -44,6 +44,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api) @@ -44,6 +44,9 @@ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
44 add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c) 44 add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
45 target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api) 45 target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)
46 46
  47 +add_executable(streaming-t-one-ctc-c-api streaming-t-one-ctc-c-api.c)
  48 +target_link_libraries(streaming-t-one-ctc-c-api sherpa-onnx-c-api)
  49 +
47 add_executable(audio-tagging-c-api audio-tagging-c-api.c) 50 add_executable(audio-tagging-c-api audio-tagging-c-api.c)
48 target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api) 51 target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api)
49 52
  1 +// c-api-examples/streaming-t-one-ctc-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +//
  6 +// This file demonstrates how to use streaming T-one with sherpa-onnx's C
  7 +// API.
  8 +// clang-format off
  9 +//
  10 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  11 +// tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  12 +// rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  13 +//
  14 +// clang-format on
  15 +
  16 +#include <stdio.h>
  17 +#include <stdlib.h>
  18 +#include <string.h>
  19 +
  20 +#include "sherpa-onnx/c-api/c-api.h"
  21 +
  22 +int32_t main() {
  23 + const char *wav_filename =
  24 + "sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav";
  25 + const char *model =
  26 + "sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx";
  27 + const char *tokens =
  28 + "sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt";
  29 + const char *provider = "cpu";
  30 +
  31 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  32 + if (wave == NULL) {
  33 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  34 + return -1;
  35 + }
  36 +
  37 + // Zipformer config
  38 + SherpaOnnxOnlineToneCtcModelConfig t_one_ctc;
  39 + memset(&t_one_ctc, 0, sizeof(t_one_ctc));
  40 + t_one_ctc.model = model;
  41 +
  42 + // Online model config
  43 + SherpaOnnxOnlineModelConfig online_model_config;
  44 + memset(&online_model_config, 0, sizeof(online_model_config));
  45 + online_model_config.debug = 1;
  46 + online_model_config.num_threads = 1;
  47 + online_model_config.provider = provider;
  48 + online_model_config.tokens = tokens;
  49 + online_model_config.t_one_ctc = t_one_ctc;
  50 +
  51 + // Recognizer config
  52 + SherpaOnnxOnlineRecognizerConfig recognizer_config;
  53 + memset(&recognizer_config, 0, sizeof(recognizer_config));
  54 + recognizer_config.decoding_method = "greedy_search";
  55 + recognizer_config.model_config = online_model_config;
  56 +
  57 + const SherpaOnnxOnlineRecognizer *recognizer =
  58 + SherpaOnnxCreateOnlineRecognizer(&recognizer_config);
  59 +
  60 + if (recognizer == NULL) {
  61 + fprintf(stderr, "Please check your config!\n");
  62 + SherpaOnnxFreeWave(wave);
  63 + return -1;
  64 + }
  65 +
  66 + const SherpaOnnxOnlineStream *stream =
  67 + SherpaOnnxCreateOnlineStream(recognizer);
  68 +
  69 + const SherpaOnnxDisplay *display = SherpaOnnxCreateDisplay(50);
  70 + int32_t segment_id = 0;
  71 +
  72 +// simulate streaming. You can choose an arbitrary N
  73 +#define N 3200
  74 +
  75 + fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
  76 + wave->sample_rate, wave->num_samples,
  77 + (float)wave->num_samples / wave->sample_rate);
  78 +
  79 + float left_paddings[2400] = {0}; // 0.3 seconds at 8 kHz sample rate
  80 + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, left_paddings,
  81 + 2400);
  82 +
  83 + int32_t k = 0;
  84 + while (k < wave->num_samples) {
  85 + int32_t start = k;
  86 + int32_t end =
  87 + (start + N > wave->num_samples) ? wave->num_samples : (start + N);
  88 + k += N;
  89 +
  90 + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate,
  91 + wave->samples + start, end - start);
  92 + while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
  93 + SherpaOnnxDecodeOnlineStream(recognizer, stream);
  94 + }
  95 +
  96 + const SherpaOnnxOnlineRecognizerResult *r =
  97 + SherpaOnnxGetOnlineStreamResult(recognizer, stream);
  98 +
  99 + if (strlen(r->text)) {
  100 + SherpaOnnxPrint(display, segment_id, r->text);
  101 + }
  102 +
  103 + if (SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream)) {
  104 + if (strlen(r->text)) {
  105 + ++segment_id;
  106 + }
  107 + SherpaOnnxOnlineStreamReset(recognizer, stream);
  108 + }
  109 +
  110 + SherpaOnnxDestroyOnlineRecognizerResult(r);
  111 + }
  112 +
  113 + // add some tail padding
  114 + float tail_paddings[4800] = {0}; // 0.6 seconds at 8 kHz sample rate
  115 + SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
  116 + 4800);
  117 +
  118 + SherpaOnnxOnlineStreamInputFinished(stream);
  119 + while (SherpaOnnxIsOnlineStreamReady(recognizer, stream)) {
  120 + SherpaOnnxDecodeOnlineStream(recognizer, stream);
  121 + }
  122 +
  123 + SherpaOnnxFreeWave(wave);
  124 +
  125 + const SherpaOnnxOnlineRecognizerResult *r =
  126 + SherpaOnnxGetOnlineStreamResult(recognizer, stream);
  127 +
  128 + if (strlen(r->text)) {
  129 + SherpaOnnxPrint(display, segment_id, r->text);
  130 + }
  131 +
  132 + SherpaOnnxDestroyOnlineRecognizerResult(r);
  133 +
  134 + SherpaOnnxDestroyDisplay(display);
  135 + SherpaOnnxDestroyOnlineStream(stream);
  136 + SherpaOnnxDestroyOnlineRecognizer(recognizer);
  137 + fprintf(stderr, "\n");
  138 +
  139 + return 0;
  140 +}
@@ -15,6 +15,9 @@ target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api) @@ -15,6 +15,9 @@ target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api)
15 add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc) 15 add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc)
16 target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api) 16 target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api)
17 17
  18 +add_executable(streaming-t-one-ctc-cxx-api streaming-t-one-ctc-cxx-api.cc)
  19 +target_link_libraries(streaming-t-one-ctc-cxx-api sherpa-onnx-cxx-api)
  20 +
18 add_executable(whisper-cxx-api ./whisper-cxx-api.cc) 21 add_executable(whisper-cxx-api ./whisper-cxx-api.cc)
19 target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api) 22 target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api)
20 23
  1 +// cxx-api-examples/streaming-t-one-ctc-cxx-api.cc
  2 +// Copyright (c) 2025 Xiaomi Corporation
  3 +
  4 +//
  5 +// This file demonstrates how to use streaming T-one
  6 +// with sherpa-onnx's C++ API.
  7 +//
  8 +// clang-format off
  9 +//
  10 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  11 +// tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  12 +// rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  13 +//
  14 +// clang-format on
  15 +
  16 +#include <chrono> // NOLINT
  17 +#include <iostream>
  18 +#include <string>
  19 +
  20 +#include "sherpa-onnx/c-api/cxx-api.h"
  21 +
  22 +int32_t main() {
  23 + using namespace sherpa_onnx::cxx; // NOLINT
  24 + OnlineRecognizerConfig config;
  25 +
  26 + // please see
  27 + config.model_config.t_one_ctc.model =
  28 + "sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx";
  29 +
  30 + config.model_config.tokens =
  31 + "sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt";
  32 +
  33 + config.model_config.num_threads = 1;
  34 +
  35 + std::cout << "Loading model\n";
  36 + OnlineRecognizer recognizer = OnlineRecognizer::Create(config);
  37 + if (!recognizer.Get()) {
  38 + std::cerr << "Please check your config\n";
  39 + return -1;
  40 + }
  41 + std::cout << "Loading model done\n";
  42 +
  43 + std::string wave_filename =
  44 + "sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav";
  45 +
  46 + Wave wave = ReadWave(wave_filename);
  47 + if (wave.samples.empty()) {
  48 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  49 + return -1;
  50 + }
  51 +
  52 + std::cout << "Start recognition\n";
  53 + const auto begin = std::chrono::steady_clock::now();
  54 +
  55 + OnlineStream stream = recognizer.CreateStream();
  56 + std::vector<float> left_padding(2400); // 0.3 seconds at 8kHz
  57 + std::vector<float> tail_padding(4800); // 0.6 seconds at 8kHz
  58 +
  59 + stream.AcceptWaveform(wave.sample_rate, left_padding.data(),
  60 + left_padding.size());
  61 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  62 + wave.samples.size());
  63 + stream.AcceptWaveform(wave.sample_rate, tail_padding.data(),
  64 + tail_padding.size());
  65 + stream.InputFinished();
  66 +
  67 + while (recognizer.IsReady(&stream)) {
  68 + recognizer.Decode(&stream);
  69 + }
  70 +
  71 + OnlineRecognizerResult result = recognizer.GetResult(&stream);
  72 +
  73 + const auto end = std::chrono::steady_clock::now();
  74 + const float elapsed_seconds =
  75 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  76 + .count() /
  77 + 1000.;
  78 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  79 + float rtf = elapsed_seconds / duration;
  80 +
  81 + std::cout << "text: " << result.text << "\n";
  82 + printf("Number of threads: %d\n", config.model_config.num_threads);
  83 + printf("Duration: %.3fs\n", duration);
  84 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  85 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  86 + duration, rtf);
  87 +
  88 + return 0;
  89 +}
// Copyright (c) 2025 Xiaomi Corporation

// Streaming speech recognition with a T-one CTC model.
// The T-one model operates on 8 kHz audio.
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final ctc = sherpa_onnx.OnlineToneCtcModelConfig(
    model: model,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    toneCtc: ctc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OnlineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  // Feed 0.3 seconds of leading silence, assuming sampleRate is 8kHz.
  final leftPaddings = Float32List(2400);
  stream.acceptWaveform(
    samples: leftPaddings,
    sampleRate: waveData.sampleRate,
  );

  // simulate streaming. You can choose an arbitrary chunk size.
  // chunkSize of a single sample is also ok, i.e, chunkSize = 1
  final chunkSize = 1600; // 0.2 seconds for 8kHz

  var last = '';
  for (int start = 0; start < waveData.samples.length; start += chunkSize) {
    // Clamp the last chunk so trailing samples are not dropped.
    final end = (start + chunkSize <= waveData.samples.length)
        ? start + chunkSize
        : waveData.samples.length;
    stream.acceptWaveform(
      samples: Float32List.sublistView(waveData.samples, start, end),
      sampleRate: waveData.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    // Print the partial result whenever it changes.
    final result = recognizer.getResult(stream);
    if (result.text != last && result.text != '') {
      last = result.text;
      print(last);
    }
  }

  // Feed 0.6 seconds of trailing silence, assuming sampleRate is 8kHz,
  // so the last words are fully decoded.
  final tailPaddings = Float32List(4800);
  stream.acceptWaveform(
    samples: tailPaddings,
    sampleRate: waveData.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final result = recognizer.getResult(stream);

  if (result.text != '') {
    print(result.text);
  }

  stream.free();
  recognizer.free();
}
#!/usr/bin/env bash
# Download the streaming T-one Russian ASR model (if needed) and run the
# Dart example against its bundled test wave.

set -ex

dart pub get

model_dir=sherpa-onnx-streaming-t-one-russian-2025-09-08

# Fetch and unpack the model only when it is not already present.
if [ ! -f ./$model_dir/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2
  tar xvf $model_dir.tar.bz2
  rm $model_dir.tar.bz2
fi

dart run \
  ./bin/t-one-ctc.dart \
  --model ./$model_dir/model.onnx \
  --tokens ./$model_dir/tokens.txt \
  --input-wav ./$model_dir/0.wav
@@ -38,6 +38,9 @@ class OnlineDecodeFiles @@ -38,6 +38,9 @@ class OnlineDecodeFiles
38 [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] 38 [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")]
39 public string Zipformer2Ctc { get; set; } = string.Empty; 39 public string Zipformer2Ctc { get; set; } = string.Empty;
40 40
  41 + [Option("t-one-ctc", Required = false, HelpText = "Path to T-one CTC onnx model")]
  42 + public string ToneCtc { get; set; } = string.Empty;
  43 +
41 [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] 44 [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
42 public int NumThreads { get; set; } = 1; 45 public int NumThreads { get; set; } = 1;
43 46
@@ -173,6 +176,7 @@ to download pre-trained streaming models. @@ -173,6 +176,7 @@ to download pre-trained streaming models.
173 config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder; 176 config.ModelConfig.Paraformer.Decoder = options.ParaformerDecoder;
174 177
175 config.ModelConfig.Zipformer2Ctc.Model = options.Zipformer2Ctc; 178 config.ModelConfig.Zipformer2Ctc.Model = options.Zipformer2Ctc;
  179 + config.ModelConfig.ToneCtc.Model = options.ToneCtc;
176 180
177 config.ModelConfig.Tokens = options.Tokens; 181 config.ModelConfig.Tokens = options.Tokens;
178 config.ModelConfig.Provider = options.Provider; 182 config.ModelConfig.Provider = options.Provider;
@@ -203,10 +207,15 @@ to download pre-trained streaming models. @@ -203,10 +207,15 @@ to download pre-trained streaming models.
203 var s = recognizer.CreateStream(); 207 var s = recognizer.CreateStream();
204 208
205 var waveReader = new WaveReader(files[i]); 209 var waveReader = new WaveReader(files[i]);
  210 +
  211 + var leftPadding = new float[(int)(waveReader.SampleRate * 0.3)];
  212 + s.AcceptWaveform(waveReader.SampleRate, leftPadding);
  213 +
206 s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); 214 s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
207 215
208 - var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; 216 + var tailPadding = new float[(int)(waveReader.SampleRate * 0.6)];
209 s.AcceptWaveform(waveReader.SampleRate, tailPadding); 217 s.AcceptWaveform(waveReader.SampleRate, tailPadding);
  218 +
210 s.InputFinished(); 219 s.InputFinished();
211 220
212 streams.Add(s); 221 streams.Add(s);
#!/usr/bin/env bash
# Download the streaming T-one Russian ASR model (if needed) and decode
# its bundled test wave with the .NET online-decode-files example.

set -ex

model_dir=sherpa-onnx-streaming-t-one-russian-2025-09-08

# Fetch and unpack the model only when it is not already present.
if [ ! -f ./$model_dir/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2
  tar xvf $model_dir.tar.bz2
  rm $model_dir.tar.bz2
fi

dotnet run -c Release \
  --tokens ./$model_dir/tokens.txt \
  --t-one-ctc ./$model_dir/model.onnx \
  --files ./$model_dir/0.wav
@@ -107,12 +107,34 @@ class OnlineNemoCtcModelConfig { @@ -107,12 +107,34 @@ class OnlineNemoCtcModelConfig {
107 final String model; 107 final String model;
108 } 108 }
109 109
/// Configuration for the streaming T-one CTC model.
class OnlineToneCtcModelConfig {
  const OnlineToneCtcModelConfig({this.model = ''});

  /// Builds a config from a JSON map; a missing or null `model` key
  /// yields an empty path.
  factory OnlineToneCtcModelConfig.fromJson(Map<String, dynamic> json) =>
      OnlineToneCtcModelConfig(model: (json['model'] as String?) ?? '');

  /// Path to the T-one CTC onnx model file.
  final String model;

  Map<String, dynamic> toJson() => <String, dynamic>{'model': model};

  @override
  String toString() => 'OnlineToneCtcModelConfig(model: $model)';
}
  130 +
110 class OnlineModelConfig { 131 class OnlineModelConfig {
111 const OnlineModelConfig({ 132 const OnlineModelConfig({
112 this.transducer = const OnlineTransducerModelConfig(), 133 this.transducer = const OnlineTransducerModelConfig(),
113 this.paraformer = const OnlineParaformerModelConfig(), 134 this.paraformer = const OnlineParaformerModelConfig(),
114 this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(), 135 this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(),
115 this.nemoCtc = const OnlineNemoCtcModelConfig(), 136 this.nemoCtc = const OnlineNemoCtcModelConfig(),
  137 + this.toneCtc = const OnlineToneCtcModelConfig(),
116 required this.tokens, 138 required this.tokens,
117 this.numThreads = 1, 139 this.numThreads = 1,
118 this.provider = 'cpu', 140 this.provider = 'cpu',
@@ -132,6 +154,8 @@ class OnlineModelConfig { @@ -132,6 +154,8 @@ class OnlineModelConfig {
132 json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}), 154 json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}),
133 nemoCtc: OnlineNemoCtcModelConfig.fromJson( 155 nemoCtc: OnlineNemoCtcModelConfig.fromJson(
134 json['nemoCtc'] as Map<String, dynamic>? ?? const {}), 156 json['nemoCtc'] as Map<String, dynamic>? ?? const {}),
  157 + toneCtc: OnlineToneCtcModelConfig.fromJson(
  158 + json['toneCtc'] as Map<String, dynamic>? ?? const {}),
135 tokens: json['tokens'] as String, 159 tokens: json['tokens'] as String,
136 numThreads: json['numThreads'] as int? ?? 1, 160 numThreads: json['numThreads'] as int? ?? 1,
137 provider: json['provider'] as String? ?? 'cpu', 161 provider: json['provider'] as String? ?? 'cpu',
@@ -144,7 +168,7 @@ class OnlineModelConfig { @@ -144,7 +168,7 @@ class OnlineModelConfig {
144 168
145 @override 169 @override
146 String toString() { 170 String toString() {
147 - return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, nemoCtc: $nemoCtc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)'; 171 + return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, nemoCtc: $nemoCtc, toneCtc: $toneCtc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)';
148 } 172 }
149 173
150 Map<String, dynamic> toJson() => { 174 Map<String, dynamic> toJson() => {
@@ -152,6 +176,7 @@ class OnlineModelConfig { @@ -152,6 +176,7 @@ class OnlineModelConfig {
152 'paraformer': paraformer.toJson(), 176 'paraformer': paraformer.toJson(),
153 'zipformer2Ctc': zipformer2Ctc.toJson(), 177 'zipformer2Ctc': zipformer2Ctc.toJson(),
154 'nemoCtc': nemoCtc.toJson(), 178 'nemoCtc': nemoCtc.toJson(),
  179 + 'toneCtc': toneCtc.toJson(),
155 'tokens': tokens, 180 'tokens': tokens,
156 'numThreads': numThreads, 181 'numThreads': numThreads,
157 'provider': provider, 182 'provider': provider,
@@ -165,6 +190,7 @@ class OnlineModelConfig { @@ -165,6 +190,7 @@ class OnlineModelConfig {
165 final OnlineParaformerModelConfig paraformer; 190 final OnlineParaformerModelConfig paraformer;
166 final OnlineZipformer2CtcModelConfig zipformer2Ctc; 191 final OnlineZipformer2CtcModelConfig zipformer2Ctc;
167 final OnlineNemoCtcModelConfig nemoCtc; 192 final OnlineNemoCtcModelConfig nemoCtc;
  193 + final OnlineToneCtcModelConfig toneCtc;
168 194
169 final String tokens; 195 final String tokens;
170 196
@@ -362,6 +388,9 @@ class OnlineRecognizer { @@ -362,6 +388,9 @@ class OnlineRecognizer {
362 // nemoCtc 388 // nemoCtc
363 c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8(); 389 c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8();
364 390
  391 + // toneCtc
  392 + c.ref.model.toneCtc.model = config.model.toneCtc.model.toNativeUtf8();
  393 +
365 c.ref.model.tokens = config.model.tokens.toNativeUtf8(); 394 c.ref.model.tokens = config.model.tokens.toNativeUtf8();
366 c.ref.model.numThreads = config.model.numThreads; 395 c.ref.model.numThreads = config.model.numThreads;
367 c.ref.model.provider = config.model.provider.toNativeUtf8(); 396 c.ref.model.provider = config.model.provider.toNativeUtf8();
@@ -415,6 +444,7 @@ class OnlineRecognizer { @@ -415,6 +444,7 @@ class OnlineRecognizer {
415 calloc.free(c.ref.model.modelType); 444 calloc.free(c.ref.model.modelType);
416 calloc.free(c.ref.model.provider); 445 calloc.free(c.ref.model.provider);
417 calloc.free(c.ref.model.tokens); 446 calloc.free(c.ref.model.tokens);
  447 + calloc.free(c.ref.model.toneCtc.model);
418 calloc.free(c.ref.model.nemoCtc.model); 448 calloc.free(c.ref.model.nemoCtc.model);
419 calloc.free(c.ref.model.zipformer2Ctc.model); 449 calloc.free(c.ref.model.zipformer2Ctc.model);
420 calloc.free(c.ref.model.paraformer.encoder); 450 calloc.free(c.ref.model.paraformer.encoder);
@@ -403,6 +403,10 @@ final class SherpaOnnxOnlineNemoCtcModelConfig extends Struct { @@ -403,6 +403,10 @@ final class SherpaOnnxOnlineNemoCtcModelConfig extends Struct {
403 external Pointer<Utf8> model; 403 external Pointer<Utf8> model;
404 } 404 }
405 405
  406 +final class SherpaOnnxOnlineToneCtcModelConfig extends Struct {
  407 + external Pointer<Utf8> model;
  408 +}
  409 +
406 final class SherpaOnnxOnlineModelConfig extends Struct { 410 final class SherpaOnnxOnlineModelConfig extends Struct {
407 external SherpaOnnxOnlineTransducerModelConfig transducer; 411 external SherpaOnnxOnlineTransducerModelConfig transducer;
408 external SherpaOnnxOnlineParaformerModelConfig paraformer; 412 external SherpaOnnxOnlineParaformerModelConfig paraformer;
@@ -430,6 +434,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct { @@ -430,6 +434,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct {
430 external int tokensBufSize; 434 external int tokensBufSize;
431 435
432 external SherpaOnnxOnlineNemoCtcModelConfig nemoCtc; 436 external SherpaOnnxOnlineNemoCtcModelConfig nemoCtc;
  437 +
  438 + external SherpaOnnxOnlineToneCtcModelConfig toneCtc;
433 } 439 }
434 440
435 final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct { 441 final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct {
@@ -27,6 +27,7 @@ func main() { @@ -27,6 +27,7 @@ func main() {
27 flag.StringVar(&config.ModelConfig.Paraformer.Encoder, "paraformer-encoder", "", "Path to the paraformer encoder model") 27 flag.StringVar(&config.ModelConfig.Paraformer.Encoder, "paraformer-encoder", "", "Path to the paraformer encoder model")
28 flag.StringVar(&config.ModelConfig.Paraformer.Decoder, "paraformer-decoder", "", "Path to the paraformer decoder model") 28 flag.StringVar(&config.ModelConfig.Paraformer.Decoder, "paraformer-decoder", "", "Path to the paraformer decoder model")
29 flag.StringVar(&config.ModelConfig.Zipformer2Ctc.Model, "zipformer2-ctc", "", "Path to the zipformer2 CTC model") 29 flag.StringVar(&config.ModelConfig.Zipformer2Ctc.Model, "zipformer2-ctc", "", "Path to the zipformer2 CTC model")
  30 + flag.StringVar(&config.ModelConfig.ToneCtc.Model, "t-one-ctc", "", "Path to the T-one CTC model")
30 flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") 31 flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
31 flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") 32 flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
32 flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") 33 flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
@@ -59,9 +60,12 @@ func main() { @@ -59,9 +60,12 @@ func main() {
59 stream := sherpa.NewOnlineStream(recognizer) 60 stream := sherpa.NewOnlineStream(recognizer)
60 defer sherpa.DeleteOnlineStream(stream) 61 defer sherpa.DeleteOnlineStream(stream)
61 62
  63 + leftPadding := make([]float32, int(float32(sampleRate)*0.3))
  64 + stream.AcceptWaveform(sampleRate, leftPadding)
  65 +
62 stream.AcceptWaveform(sampleRate, samples) 66 stream.AcceptWaveform(sampleRate, samples)
63 67
64 - tailPadding := make([]float32, int(float32(sampleRate)*0.3)) 68 + tailPadding := make([]float32, int(float32(sampleRate)*0.6))
65 stream.AcceptWaveform(sampleRate, tailPadding) 69 stream.AcceptWaveform(sampleRate, tailPadding)
66 70
67 for recognizer.IsReady(stream) { 71 for recognizer.IsReady(stream) {
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +
  6 +if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  7 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  8 + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  9 + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  10 +fi
  11 +
  12 +go mod tidy
  13 +go build
  14 +
  15 +./streaming-decode-files \
  16 + --t-one-ctc ./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx \
  17 + --tokens ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt \
  18 + ./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav
@@ -26,8 +26,9 @@ export { Samples, @@ -26,8 +26,9 @@ export { Samples,
26 26
27 export { OnlineStream, 27 export { OnlineStream,
28 OnlineNemoCtcModelConfig, 28 OnlineNemoCtcModelConfig,
29 - OnlineTransducerModelConfig,  
30 OnlineParaformerModelConfig, 29 OnlineParaformerModelConfig,
  30 + OnlineToneCtcModelConfig,
  31 + OnlineTransducerModelConfig,
31 OnlineZipformer2CtcModelConfig, 32 OnlineZipformer2CtcModelConfig,
32 OnlineModelConfig, 33 OnlineModelConfig,
33 OnlineCtcFstDecoderConfig, 34 OnlineCtcFstDecoderConfig,
@@ -89,6 +89,22 @@ static SherpaOnnxOnlineNemoCtcModelConfig GetOnlineNemoCtcModelConfig( @@ -89,6 +89,22 @@ static SherpaOnnxOnlineNemoCtcModelConfig GetOnlineNemoCtcModelConfig(
89 return c; 89 return c;
90 } 90 }
91 91
  92 +static SherpaOnnxOnlineToneCtcModelConfig GetOnlineToneCtcModelConfig(
  93 + Napi::Object obj) {
  94 + SherpaOnnxOnlineToneCtcModelConfig c;
  95 + memset(&c, 0, sizeof(c));
  96 +
  97 + if (!obj.Has("toneCtc") || !obj.Get("toneCtc").IsObject()) {
  98 + return c;
  99 + }
  100 +
  101 + Napi::Object o = obj.Get("toneCtc").As<Napi::Object>();
  102 +
  103 + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  104 +
  105 + return c;
  106 +}
  107 +
92 static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( 108 static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig(
93 Napi::Object obj) { 109 Napi::Object obj) {
94 SherpaOnnxOnlineParaformerModelConfig c; 110 SherpaOnnxOnlineParaformerModelConfig c;
@@ -120,6 +136,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { @@ -120,6 +136,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
120 c.paraformer = GetOnlineParaformerModelConfig(o); 136 c.paraformer = GetOnlineParaformerModelConfig(o);
121 c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o); 137 c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o);
122 c.nemo_ctc = GetOnlineNemoCtcModelConfig(o); 138 c.nemo_ctc = GetOnlineNemoCtcModelConfig(o);
  139 + c.t_one_ctc = GetOnlineToneCtcModelConfig(o);
123 140
124 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); 141 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
125 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); 142 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -265,6 +282,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( @@ -265,6 +282,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
265 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder); 282 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder);
266 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder); 283 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder);
267 284
  285 + SHERPA_ONNX_DELETE_C_STR(c.model_config.t_one_ctc.model);
268 SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model); 286 SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
269 SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model); 287 SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model);
270 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); 288 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
@@ -50,11 +50,16 @@ export class OnlineNemoCtcModelConfig { @@ -50,11 +50,16 @@ export class OnlineNemoCtcModelConfig {
50 public model: string = ''; 50 public model: string = '';
51 } 51 }
52 52
  53 +export class OnlineToneCtcModelConfig {
  54 + public model: string = '';
  55 +}
  56 +
53 export class OnlineModelConfig { 57 export class OnlineModelConfig {
54 public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig(); 58 public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig();
55 public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig(); 59 public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig();
56 - public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig();  
57 - public nemo_ctc: OnlineNemoCtcModelConfig = new OnlineNemoCtcModelConfig(); 60 + public zipformer2Ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig();
  61 + public nemoCtc: OnlineNemoCtcModelConfig = new OnlineNemoCtcModelConfig();
  62 + public toneCtc: OnlineToneCtcModelConfig = new OnlineToneCtcModelConfig();
58 public tokens: string = ''; 63 public tokens: string = '';
59 public numThreads: number = 1; 64 public numThreads: number = 1;
60 public provider: string = 'cpu'; 65 public provider: string = 'cpu';
  1 +// Copyright 2025 Xiaomi Corporation
  2 +
  3 +// This file shows how to use an online T-one CTC model, i.e.,
  4 +// streaming T-one CTC model, to decode files.
  5 +import com.k2fsa.sherpa.onnx.*;
  6 +
  7 +public class StreamingDecodeFileToneCtc {
  8 + public static void main(String[] args) {
  9 + String model = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx";
  10 + String tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt";
  11 + String waveFilename = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav";
  12 +
  13 + WaveReader reader = new WaveReader(waveFilename);
  14 +
  15 + OnlineToneCtcModelConfig ctc = OnlineToneCtcModelConfig.builder().setModel(model).build();
  16 +
  17 + OnlineModelConfig modelConfig =
  18 + OnlineModelConfig.builder()
  19 + .setToneCtc(ctc)
  20 + .setTokens(tokens)
  21 + .setNumThreads(1)
  22 + .setDebug(true)
  23 + .build();
  24 +
  25 + OnlineRecognizerConfig config =
  26 + OnlineRecognizerConfig.builder()
  27 + .setOnlineModelConfig(modelConfig)
  28 + .setDecodingMethod("greedy_search")
  29 + .build();
  30 +
  31 + OnlineRecognizer recognizer = new OnlineRecognizer(config);
  32 + OnlineStream stream = recognizer.createStream();
  33 +
  34 + float[] leftPaddings = new float[(int) (0.3 * reader.getSampleRate())];
  35 + stream.acceptWaveform(leftPaddings, reader.getSampleRate());
  36 +
  37 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  38 +
  39 + float[] tailPaddings = new float[(int) (0.6 * reader.getSampleRate())];
  40 + stream.acceptWaveform(tailPaddings, reader.getSampleRate());
  41 +
  42 + while (recognizer.isReady(stream)) {
  43 + recognizer.decode(stream);
  44 + }
  45 +
  46 + String text = recognizer.getResult(stream).getText();
  47 +
  48 + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
  49 +
  50 + stream.release();
  51 + recognizer.release();
  52 + }
  53 +}
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  5 + mkdir -p ../build
  6 + pushd ../build
  7 + cmake \
  8 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  9 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  10 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  11 + -DBUILD_SHARED_LIBS=ON \
  12 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  13 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  14 + ..
  15 +
  16 + make -j4
  17 + ls -lh lib
  18 + popd
  19 +fi
  20 +
  21 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  22 + pushd ../sherpa-onnx/java-api
  23 + make
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  29 + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  30 + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  31 +fi
  32 +
  33 +java \
  34 + -Djava.library.path=$PWD/../build/lib \
  35 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  36 + StreamingDecodeFileToneCtc.java
@@ -72,6 +72,12 @@ function testSpeakerEmbeddingExtractor() { @@ -72,6 +72,12 @@ function testSpeakerEmbeddingExtractor() {
72 72
73 73
74 function testOnlineAsr() { 74 function testOnlineAsr() {
  75 + if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  76 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  77 + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  78 + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  79 + fi
  80 +
75 if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then 81 if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then
76 git lfs install 82 git lfs install
77 GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 83 GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21
@@ -5,6 +5,7 @@ fun main() { @@ -5,6 +5,7 @@ fun main() {
5 testOnlineAsr("zipformer2-ctc") 5 testOnlineAsr("zipformer2-ctc")
6 testOnlineAsr("ctc-hlg") 6 testOnlineAsr("ctc-hlg")
7 testOnlineAsr("nemo-ctc") 7 testOnlineAsr("nemo-ctc")
  8 + testOnlineAsr("tone-ctc")
8 } 9 }
9 10
10 fun testOnlineAsr(type: String) { 11 fun testOnlineAsr(type: String) {
@@ -54,6 +55,17 @@ fun testOnlineAsr(type: String) { @@ -54,6 +55,17 @@ fun testOnlineAsr(type: String) {
54 debug = false, 55 debug = false,
55 ) 56 )
56 } 57 }
  58 + "tone-ctc" -> {
  59 + waveFilename = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"
  60 + OnlineModelConfig(
  61 + toneCtc = OnlineToneCtcModelConfig(
  62 + model = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx",
  63 + ),
  64 + tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt",
  65 + numThreads = 1,
  66 + debug = false,
  67 + )
  68 + }
57 "ctc-hlg" -> { 69 "ctc-hlg" -> {
58 waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav" 70 waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav"
59 ctcFstDecoderConfig.graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst" 71 ctcFstDecoderConfig.graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"
@@ -95,12 +107,16 @@ fun testOnlineAsr(type: String) { @@ -95,12 +107,16 @@ fun testOnlineAsr(type: String) {
95 val sampleRate: Int = objArray[1] as Int 107 val sampleRate: Int = objArray[1] as Int
96 108
97 val stream = recognizer.createStream() 109 val stream = recognizer.createStream()
  110 +
  111 + val leftPaddings = FloatArray((sampleRate * 0.3).toInt()) // 0.3 seconds
  112 + stream.acceptWaveform(leftPaddings, sampleRate = sampleRate)
  113 +
98 stream.acceptWaveform(samples, sampleRate = sampleRate) 114 stream.acceptWaveform(samples, sampleRate = sampleRate)
99 while (recognizer.isReady(stream)) { 115 while (recognizer.isReady(stream)) {
100 recognizer.decode(stream) 116 recognizer.decode(stream)
101 } 117 }
102 118
103 - val tailPaddings = FloatArray((sampleRate * 0.5).toInt()) // 0.5 seconds 119 + val tailPaddings = FloatArray((sampleRate * 0.6).toInt()) // 0.6 seconds
104 stream.acceptWaveform(tailPaddings, sampleRate = sampleRate) 120 stream.acceptWaveform(tailPaddings, sampleRate = sampleRate)
105 stream.inputFinished() 121 stream.inputFinished()
106 while (recognizer.isReady(stream)) { 122 while (recognizer.isReady(stream)) {
@@ -97,6 +97,7 @@ The following tables list the examples in this folder. @@ -97,6 +97,7 @@ The following tables list the examples in this folder.
97 97
98 |File| Description| 98 |File| Description|
99 |---|---| 99 |---|---|
  100 +|[./test_asr_streaming_t_one_ctc.js](./test_asr_streaming_t_one_ctc.js)| Streaming speech recognition from a file using a T-one CTC model|
100 |[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model| 101 |[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model|
101 |[./test_asr_streaming_transducer_with_hr.js](./test_asr_streaming_transducer_with_hr.js)| Streaming speech recognition from a file using a Zipformer transducer model with homophone replacer| 102 |[./test_asr_streaming_transducer_with_hr.js](./test_asr_streaming_transducer_with_hr.js)| Streaming speech recognition from a file using a Zipformer transducer model with homophone replacer|
102 |[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search| 103 |[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search|
@@ -230,6 +231,16 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lex @@ -230,6 +231,16 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lex
230 node ./test_asr_streaming_transducer_with_hr.js 231 node ./test_asr_streaming_transducer_with_hr.js
231 ``` 232 ```
232 233
  234 +### Streaming speech recognition with T-one CTC
  235 +
  236 +```bash
  237 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  238 +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  239 +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  240 +
  241 +node ./test_asr_streaming_t_one_ctc.js
  242 +```
  243 +
233 ### Streaming speech recognition with Zipformer transducer 244 ### Streaming speech recognition with Zipformer transducer
234 245
235 ```bash 246 ```bash
  1 +// Copyright (c) 2025 Xiaomi Corporation
  2 +const sherpa_onnx = require('sherpa-onnx-node');
  3 +
  4 +// Please download test files from
  5 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  6 +const config = {
  7 + 'modelConfig': {
  8 + 'toneCtc': {
  9 + 'model': './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx',
  10 + },
  11 + 'tokens': './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt',
  12 + 'numThreads': 2,
  13 + 'provider': 'cpu',
  14 + 'debug': 1,
  15 + }
  16 +};
  17 +
  18 +const waveFilename = './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav';
  19 +
  20 +const recognizer = new sherpa_onnx.OnlineRecognizer(config);
  21 +console.log('Started')
  22 +let start = Date.now();
  23 +const stream = recognizer.createStream();
  24 +const wave = sherpa_onnx.readWave(waveFilename);
  25 +
  26 +const leftPadding = new Float32Array(wave.sampleRate * 0.3);
  27 +stream.acceptWaveform({samples: leftPadding, sampleRate: wave.sampleRate});
  28 +
  29 +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  30 +
  31 +const tailPadding = new Float32Array(wave.sampleRate * 0.6);
  32 +stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
  33 +
  34 +while (recognizer.isReady(stream)) {
  35 + recognizer.decode(stream);
  36 +}
  37 +result = recognizer.getResult(stream)
  38 +let stop = Date.now();
  39 +console.log('Done')
  40 +
  41 +const elapsed_seconds = (stop - start) / 1000;
  42 +const duration = wave.samples.length / wave.sampleRate;
  43 +const real_time_factor = elapsed_seconds / duration;
  44 +console.log('Wave duration', duration.toFixed(3), 'seconds')
  45 +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
  46 +console.log(
  47 + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
  48 + real_time_factor.toFixed(3))
  49 +console.log(waveFilename)
  50 +console.log('result\n', result)
@@ -393,6 +393,18 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 @@ -393,6 +393,18 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
393 node ./test-online-paraformer-microphone-mic.js 393 node ./test-online-paraformer-microphone-mic.js
394 ``` 394 ```
395 395
  396 +## ./test-online-t-one-ctc.js
  397 +[./test-online-t-one-ctc.js](./test-online-t-one-ctc.js) demonstrates
  398 +how to decode a file using a streaming T-one CTC model.
  399 +
  400 +You can use the following command to run it:
  401 +
  402 +```bash
  403 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  404 +tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  405 +rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  406 +node ./test-online-t-one-ctc.js
  407 +```
396 408
397 ## ./test-online-paraformer.js 409 ## ./test-online-paraformer.js
398 [./test-online-paraformer.js](./test-online-paraformer.js) demonstrates 410 [./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createOnlineRecognizer() {
  10 + let toneCtc = {
  11 + model: './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx',
  12 + };
  13 +
  14 + let onlineModelConfig = {
  15 + toneCtc: toneCtc,
  16 + tokens: './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt',
  17 + numThreads: 1,
  18 + provider: 'cpu',
  19 + debug: 1,
  20 + };
  21 +
  22 +
  23 + let recognizerConfig = {
  24 + modelConfig: onlineModelConfig,
  25 + decodingMethod: 'greedy_search',
  26 + maxActivePaths: 4,
  27 + enableEndpoint: 1,
  28 + rule1MinTrailingSilence: 2.4,
  29 + rule2MinTrailingSilence: 1.2,
  30 + rule3MinUtteranceLength: 20,
  31 + };
  32 +
  33 + return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
  34 +}
  35 +
  36 +const recognizer = createOnlineRecognizer();
  37 +const stream = recognizer.createStream();
  38 +
  39 +const waveFilename = './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav';
  40 +const wave = sherpa_onnx.readWave(waveFilename);
  41 +
  42 +const leftPadding = new Float32Array(wave.sampleRate * 0.3);
  43 +const tailPadding = new Float32Array(wave.sampleRate * 0.6);
  44 +
  45 +stream.acceptWaveform(wave.sampleRate, leftPadding);
  46 +stream.acceptWaveform(wave.sampleRate, wave.samples);
  47 +stream.acceptWaveform(wave.sampleRate, tailPadding);
  48 +
  49 +while (recognizer.isReady(stream)) {
  50 + recognizer.decode(stream);
  51 +}
  52 +const text = recognizer.getResult(stream).text;
  53 +console.log(text);
  54 +
  55 +stream.free();
  56 +recognizer.free();
@@ -4,3 +4,4 @@ paraformer @@ -4,3 +4,4 @@ paraformer
4 zipformer_ctc 4 zipformer_ctc
5 zipformer_ctc_hlg 5 zipformer_ctc_hlg
6 nemo_transducer 6 nemo_transducer
  7 +t_one_ctc
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  29 + tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  30 + rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -dSHERPA_ONNX_USE_SHARED_LIBS \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./t_one_ctc.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./t_one_ctc
  1 +{ Copyright (c) 2025 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a streaming T-one CTC model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program t_one_ctc;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Config: TSherpaOnnxOnlineRecognizerConfig;
  22 + Recognizer: TSherpaOnnxOnlineRecognizer;
  23 + Stream: TSherpaOnnxOnlineStream;
  24 + RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
  25 + Wave: TSherpaOnnxWave;
  26 + WaveFilename: AnsiString;
  27 + LeftPaddings: array of Single;
  28 + TailPaddings: array of Single;
  29 +
  30 + Start: TDateTime;
  31 + Stop: TDateTime;
  32 +
  33 + Elapsed: Single;
  34 + Duration: Single;
  35 + RealTimeFactor: Single;
  36 +begin
  37 + Initialize(Config);
  38 +
  39 + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  40 + to download model files used in this file.}
  41 + Config.ModelConfig.ToneCtc.Model := './sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx';
  42 + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt';
  43 + Config.ModelConfig.Provider := 'cpu';
  44 + Config.ModelConfig.NumThreads := 1;
  45 + Config.ModelConfig.Debug := False;
  46 +
  47 + WaveFilename := './sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav';
  48 +
  49 + Wave := SherpaOnnxReadWave(WaveFilename);
  50 +
  51 + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
  52 +
  53 + Start := Now;
  54 +
  55 + Stream := Recognizer.CreateStream();
  56 +
  57 + SetLength(LeftPaddings, Round(Wave.SampleRate * 0.3)); {0.3 seconds of padding}
  58 + Stream.AcceptWaveform(LeftPaddings, Wave.SampleRate);
  59 +
  60 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  61 +
  62 + SetLength(TailPaddings, Round(Wave.SampleRate * 0.6)); {0.6 seconds of padding}
  63 + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
  64 +
  65 + Stream.InputFinished();
  66 +
  67 + while Recognizer.IsReady(Stream) do
  68 + Recognizer.Decode(Stream);
  69 +
  70 + RecognitionResult := Recognizer.GetResult(Stream);
  71 +
  72 + Stop := Now;
  73 +
  74 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  75 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  76 + RealTimeFactor := Elapsed / Duration;
  77 +
  78 + WriteLn(RecognitionResult.ToString);
  79 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  80 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  81 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  82 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  83 +
  84 + {Free resources to avoid memory leak.
  85 +
  86 + Note: You don't need to invoke them for this simple script.
  87 + However, you have to invoke them in your own large/complex project.
  88 + }
  89 + FreeAndNil(Stream);
  90 + FreeAndNil(Recognizer);
  91 +end.
@@ -469,6 +469,21 @@ def get_models(): @@ -469,6 +469,21 @@ def get_models():
469 popd 469 popd
470 """, 470 """,
471 ), 471 ),
  472 + Model(
  473 + model_name="sherpa-onnx-streaming-t-one-russian-2025-09-08",
  474 + idx=27,
  475 + lang="ru",
  476 + short_name="t_one_ctc_2025_09_08",
  477 + cmd="""
  478 + pushd $model_name
  479 +
  480 + rm -v *.wav
  481 +
  482 + ls -lh
  483 +
  484 + popd
  485 + """,
  486 + ),
472 ] 487 ]
473 488
474 return models 489 return models
@@ -25,6 +25,7 @@ namespace SherpaOnnx @@ -25,6 +25,7 @@ namespace SherpaOnnx
25 TokensBuf = ""; 25 TokensBuf = "";
26 TokensBufSize = 0; 26 TokensBufSize = 0;
27 NemoCtc = new OnlineNemoCtcModelConfig(); 27 NemoCtc = new OnlineNemoCtcModelConfig();
  28 + ToneCtc = new OnlineToneCtcModelConfig();
28 } 29 }
29 30
30 public OnlineTransducerModelConfig Transducer; 31 public OnlineTransducerModelConfig Transducer;
@@ -58,6 +59,8 @@ namespace SherpaOnnx @@ -58,6 +59,8 @@ namespace SherpaOnnx
58 public int TokensBufSize; 59 public int TokensBufSize;
59 60
60 public OnlineNemoCtcModelConfig NemoCtc; 61 public OnlineNemoCtcModelConfig NemoCtc;
  62 +
  63 + public OnlineToneCtcModelConfig ToneCtc;
61 } 64 }
62 65
63 } 66 }
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
  /// <summary>
  /// Configuration for streaming T-one CTC models.
  ///
  /// Mirrors SherpaOnnxOnlineToneCtcModelConfig in the C API, so the
  /// struct layout must remain sequential with this single string field.
  /// </summary>
  [StructLayout(LayoutKind.Sequential)]
  public struct OnlineToneCtcModelConfig
  {
    public OnlineToneCtcModelConfig()
    {
      // Default to an empty path so the native side sees "" instead of null.
      Model = "";
    }

    /// <summary>Path to model.onnx of the streaming T-one CTC model.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;
  }
}
  1 +../../../../go-api-examples/non-streaming-tts/run-kitten-en.sh
  1 +../../../../go-api-examples/streaming-decode-files/run-t-one-ctc.sh
@@ -81,6 +81,10 @@ type OnlineNemoCtcModelConfig struct { @@ -81,6 +81,10 @@ type OnlineNemoCtcModelConfig struct {
81 Model string // Path to the onnx model 81 Model string // Path to the onnx model
82 } 82 }
83 83
// OnlineToneCtcModelConfig holds the configuration for streaming
// T-one CTC models.
type OnlineToneCtcModelConfig struct {
	Model string // Path to the onnx model
}
  87 +
84 // Configuration for online/streaming models 88 // Configuration for online/streaming models
85 // 89 //
86 // Please refer to 90 // Please refer to
@@ -92,6 +96,7 @@ type OnlineModelConfig struct { @@ -92,6 +96,7 @@ type OnlineModelConfig struct {
92 Paraformer OnlineParaformerModelConfig 96 Paraformer OnlineParaformerModelConfig
93 Zipformer2Ctc OnlineZipformer2CtcModelConfig 97 Zipformer2Ctc OnlineZipformer2CtcModelConfig
94 NemoCtc OnlineNemoCtcModelConfig 98 NemoCtc OnlineNemoCtcModelConfig
  99 + ToneCtc OnlineToneCtcModelConfig
95 Tokens string // Path to tokens.txt 100 Tokens string // Path to tokens.txt
96 NumThreads int // Number of threads to use for neural network computation 101 NumThreads int // Number of threads to use for neural network computation
97 Provider string // Optional. Valid values are: cpu, cuda, coreml 102 Provider string // Optional. Valid values are: cpu, cuda, coreml
@@ -205,6 +210,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { @@ -205,6 +210,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
205 c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model) 210 c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model)
206 defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) 211 defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))
207 212
  213 + c.model_config.t_one_ctc.model = C.CString(config.ModelConfig.ToneCtc.Model)
  214 + defer C.free(unsafe.Pointer(c.model_config.t_one_ctc.model))
  215 +
208 c.model_config.tokens = C.CString(config.ModelConfig.Tokens) 216 c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
209 defer C.free(unsafe.Pointer(c.model_config.tokens)) 217 defer C.free(unsafe.Pointer(c.model_config.tokens))
210 218
@@ -100,6 +100,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( @@ -100,6 +100,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig(
100 recognizer_config.model_config.nemo_ctc.model = 100 recognizer_config.model_config.nemo_ctc.model =
101 SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, ""); 101 SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");
102 102
  103 + recognizer_config.model_config.t_one_ctc.model =
  104 + SHERPA_ONNX_OR(config->model_config.t_one_ctc.model, "");
  105 +
103 recognizer_config.model_config.num_threads = 106 recognizer_config.model_config.num_threads =
104 SHERPA_ONNX_OR(config->model_config.num_threads, 1); 107 SHERPA_ONNX_OR(config->model_config.num_threads, 1);
105 recognizer_config.model_config.provider_config.provider = 108 recognizer_config.model_config.provider_config.provider =
@@ -691,8 +694,7 @@ const SherpaOnnxOfflineRecognizerResult *SherpaOnnxGetOfflineStreamResult( @@ -691,8 +694,7 @@ const SherpaOnnxOfflineRecognizerResult *SherpaOnnxGetOfflineStreamResult(
691 694
692 if (!result.durations.empty() && result.durations.size() == r->count) { 695 if (!result.durations.empty() && result.durations.size() == r->count) {
693 r->durations = new float[r->count]; 696 r->durations = new float[r->count];
694 - std::copy(result.durations.begin(), result.durations.end(),  
695 - r->durations); 697 + std::copy(result.durations.begin(), result.durations.end(), r->durations);
696 } else { 698 } else {
697 r->durations = nullptr; 699 r->durations = nullptr;
698 } 700 }
@@ -104,6 +104,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineNemoCtcModelConfig { @@ -104,6 +104,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineNemoCtcModelConfig {
104 const char *model; 104 const char *model;
105 } SherpaOnnxOnlineNemoCtcModelConfig; 105 } SherpaOnnxOnlineNemoCtcModelConfig;
106 106
/// Configuration for streaming T-one CTC models.
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineToneCtcModelConfig {
  /// Path to model.onnx of the streaming T-one CTC model.
  const char *model;
} SherpaOnnxOnlineToneCtcModelConfig;
  110 +
107 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { 111 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig {
108 SherpaOnnxOnlineTransducerModelConfig transducer; 112 SherpaOnnxOnlineTransducerModelConfig transducer;
109 SherpaOnnxOnlineParaformerModelConfig paraformer; 113 SherpaOnnxOnlineParaformerModelConfig paraformer;
@@ -125,6 +129,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { @@ -125,6 +129,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig {
125 /// byte size excluding the trailing '\0' 129 /// byte size excluding the trailing '\0'
126 int32_t tokens_buf_size; 130 int32_t tokens_buf_size;
127 SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc; 131 SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc;
  132 + SherpaOnnxOnlineToneCtcModelConfig t_one_ctc;
128 } SherpaOnnxOnlineModelConfig; 133 } SherpaOnnxOnlineModelConfig;
129 134
130 /// It expects 16 kHz 16-bit single channel wave format. 135 /// It expects 16 kHz 16-bit single channel wave format.
@@ -70,6 +70,7 @@ OnlineRecognizer OnlineRecognizer::Create( @@ -70,6 +70,7 @@ OnlineRecognizer OnlineRecognizer::Create(
70 config.model_config.zipformer2_ctc.model.c_str(); 70 config.model_config.zipformer2_ctc.model.c_str();
71 71
72 c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str(); 72 c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str();
  73 + c.model_config.t_one_ctc.model = config.model_config.t_one_ctc.model.c_str();
73 74
74 c.model_config.tokens = config.model_config.tokens.c_str(); 75 c.model_config.tokens = config.model_config.tokens.c_str();
75 c.model_config.num_threads = config.model_config.num_threads; 76 c.model_config.num_threads = config.model_config.num_threads;
@@ -36,11 +36,16 @@ struct OnlineNemoCtcModelConfig { @@ -36,11 +36,16 @@ struct OnlineNemoCtcModelConfig {
36 std::string model; 36 std::string model;
37 }; 37 };
38 38
// Configuration for streaming T-one CTC models.
struct OnlineToneCtcModelConfig {
  // Path to model.onnx
  std::string model;
};
  42 +
39 struct OnlineModelConfig { 43 struct OnlineModelConfig {
40 OnlineTransducerModelConfig transducer; 44 OnlineTransducerModelConfig transducer;
41 OnlineParaformerModelConfig paraformer; 45 OnlineParaformerModelConfig paraformer;
42 OnlineZipformer2CtcModelConfig zipformer2_ctc; 46 OnlineZipformer2CtcModelConfig zipformer2_ctc;
43 OnlineNemoCtcModelConfig nemo_ctc; 47 OnlineNemoCtcModelConfig nemo_ctc;
  48 + OnlineToneCtcModelConfig t_one_ctc;
44 std::string tokens; 49 std::string tokens;
45 int32_t num_threads = 1; 50 int32_t num_threads = 1;
46 std::string provider = "cpu"; 51 std::string provider = "cpu";
@@ -19,6 +19,7 @@ java_files += HomophoneReplacerConfig.java @@ -19,6 +19,7 @@ java_files += HomophoneReplacerConfig.java
19 java_files += OnlineLMConfig.java 19 java_files += OnlineLMConfig.java
20 java_files += OnlineParaformerModelConfig.java 20 java_files += OnlineParaformerModelConfig.java
21 java_files += OnlineZipformer2CtcModelConfig.java 21 java_files += OnlineZipformer2CtcModelConfig.java
  22 +java_files += OnlineToneCtcModelConfig.java
22 java_files += OnlineNeMoCtcModelConfig.java 23 java_files += OnlineNeMoCtcModelConfig.java
23 java_files += OnlineTransducerModelConfig.java 24 java_files += OnlineTransducerModelConfig.java
24 java_files += OnlineModelConfig.java 25 java_files += OnlineModelConfig.java
@@ -237,6 +237,7 @@ public class LibraryUtils { @@ -237,6 +237,7 @@ public class LibraryUtils {
237 dir.deleteOnExit(); // schedule the directory itself 237 dir.deleteOnExit(); // schedule the directory itself
238 } 238 }
239 239
  240 + static boolean isAndroid() {
240 String vmName = System.getProperty("java.vm.name", "").toLowerCase(Locale.ROOT); 241 String vmName = System.getProperty("java.vm.name", "").toLowerCase(Locale.ROOT);
241 String specVendor = System.getProperty("java.specification.vendor", ""); 242 String specVendor = System.getProperty("java.specification.vendor", "");
242 return vmName.contains("dalvik") || vmName.contains("art") || 243 return vmName.contains("dalvik") || vmName.contains("art") ||
@@ -8,6 +8,7 @@ public class OnlineModelConfig { @@ -8,6 +8,7 @@ public class OnlineModelConfig {
8 private final OnlineParaformerModelConfig paraformer; 8 private final OnlineParaformerModelConfig paraformer;
9 private final OnlineZipformer2CtcModelConfig zipformer2Ctc; 9 private final OnlineZipformer2CtcModelConfig zipformer2Ctc;
10 private final OnlineNeMoCtcModelConfig neMoCtc; 10 private final OnlineNeMoCtcModelConfig neMoCtc;
  11 + private final OnlineToneCtcModelConfig toneCtc;
11 private final String tokens; 12 private final String tokens;
12 private final int numThreads; 13 private final int numThreads;
13 private final boolean debug; 14 private final boolean debug;
@@ -21,6 +22,7 @@ public class OnlineModelConfig { @@ -21,6 +22,7 @@ public class OnlineModelConfig {
21 this.paraformer = builder.paraformer; 22 this.paraformer = builder.paraformer;
22 this.zipformer2Ctc = builder.zipformer2Ctc; 23 this.zipformer2Ctc = builder.zipformer2Ctc;
23 this.neMoCtc = builder.neMoCtc; 24 this.neMoCtc = builder.neMoCtc;
  25 + this.toneCtc = builder.toneCtc;
24 this.tokens = builder.tokens; 26 this.tokens = builder.tokens;
25 this.numThreads = builder.numThreads; 27 this.numThreads = builder.numThreads;
26 this.debug = builder.debug; 28 this.debug = builder.debug;
@@ -50,6 +52,10 @@ public class OnlineModelConfig { @@ -50,6 +52,10 @@ public class OnlineModelConfig {
50 return neMoCtc; 52 return neMoCtc;
51 } 53 }
52 54
  55 + public OnlineToneCtcModelConfig getToneCtc() {
  56 + return toneCtc;
  57 + }
  58 +
53 public String getTokens() { 59 public String getTokens() {
54 return tokens; 60 return tokens;
55 } 61 }
@@ -83,6 +89,7 @@ public class OnlineModelConfig { @@ -83,6 +89,7 @@ public class OnlineModelConfig {
83 private OnlineTransducerModelConfig transducer = OnlineTransducerModelConfig.builder().build(); 89 private OnlineTransducerModelConfig transducer = OnlineTransducerModelConfig.builder().build();
84 private OnlineZipformer2CtcModelConfig zipformer2Ctc = OnlineZipformer2CtcModelConfig.builder().build(); 90 private OnlineZipformer2CtcModelConfig zipformer2Ctc = OnlineZipformer2CtcModelConfig.builder().build();
85 private OnlineNeMoCtcModelConfig neMoCtc = OnlineNeMoCtcModelConfig.builder().build(); 91 private OnlineNeMoCtcModelConfig neMoCtc = OnlineNeMoCtcModelConfig.builder().build();
  92 + private OnlineToneCtcModelConfig toneCtc = OnlineToneCtcModelConfig.builder().build();
86 private String tokens = ""; 93 private String tokens = "";
87 private int numThreads = 1; 94 private int numThreads = 1;
88 private boolean debug = true; 95 private boolean debug = true;
@@ -115,6 +122,11 @@ public class OnlineModelConfig { @@ -115,6 +122,11 @@ public class OnlineModelConfig {
115 return this; 122 return this;
116 } 123 }
117 124
  125 + public Builder setToneCtc(OnlineToneCtcModelConfig toneCtc) {
  126 + this.toneCtc = toneCtc;
  127 + return this;
  128 + }
  129 +
118 public Builder setTokens(String tokens) { 130 public Builder setTokens(String tokens) {
119 this.tokens = tokens; 131 this.tokens = tokens;
120 return this; 132 return this;
@@ -28,5 +28,4 @@ public class OnlineNeMoCtcModelConfig { @@ -28,5 +28,4 @@ public class OnlineNeMoCtcModelConfig {
28 return this; 28 return this;
29 } 29 }
30 } 30 }
31 -  
32 -} 31 +}
  1 +package com.k2fsa.sherpa.onnx;
  2 +
  3 +public class OnlineToneCtcModelConfig {
  4 + private final String model;
  5 +
  6 + private OnlineToneCtcModelConfig(Builder builder) {
  7 + this.model = builder.model;
  8 + }
  9 +
  10 + public static Builder builder() {
  11 + return new Builder();
  12 + }
  13 +
  14 + public String getModel() {
  15 + return model;
  16 + }
  17 +
  18 + public static class Builder {
  19 + private String model = "";
  20 +
  21 + public OnlineToneCtcModelConfig build() {
  22 + return new OnlineToneCtcModelConfig(this);
  23 + }
  24 +
  25 + public Builder setModel(String model) {
  26 + this.model = model;
  27 + return this;
  28 + }
  29 + }
  30 +}
@@ -82,6 +82,18 @@ OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls, @@ -82,6 +82,18 @@ OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls,
82 ans.nemo_ctc.model = p; 82 ans.nemo_ctc.model = p;
83 env->ReleaseStringUTFChars(s, p); 83 env->ReleaseStringUTFChars(s, p);
84 84
  85 + // streaming T-one CTC
  86 + fid = env->GetFieldID(model_config_cls, "toneCtc",
  87 + "Lcom/k2fsa/sherpa/onnx/OnlineToneCtcModelConfig;");
  88 + jobject t_one_ctc_config = env->GetObjectField(model_config, fid);
  89 + jclass t_one_ctc_config_cls = env->GetObjectClass(t_one_ctc_config);
  90 +
  91 + fid = env->GetFieldID(t_one_ctc_config_cls, "model", "Ljava/lang/String;");
  92 + s = (jstring)env->GetObjectField(t_one_ctc_config, fid);
  93 + p = env->GetStringUTFChars(s, nullptr);
  94 + ans.t_one_ctc.model = p;
  95 + env->ReleaseStringUTFChars(s, p);
  96 +
85 fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;"); 97 fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;");
86 s = (jstring)env->GetObjectField(model_config, fid); 98 s = (jstring)env->GetObjectField(model_config, fid);
87 p = env->GetStringUTFChars(s, nullptr); 99 p = env->GetStringUTFChars(s, nullptr);
@@ -33,11 +33,16 @@ data class OnlineNeMoCtcModelConfig( @@ -33,11 +33,16 @@ data class OnlineNeMoCtcModelConfig(
33 var model: String = "", 33 var model: String = "",
34 ) 34 )
35 35
// Configuration for streaming T-one CTC models.
data class OnlineToneCtcModelConfig(
    var model: String = "", // Path to model.onnx
)
  39 +
36 data class OnlineModelConfig( 40 data class OnlineModelConfig(
37 var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(), 41 var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
38 var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(), 42 var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
39 var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(), 43 var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(),
40 var neMoCtc: OnlineNeMoCtcModelConfig = OnlineNeMoCtcModelConfig(), 44 var neMoCtc: OnlineNeMoCtcModelConfig = OnlineNeMoCtcModelConfig(),
  45 + var toneCtc: OnlineToneCtcModelConfig = OnlineToneCtcModelConfig(),
41 var tokens: String = "", 46 var tokens: String = "",
42 var numThreads: Int = 1, 47 var numThreads: Int = 1,
43 var debug: Boolean = false, 48 var debug: Boolean = false,
@@ -518,6 +523,16 @@ fun getModelConfig(type: Int): OnlineModelConfig? { @@ -518,6 +523,16 @@ fun getModelConfig(type: Int): OnlineModelConfig? {
518 ) 523 )
519 } 524 }
520 525
  526 + 27 -> {
  527 + val modelDir = "sherpa-onnx-streaming-t-one-russian-2025-09-08"
  528 + return OnlineModelConfig(
  529 + toneCtc = OnlineToneCtcModelConfig(
  530 + model = "$modelDir/model.onnx",
  531 + ),
  532 + tokens = "$modelDir/tokens.txt",
  533 + )
  534 + }
  535 +
521 1000 -> { 536 1000 -> {
522 val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20" 537 val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20"
523 return OnlineModelConfig( 538 return OnlineModelConfig(
@@ -182,6 +182,11 @@ type @@ -182,6 +182,11 @@ type
182 function ToString: AnsiString; 182 function ToString: AnsiString;
183 end; 183 end;
184 184
  { Configuration for streaming T-one CTC models. }
  TSherpaOnnxOnlineToneCtcModelConfig = record
    Model: AnsiString; { Path to model.onnx }
    function ToString: AnsiString;
  end;
  189 +
185 TSherpaOnnxOnlineModelConfig = record 190 TSherpaOnnxOnlineModelConfig = record
186 Transducer: TSherpaOnnxOnlineTransducerModelConfig; 191 Transducer: TSherpaOnnxOnlineTransducerModelConfig;
187 Paraformer: TSherpaOnnxOnlineParaformerModelConfig; 192 Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
@@ -196,6 +201,7 @@ type @@ -196,6 +201,7 @@ type
196 TokensBuf: AnsiString; 201 TokensBuf: AnsiString;
197 TokensBufSize: Integer; 202 TokensBufSize: Integer;
198 NemoCtc: TSherpaOnnxOnlineNemoCtcModelConfig; 203 NemoCtc: TSherpaOnnxOnlineNemoCtcModelConfig;
  204 + ToneCtc: TSherpaOnnxOnlineToneCtcModelConfig;
199 function ToString: AnsiString; 205 function ToString: AnsiString;
200 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig); 206 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig);
201 end; 207 end;
@@ -714,6 +720,10 @@ type @@ -714,6 +720,10 @@ type
714 Model: PAnsiChar; 720 Model: PAnsiChar;
715 end; 721 end;
716 722
  { C-compatible layout; mirrors SherpaOnnxOnlineToneCtcModelConfig
    from the sherpa-onnx C API. }
  SherpaOnnxOnlineToneCtcModelConfig = record
    Model: PAnsiChar;
  end;
  726 +
717 SherpaOnnxOnlineModelConfig= record 727 SherpaOnnxOnlineModelConfig= record
718 Transducer: SherpaOnnxOnlineTransducerModelConfig; 728 Transducer: SherpaOnnxOnlineTransducerModelConfig;
719 Paraformer: SherpaOnnxOnlineParaformerModelConfig; 729 Paraformer: SherpaOnnxOnlineParaformerModelConfig;
@@ -728,6 +738,7 @@ type @@ -728,6 +738,7 @@ type
728 TokensBuf: PAnsiChar; 738 TokensBuf: PAnsiChar;
729 TokensBufSize: cint32; 739 TokensBufSize: cint32;
730 NemoCtc: SherpaOnnxOnlineNemoCtcModelConfig; 740 NemoCtc: SherpaOnnxOnlineNemoCtcModelConfig;
  741 + ToneCtc: SherpaOnnxOnlineToneCtcModelConfig;
731 end; 742 end;
732 SherpaOnnxFeatureConfig = record 743 SherpaOnnxFeatureConfig = record
733 SampleRate: cint32; 744 SampleRate: cint32;
@@ -1350,6 +1361,12 @@ begin @@ -1350,6 +1361,12 @@ begin
1350 [Self.Model]); 1361 [Self.Model]);
1351 end; 1362 end;
1352 1363
{ Human-readable representation of the T-one CTC model config. }
function TSherpaOnnxOnlineToneCtcModelConfig.ToString: AnsiString;
begin
  Result := 'TSherpaOnnxOnlineToneCtcModelConfig(Model := ' + Self.Model + ')';
end;
  1369 +
1353 function TSherpaOnnxOnlineModelConfig.ToString: AnsiString; 1370 function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
1354 begin 1371 begin
1355 Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' + 1372 Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
@@ -1362,12 +1379,13 @@ begin @@ -1362,12 +1379,13 @@ begin
1362 'ModelType := %s, ' + 1379 'ModelType := %s, ' +
1363 'ModelingUnit := %s, ' + 1380 'ModelingUnit := %s, ' +
1364 'BpeVocab := %s, ' + 1381 'BpeVocab := %s, ' +
1365 - 'NemoCtc := %s)', 1382 + 'NemoCtc := %s, ' +
  1383 + 'ToneCtc := %s)',
1366 [Self.Transducer.ToString, Self.Paraformer.ToString, 1384 [Self.Transducer.ToString, Self.Paraformer.ToString,
1367 Self.Zipformer2Ctc.ToString, Self.Tokens, 1385 Self.Zipformer2Ctc.ToString, Self.Tokens,
1368 Self.NumThreads, Self.Provider, Self.Debug.ToString, 1386 Self.NumThreads, Self.Provider, Self.Debug.ToString,
1369 Self.ModelType, Self.ModelingUnit, Self.BpeVocab, 1387 Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
1370 - Self.NemoCtc.ToString 1388 + Self.NemoCtc.ToString, Self.ToneCtc.ToString
1371 ]); 1389 ]);
1372 end; 1390 end;
1373 1391
@@ -1467,6 +1485,7 @@ begin @@ -1467,6 +1485,7 @@ begin
1467 1485
1468 C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model); 1486 C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);
1469 C.ModelConfig.NemoCtc.Model := PAnsiChar(Config.ModelConfig.NemoCtc.Model); 1487 C.ModelConfig.NemoCtc.Model := PAnsiChar(Config.ModelConfig.NemoCtc.Model);
  1488 + C.ModelConfig.ToneCtc.Model := PAnsiChar(Config.ModelConfig.ToneCtc.Model);
1470 1489
1471 C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens); 1490 C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
1472 C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads; 1491 C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
@@ -22,3 +22,4 @@ zipformer-ctc-asr @@ -22,3 +22,4 @@ zipformer-ctc-asr
22 dolphin-ctc-asr 22 dolphin-ctc-asr
23 tts-kitten-en 23 tts-kitten-en
24 compute-speaker-embeddings 24 compute-speaker-embeddings
  25 +decode-file-t-one-streaming
@@ -76,6 +76,14 @@ func sherpaOnnxOnlineNemoCtcModelConfig( @@ -76,6 +76,14 @@ func sherpaOnnxOnlineNemoCtcModelConfig(
76 ) 76 )
77 } 77 }
78 78
/// Return an instance of SherpaOnnxOnlineToneCtcModelConfig.
///
/// - Parameter model: Path to model.onnx of the streaming T-one CTC model.
func sherpaOnnxOnlineToneCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOnlineToneCtcModelConfig {
  return SherpaOnnxOnlineToneCtcModelConfig(
    model: toCPointer(model)
  )
}
  86 +
79 /// Return an instance of SherpaOnnxOnlineModelConfig. 87 /// Return an instance of SherpaOnnxOnlineModelConfig.
80 /// 88 ///
81 /// Please refer to 89 /// Please refer to
@@ -101,7 +109,8 @@ func sherpaOnnxOnlineModelConfig( @@ -101,7 +109,8 @@ func sherpaOnnxOnlineModelConfig(
101 bpeVocab: String = "", 109 bpeVocab: String = "",
102 tokensBuf: String = "", 110 tokensBuf: String = "",
103 tokensBufSize: Int = 0, 111 tokensBufSize: Int = 0,
104 - nemoCtc: SherpaOnnxOnlineNemoCtcModelConfig = sherpaOnnxOnlineNemoCtcModelConfig() 112 + nemoCtc: SherpaOnnxOnlineNemoCtcModelConfig = sherpaOnnxOnlineNemoCtcModelConfig(),
  113 + toneCtc: SherpaOnnxOnlineToneCtcModelConfig = sherpaOnnxOnlineToneCtcModelConfig()
105 ) -> SherpaOnnxOnlineModelConfig { 114 ) -> SherpaOnnxOnlineModelConfig {
106 return SherpaOnnxOnlineModelConfig( 115 return SherpaOnnxOnlineModelConfig(
107 transducer: transducer, 116 transducer: transducer,
@@ -116,7 +125,8 @@ func sherpaOnnxOnlineModelConfig( @@ -116,7 +125,8 @@ func sherpaOnnxOnlineModelConfig(
116 bpe_vocab: toCPointer(bpeVocab), 125 bpe_vocab: toCPointer(bpeVocab),
117 tokens_buf: toCPointer(tokensBuf), 126 tokens_buf: toCPointer(tokensBuf),
118 tokens_buf_size: Int32(tokensBufSize), 127 tokens_buf_size: Int32(tokensBufSize),
119 - nemo_ctc: nemoCtc 128 + nemo_ctc: nemoCtc,
  129 + t_one_ctc: toneCtc
120 ) 130 )
121 } 131 }
122 132
import AVFoundation

// Expose the raw float samples of an AudioBuffer as a Swift array.
extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

// Flatten the first channel of an AVAudioPCMBuffer into [Float].
extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

// Decode a single wave file with the streaming T-one Russian CTC model.
func run() {
  let filePath = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/0.wav"
  let model =
    "./sherpa-onnx-streaming-t-one-russian-2025-09-08/model.onnx"
  let tokens = "./sherpa-onnx-streaming-t-one-russian-2025-09-08/tokens.txt"

  let toneCtcConfig = sherpaOnnxOnlineToneCtcModelConfig(
    model: model)

  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    toneCtc: toneCtcConfig
  )

  // NOTE(review): the feature config is marked "not used" below, which
  // suggests T-one models consume raw audio rather than filterbank
  // features — confirm against the recognizer implementation.
  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 8000,
    featureDim: 80
  )
  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig, // not used
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  // The test wave must be 8 kHz, mono, float32 PCM.
  assert(audioFormat.sampleRate == 8000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()

  // 2400 zero samples = 0.3 s of leading silence at the asserted 8 kHz rate.
  let leftPadding = [Float](repeating: 0.0, count: 2400)
  recognizer.acceptWaveform(samples: leftPadding, sampleRate: Int(audioFormat.sampleRate))

  recognizer.acceptWaveform(samples: array, sampleRate: Int(audioFormat.sampleRate))

  // 4800 zero samples = 0.6 s of trailing silence to flush pending frames.
  let tailPadding = [Float](repeating: 0.0, count: 4800)
  recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate))

  recognizer.inputFinished()
  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
  print("\nresult is:\n\(result.timestamps)")
}

// Entry point: run the decoding example once.
@main
struct App {
  static func main() {
    run()
  }
}
#!/usr/bin/env bash
# Build and run the streaming T-one CTC decoding example on macOS.

set -ex

model_dir=sherpa-onnx-streaming-t-one-russian-2025-09-08
binary=decode-file-t-one-streaming

# The sherpa-onnx Swift libraries must have been built first.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch the pre-trained model on first use only.
if [ ! -d ./$model_dir ]; then
  echo "Downloading the pre-trained model for testing."

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2
  tar xvf $model_dir.tar.bz2
  rm $model_dir.tar.bz2
fi

# Compile the example once; subsequent runs reuse the stripped binary.
if [ ! -e ./$binary ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file-t-one-streaming.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o $binary

  strip $binary
else
  echo "./decode-file-t-one-streaming exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./$binary
@@ -31,6 +31,10 @@ function freeConfig(config, Module) { @@ -31,6 +31,10 @@ function freeConfig(config, Module) {
31 freeConfig(config.nemoCtc, Module) 31 freeConfig(config.nemoCtc, Module)
32 } 32 }
33 33
  34 + if ('toneCtc' in config) {
  35 + freeConfig(config.toneCtc, Module)
  36 + }
  37 +
34 if ('whisper' in config) { 38 if ('whisper' in config) {
35 freeConfig(config.whisper, Module) 39 freeConfig(config.whisper, Module)
36 } 40 }
@@ -173,6 +177,22 @@ function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) { @@ -173,6 +177,22 @@ function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) {
173 } 177 }
174 } 178 }
175 179
// Marshal an online T-one CTC model config into WASM heap memory.
//
// Allocates a NUL-terminated UTF-8 copy of config.model plus a struct
// holding one char* pointing at it; returns {buffer, ptr, len} so the
// caller can copy the struct and later free both allocations.
function initSherpaOnnxOnlineToneCtcModelConfig(config, Module) {
  const model = config.model || '';

  const bufferLen = Module.lengthBytesUTF8(model) + 1;
  const modelBuf = Module._malloc(bufferLen);
  Module.stringToUTF8(model, modelBuf, bufferLen);

  // The C struct has a single pointer field (4 bytes in wasm32).
  const structLen = 1 * 4;
  const structPtr = Module._malloc(structLen);
  Module.setValue(structPtr, modelBuf, 'i8*');

  return {buffer: modelBuf, ptr: structPtr, len: structLen};
}
  195 +
176 function initSherpaOnnxOnlineModelConfig(config, Module) { 196 function initSherpaOnnxOnlineModelConfig(config, Module) {
177 if (!('transducer' in config)) { 197 if (!('transducer' in config)) {
178 config.transducer = { 198 config.transducer = {
@@ -201,6 +221,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -201,6 +221,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
201 }; 221 };
202 } 222 }
203 223
  224 + if (!('toneCtc' in config)) {
  225 + config.toneCtc = {
  226 + model: '',
  227 + };
  228 + }
  229 +
204 if (!('tokensBuf' in config)) { 230 if (!('tokensBuf' in config)) {
205 config.tokensBuf = ''; 231 config.tokensBuf = '';
206 } 232 }
@@ -221,8 +247,11 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -221,8 +247,11 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
221 const nemoCtc = 247 const nemoCtc =
222 initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module); 248 initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module);
223 249
224 - const len =  
225 - transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 + nemoCtc.len; 250 + const toneCtc =
  251 + initSherpaOnnxOnlineToneCtcModelConfig(config.toneCtc, Module);
  252 +
  253 + const len = transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 +
  254 + nemoCtc.len + toneCtc.len;
226 255
227 const ptr = Module._malloc(len); 256 const ptr = Module._malloc(len);
228 257
@@ -308,9 +337,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -308,9 +337,13 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
308 Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset); 337 Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
309 offset += nemoCtc.len; 338 offset += nemoCtc.len;
310 339
  340 + Module._CopyHeap(toneCtc.ptr, toneCtc.len, ptr + offset);
  341 + offset += toneCtc.len;
  342 +
311 return { 343 return {
312 buffer: buffer, ptr: ptr, len: len, transducer: transducer, 344 buffer: buffer, ptr: ptr, len: len, transducer: transducer,
313 - paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc 345 + paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc,
  346 + toneCtc: toneCtc,
314 } 347 }
315 } 348 }
316 349
@@ -519,6 +552,10 @@ function createOnlineRecognizer(Module, myConfig) { @@ -519,6 +552,10 @@ function createOnlineRecognizer(Module, myConfig) {
519 model: '', 552 model: '',
520 }; 553 };
521 554
  555 + const onlineToneCtcModelConfig = {
  556 + model: '',
  557 + };
  558 +
522 let type = 0; 559 let type = 0;
523 560
524 switch (type) { 561 switch (type) {
@@ -541,6 +578,10 @@ function createOnlineRecognizer(Module, myConfig) { @@ -541,6 +578,10 @@ function createOnlineRecognizer(Module, myConfig) {
541 // nemoCtc 578 // nemoCtc
542 onlineNemoCtcModelConfig.model = './nemo-ctc.onnx'; 579 onlineNemoCtcModelConfig.model = './nemo-ctc.onnx';
543 break; 580 break;
  581 + case 4:
  582 + // toneCtc
  583 + onlineToneCtcModelConfig.model = './tone-ctc.onnx';
  584 + break;
544 } 585 }
545 586
546 587
@@ -549,6 +590,7 @@ function createOnlineRecognizer(Module, myConfig) { @@ -549,6 +590,7 @@ function createOnlineRecognizer(Module, myConfig) {
549 paraformer: onlineParaformerModelConfig, 590 paraformer: onlineParaformerModelConfig,
550 zipformer2Ctc: onlineZipformer2CtcModelConfig, 591 zipformer2Ctc: onlineZipformer2CtcModelConfig,
551 nemoCtc: onlineNemoCtcModelConfig, 592 nemoCtc: onlineNemoCtcModelConfig,
  593 + toneCtc: onlineToneCtcModelConfig,
552 tokens: './tokens.txt', 594 tokens: './tokens.txt',
553 numThreads: 1, 595 numThreads: 1,
554 provider: 'cpu', 596 provider: 'cpu',
@@ -559,8 +601,8 @@ function createOnlineRecognizer(Module, myConfig) { @@ -559,8 +601,8 @@ function createOnlineRecognizer(Module, myConfig) {
559 }; 601 };
560 602
561 const featureConfig = { 603 const featureConfig = {
562 - sampleRate: 16000,  
563 - featureDim: 80, 604 + sampleRate: 16000, // it is ignored when toneCtc is used
  605 + featureDim: 80, // it is ignored when toneCtc is used
564 }; 606 };
565 607
566 let recognizerConfig = { 608 let recognizerConfig = {
@@ -21,7 +21,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == @@ -21,7 +21,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
21 sizeof(SherpaOnnxOnlineTransducerModelConfig) + 21 sizeof(SherpaOnnxOnlineTransducerModelConfig) +
22 sizeof(SherpaOnnxOnlineParaformerModelConfig) + 22 sizeof(SherpaOnnxOnlineParaformerModelConfig) +
23 sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 + 23 sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 +
24 - sizeof(SherpaOnnxOnlineNemoCtcModelConfig), 24 + sizeof(SherpaOnnxOnlineNemoCtcModelConfig) +
  25 + sizeof(SherpaOnnxOnlineToneCtcModelConfig),
25 ""); 26 "");
26 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); 27 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
27 static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); 28 static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
@@ -39,6 +40,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { @@ -39,6 +40,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
39 auto paraformer_model_config = &model_config->paraformer; 40 auto paraformer_model_config = &model_config->paraformer;
40 auto ctc_model_config = &model_config->zipformer2_ctc; 41 auto ctc_model_config = &model_config->zipformer2_ctc;
41 auto nemo_ctc = &model_config->nemo_ctc; 42 auto nemo_ctc = &model_config->nemo_ctc;
  43 + auto t_one_ctc = &model_config->t_one_ctc;
42 44
43 fprintf(stdout, "----------online transducer model config----------\n"); 45 fprintf(stdout, "----------online transducer model config----------\n");
44 fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder); 46 fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder);
@@ -55,6 +57,9 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { @@ -55,6 +57,9 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
55 fprintf(stdout, "----------online nemo ctc model config----------\n"); 57 fprintf(stdout, "----------online nemo ctc model config----------\n");
56 fprintf(stdout, "model: %s\n", nemo_ctc->model); 58 fprintf(stdout, "model: %s\n", nemo_ctc->model);
57 59
  60 + fprintf(stdout, "----------online t-one ctc model config----------\n");
  61 + fprintf(stdout, "model: %s\n", t_one_ctc->model);
  62 +
58 fprintf(stdout, "tokens: %s\n", model_config->tokens); 63 fprintf(stdout, "tokens: %s\n", model_config->tokens);
59 fprintf(stdout, "num_threads: %d\n", model_config->num_threads); 64 fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
60 fprintf(stdout, "provider: %s\n", model_config->provider); 65 fprintf(stdout, "provider: %s\n", model_config->provider);
@@ -75,9 +75,10 @@ function initModelConfig(config, Module) { @@ -75,9 +75,10 @@ function initModelConfig(config, Module) {
75 const paraformer_len = 2 * 4 75 const paraformer_len = 2 * 4
76 const zipfomer2_ctc_len = 1 * 4 76 const zipfomer2_ctc_len = 1 * 4
77 const nemo_ctc_len = 1 * 4 77 const nemo_ctc_len = 1 * 4
  78 + const t_one_ctc_len = 1 * 4
78 79
79 const len = transducer.len + paraformer_len + zipfomer2_ctc_len + 9 * 4 + 80 const len = transducer.len + paraformer_len + zipfomer2_ctc_len + 9 * 4 +
80 - nemo_ctc_len; 81 + nemo_ctc_len + t_one_ctc_len;
81 82
82 const ptr = Module._malloc(len); 83 const ptr = Module._malloc(len);
83 Module.HEAPU8.fill(0, ptr, ptr + len); 84 Module.HEAPU8.fill(0, ptr, ptr + len);
@@ -152,6 +153,7 @@ function initModelConfig(config, Module) { @@ -152,6 +153,7 @@ function initModelConfig(config, Module) {
152 153
153 Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32'); 154 Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32');
154 offset += 4; 155 offset += 4;
  156 + // skip nemo_ctc and t_one_ctc
155 157
156 return { 158 return {
157 buffer: buffer, ptr: ptr, len: len, transducer: transducer 159 buffer: buffer, ptr: ptr, len: len, transducer: transducer
@@ -20,7 +20,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == @@ -20,7 +20,8 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
20 sizeof(SherpaOnnxOnlineTransducerModelConfig) + 20 sizeof(SherpaOnnxOnlineTransducerModelConfig) +
21 sizeof(SherpaOnnxOnlineParaformerModelConfig) + 21 sizeof(SherpaOnnxOnlineParaformerModelConfig) +
22 sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 + 22 sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 +
23 - sizeof(SherpaOnnxOnlineNemoCtcModelConfig), 23 + sizeof(SherpaOnnxOnlineNemoCtcModelConfig) +
  24 + sizeof(SherpaOnnxOnlineToneCtcModelConfig),
24 ""); 25 "");
25 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); 26 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
26 static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) == 27 static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) ==