Fangjun Kuang
Committed by GitHub

Add APIs for Online NeMo CTC models (#2454)

@@ -9,6 +9,49 @@ git status @@ -9,6 +9,49 @@ git status
9 ls -lh 9 ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
  12 +# online asr
  13 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  14 +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  15 +rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
  16 +node ./test-online-paraformer.js
  17 +rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
  18 +
  19 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  20 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  21 +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  22 +
  23 +rm -f itn*
  24 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  25 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  26 +
  27 +node ./test-online-transducer-itn.js
  28 +
  29 +node ./test-online-transducer.js
  30 +
  31 +rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  32 +
  33 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  34 +tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  35 +rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  36 +
  37 +node ./test-online-zipformer2-ctc.js
  38 +rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
  39 +
  40 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  41 +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  42 +rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  43 +node ./test-online-zipformer2-ctc-hlg.js
  44 +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
  45 +
  46 +echo "----------keyword spotting----------"
  47 +
  48 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  49 +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  50 +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  51 +
  52 +node ./test-keyword-spotter-transducer.js
  53 +rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
  54 +
12 # asr with offline nemo canary 55 # asr with offline nemo canary
13 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 56 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
14 tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 57 tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
@@ -145,15 +188,6 @@ rm Obama.wav @@ -145,15 +188,6 @@ rm Obama.wav
145 rm silero_vad.onnx 188 rm silero_vad.onnx
146 rm -rf sherpa-onnx-whisper-tiny.en 189 rm -rf sherpa-onnx-whisper-tiny.en
147 190
148 -echo "----------keyword spotting----------"  
149 -  
150 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2  
151 -tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2  
152 -rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2  
153 -  
154 -node ./test-keyword-spotter-transducer.js  
155 -rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01  
156 -  
157 # offline asr 191 # offline asr
158 # 192 #
159 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 193 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
@@ -218,37 +252,3 @@ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 @@ -218,37 +252,3 @@ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
218 252
219 node ./test-offline-moonshine.js 253 node ./test-offline-moonshine.js
220 rm -rf sherpa-onnx-moonshine-* 254 rm -rf sherpa-onnx-moonshine-*
221 -  
222 -# online asr  
223 -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2  
224 -tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2  
225 -rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2  
226 -node ./test-online-paraformer.js  
227 -rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en  
228 -  
229 -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2  
230 -tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2  
231 -rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2  
232 -  
233 -rm -f itn*  
234 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav  
235 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst  
236 -  
237 -node ./test-online-transducer-itn.js  
238 -  
239 -node ./test-online-transducer.js  
240 -  
241 -rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20  
242 -  
243 -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2  
244 -tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2  
245 -rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2  
246 -  
247 -node ./test-online-zipformer2-ctc.js  
248 -rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13  
249 -  
250 -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2  
251 -tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2  
252 -rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2  
253 -node ./test-online-zipformer2-ctc-hlg.js  
254 -rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18  
@@ -148,7 +148,7 @@ to download pre-trained non-streaming zipformer models. @@ -148,7 +148,7 @@ to download pre-trained non-streaming zipformer models.
148 148
149 dotnet run \ 149 dotnet run \
150 --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ 150 --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
151 - --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.onnx \ 151 + --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
152 --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \ 152 --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
153 ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \ 153 ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \
154 ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \ 154 ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \
@@ -18,7 +18,7 @@ fi @@ -18,7 +18,7 @@ fi
18 18
19 dotnet run \ 19 dotnet run \
20 --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ 20 --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
21 - --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.onnx \ 21 + --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
22 --rule-fsts=./itn_zh_number.fst \ 22 --rule-fsts=./itn_zh_number.fst \
23 --num-threads=2 \ 23 --num-threads=2 \
24 --files ./itn-zh-number.wav 24 --files ./itn-zh-number.wav
@@ -10,7 +10,7 @@ fi @@ -10,7 +10,7 @@ fi
10 10
11 dotnet run \ 11 dotnet run \
12 --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ 12 --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
13 - --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.onnx \ 13 + --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
14 --num-threads=2 \ 14 --num-threads=2 \
15 --files ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \ 15 --files ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \
16 ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \ 16 ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \
@@ -13,6 +13,4 @@ dotnet run \ @@ -13,6 +13,4 @@ dotnet run \
13 --tokens=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \ 13 --tokens=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \
14 --zipformer-ctc=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \ 14 --zipformer-ctc=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \
15 --num-threads=1 \ 15 --num-threads=1 \
16 - --files ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav \  
17 - ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/1.wav \  
18 - ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/8k.wav 16 + --files ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav
@@ -121,6 +121,9 @@ class KeywordSpotter { @@ -121,6 +121,9 @@ class KeywordSpotter {
121 c.ref.model.zipformer2Ctc.model = 121 c.ref.model.zipformer2Ctc.model =
122 config.model.zipformer2Ctc.model.toNativeUtf8(); 122 config.model.zipformer2Ctc.model.toNativeUtf8();
123 123
  124 + // nemoCtc
  125 + c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8();
  126 +
124 c.ref.model.tokens = config.model.tokens.toNativeUtf8(); 127 c.ref.model.tokens = config.model.tokens.toNativeUtf8();
125 c.ref.model.numThreads = config.model.numThreads; 128 c.ref.model.numThreads = config.model.numThreads;
126 c.ref.model.provider = config.model.provider.toNativeUtf8(); 129 c.ref.model.provider = config.model.provider.toNativeUtf8();
@@ -146,6 +149,7 @@ class KeywordSpotter { @@ -146,6 +149,7 @@ class KeywordSpotter {
146 calloc.free(c.ref.model.modelType); 149 calloc.free(c.ref.model.modelType);
147 calloc.free(c.ref.model.provider); 150 calloc.free(c.ref.model.provider);
148 calloc.free(c.ref.model.tokens); 151 calloc.free(c.ref.model.tokens);
  152 + calloc.free(c.ref.model.nemoCtc.model);
149 calloc.free(c.ref.model.zipformer2Ctc.model); 153 calloc.free(c.ref.model.zipformer2Ctc.model);
150 calloc.free(c.ref.model.paraformer.encoder); 154 calloc.free(c.ref.model.paraformer.encoder);
151 calloc.free(c.ref.model.paraformer.decoder); 155 calloc.free(c.ref.model.paraformer.decoder);
@@ -86,11 +86,33 @@ class OnlineZipformer2CtcModelConfig { @@ -86,11 +86,33 @@ class OnlineZipformer2CtcModelConfig {
86 final String model; 86 final String model;
87 } 87 }
88 88
  89 +class OnlineNemoCtcModelConfig {
  90 + const OnlineNemoCtcModelConfig({this.model = ''});
  91 +
  92 + factory OnlineNemoCtcModelConfig.fromJson(Map<String, dynamic> json) {
  93 + return OnlineNemoCtcModelConfig(
  94 + model: json['model'] as String? ?? '',
  95 + );
  96 + }
  97 +
  98 + @override
  99 + String toString() {
  100 + return 'OnlineNemoCtcModelConfig(model: $model)';
  101 + }
  102 +
  103 + Map<String, dynamic> toJson() => {
  104 + 'model': model,
  105 + };
  106 +
  107 + final String model;
  108 +}
  109 +
89 class OnlineModelConfig { 110 class OnlineModelConfig {
90 const OnlineModelConfig({ 111 const OnlineModelConfig({
91 this.transducer = const OnlineTransducerModelConfig(), 112 this.transducer = const OnlineTransducerModelConfig(),
92 this.paraformer = const OnlineParaformerModelConfig(), 113 this.paraformer = const OnlineParaformerModelConfig(),
93 this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(), 114 this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(),
  115 + this.nemoCtc = const OnlineNemoCtcModelConfig(),
94 required this.tokens, 116 required this.tokens,
95 this.numThreads = 1, 117 this.numThreads = 1,
96 this.provider = 'cpu', 118 this.provider = 'cpu',
@@ -108,6 +130,8 @@ class OnlineModelConfig { @@ -108,6 +130,8 @@ class OnlineModelConfig {
108 json['paraformer'] as Map<String, dynamic>? ?? const {}), 130 json['paraformer'] as Map<String, dynamic>? ?? const {}),
109 zipformer2Ctc: OnlineZipformer2CtcModelConfig.fromJson( 131 zipformer2Ctc: OnlineZipformer2CtcModelConfig.fromJson(
110 json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}), 132 json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}),
  133 + nemoCtc: OnlineNemoCtcModelConfig.fromJson(
  134 + json['nemoCtc'] as Map<String, dynamic>? ?? const {}),
111 tokens: json['tokens'] as String, 135 tokens: json['tokens'] as String,
112 numThreads: json['numThreads'] as int? ?? 1, 136 numThreads: json['numThreads'] as int? ?? 1,
113 provider: json['provider'] as String? ?? 'cpu', 137 provider: json['provider'] as String? ?? 'cpu',
@@ -120,13 +144,14 @@ class OnlineModelConfig { @@ -120,13 +144,14 @@ class OnlineModelConfig {
120 144
121 @override 145 @override
122 String toString() { 146 String toString() {
123 - return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)'; 147 + return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, nemoCtc: $nemoCtc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)';
124 } 148 }
125 149
126 Map<String, dynamic> toJson() => { 150 Map<String, dynamic> toJson() => {
127 'transducer': transducer.toJson(), 151 'transducer': transducer.toJson(),
128 'paraformer': paraformer.toJson(), 152 'paraformer': paraformer.toJson(),
129 'zipformer2Ctc': zipformer2Ctc.toJson(), 153 'zipformer2Ctc': zipformer2Ctc.toJson(),
  154 + 'nemoCtc': nemoCtc.toJson(),
130 'tokens': tokens, 155 'tokens': tokens,
131 'numThreads': numThreads, 156 'numThreads': numThreads,
132 'provider': provider, 157 'provider': provider,
@@ -139,6 +164,7 @@ class OnlineModelConfig { @@ -139,6 +164,7 @@ class OnlineModelConfig {
139 final OnlineTransducerModelConfig transducer; 164 final OnlineTransducerModelConfig transducer;
140 final OnlineParaformerModelConfig paraformer; 165 final OnlineParaformerModelConfig paraformer;
141 final OnlineZipformer2CtcModelConfig zipformer2Ctc; 166 final OnlineZipformer2CtcModelConfig zipformer2Ctc;
  167 + final OnlineNemoCtcModelConfig nemoCtc;
142 168
143 final String tokens; 169 final String tokens;
144 170
@@ -333,6 +359,9 @@ class OnlineRecognizer { @@ -333,6 +359,9 @@ class OnlineRecognizer {
333 c.ref.model.zipformer2Ctc.model = 359 c.ref.model.zipformer2Ctc.model =
334 config.model.zipformer2Ctc.model.toNativeUtf8(); 360 config.model.zipformer2Ctc.model.toNativeUtf8();
335 361
  362 + // nemoCtc
  363 + c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8();
  364 +
336 c.ref.model.tokens = config.model.tokens.toNativeUtf8(); 365 c.ref.model.tokens = config.model.tokens.toNativeUtf8();
337 c.ref.model.numThreads = config.model.numThreads; 366 c.ref.model.numThreads = config.model.numThreads;
338 c.ref.model.provider = config.model.provider.toNativeUtf8(); 367 c.ref.model.provider = config.model.provider.toNativeUtf8();
@@ -377,6 +406,7 @@ class OnlineRecognizer { @@ -377,6 +406,7 @@ class OnlineRecognizer {
377 calloc.free(c.ref.model.modelType); 406 calloc.free(c.ref.model.modelType);
378 calloc.free(c.ref.model.provider); 407 calloc.free(c.ref.model.provider);
379 calloc.free(c.ref.model.tokens); 408 calloc.free(c.ref.model.tokens);
  409 + calloc.free(c.ref.model.nemoCtc.model);
380 calloc.free(c.ref.model.zipformer2Ctc.model); 410 calloc.free(c.ref.model.zipformer2Ctc.model);
381 calloc.free(c.ref.model.paraformer.encoder); 411 calloc.free(c.ref.model.paraformer.encoder);
382 calloc.free(c.ref.model.paraformer.decoder); 412 calloc.free(c.ref.model.paraformer.decoder);
@@ -388,6 +388,10 @@ final class SherpaOnnxOnlineZipformer2CtcModelConfig extends Struct { @@ -388,6 +388,10 @@ final class SherpaOnnxOnlineZipformer2CtcModelConfig extends Struct {
388 external Pointer<Utf8> model; 388 external Pointer<Utf8> model;
389 } 389 }
390 390
  391 +final class SherpaOnnxOnlineNemoCtcModelConfig extends Struct {
  392 + external Pointer<Utf8> model;
  393 +}
  394 +
391 final class SherpaOnnxOnlineModelConfig extends Struct { 395 final class SherpaOnnxOnlineModelConfig extends Struct {
392 external SherpaOnnxOnlineTransducerModelConfig transducer; 396 external SherpaOnnxOnlineTransducerModelConfig transducer;
393 external SherpaOnnxOnlineParaformerModelConfig paraformer; 397 external SherpaOnnxOnlineParaformerModelConfig paraformer;
@@ -413,6 +417,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct { @@ -413,6 +417,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct {
413 417
414 @Int32() 418 @Int32()
415 external int tokensBufSize; 419 external int tokensBufSize;
  420 +
  421 + external SherpaOnnxOnlineNemoCtcModelConfig nemoCtc;
416 } 422 }
417 423
418 final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct { 424 final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct {
1 module non-streaming-canary-decode-files 1 module non-streaming-canary-decode-files
2 2
3 go 1.17 3 go 1.17
4 -  
5 -require (  
6 - github.com/k2-fsa/sherpa-onnx-go v1.12.4  
7 - github.com/spf13/pflag v1.0.6  
8 - github.com/youpy/go-wav v0.3.2  
9 -)  
10 -  
11 -require (  
12 - github.com/k2-fsa/sherpa-onnx-go-linux v1.12.4 // indirect  
13 - github.com/k2-fsa/sherpa-onnx-go-macos v1.12.4 // indirect  
14 - github.com/k2-fsa/sherpa-onnx-go-windows v1.12.4 // indirect  
15 - github.com/youpy/go-riff v0.1.0 // indirect  
16 - github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect  
17 -)  
@@ -25,6 +25,7 @@ export { Samples, @@ -25,6 +25,7 @@ export { Samples,
25 } from './src/main/ets/components/NonStreamingAsr'; 25 } from './src/main/ets/components/NonStreamingAsr';
26 26
27 export { OnlineStream, 27 export { OnlineStream,
  28 + OnlineNemoCtcModelConfig,
28 OnlineTransducerModelConfig, 29 OnlineTransducerModelConfig,
29 OnlineParaformerModelConfig, 30 OnlineParaformerModelConfig,
30 OnlineZipformer2CtcModelConfig, 31 OnlineZipformer2CtcModelConfig,
@@ -73,6 +73,22 @@ GetOnlineZipformer2CtcModelConfig(Napi::Object obj) { @@ -73,6 +73,22 @@ GetOnlineZipformer2CtcModelConfig(Napi::Object obj) {
73 return c; 73 return c;
74 } 74 }
75 75
  76 +static SherpaOnnxOnlineNemoCtcModelConfig GetOnlineNemoCtcModelConfig(
  77 + Napi::Object obj) {
  78 + SherpaOnnxOnlineNemoCtcModelConfig c;
  79 + memset(&c, 0, sizeof(c));
  80 +
  81 + if (!obj.Has("nemoCtc") || !obj.Get("nemoCtc").IsObject()) {
  82 + return c;
  83 + }
  84 +
  85 + Napi::Object o = obj.Get("nemoCtc").As<Napi::Object>();
  86 +
  87 + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  88 +
  89 + return c;
  90 +}
  91 +
76 static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( 92 static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig(
77 Napi::Object obj) { 93 Napi::Object obj) {
78 SherpaOnnxOnlineParaformerModelConfig c; 94 SherpaOnnxOnlineParaformerModelConfig c;
@@ -103,6 +119,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { @@ -103,6 +119,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
103 c.transducer = GetOnlineTransducerModelConfig(o); 119 c.transducer = GetOnlineTransducerModelConfig(o);
104 c.paraformer = GetOnlineParaformerModelConfig(o); 120 c.paraformer = GetOnlineParaformerModelConfig(o);
105 c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o); 121 c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o);
  122 + c.nemo_ctc = GetOnlineNemoCtcModelConfig(o);
106 123
107 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); 124 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
108 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); 125 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -248,6 +265,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( @@ -248,6 +265,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
248 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder); 265 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder);
249 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder); 266 SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder);
250 267
  268 + SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
251 SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model); 269 SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model);
252 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); 270 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
253 SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); 271 SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
@@ -46,10 +46,15 @@ export class OnlineZipformer2CtcModelConfig { @@ -46,10 +46,15 @@ export class OnlineZipformer2CtcModelConfig {
46 public model: string = ''; 46 public model: string = '';
47 } 47 }
48 48
  49 +export class OnlineNemoCtcModelConfig {
  50 + public model: string = '';
  51 +}
  52 +
49 export class OnlineModelConfig { 53 export class OnlineModelConfig {
50 public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig(); 54 public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig();
51 public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig(); 55 public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig();
52 public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig(); 56 public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig();
  57 + public nemo_ctc: OnlineNemoCtcModelConfig = new OnlineNemoCtcModelConfig();
53 public tokens: string = ''; 58 public tokens: string = '';
54 public numThreads: number = 1; 59 public numThreads: number = 1;
55 public provider: string = 'cpu'; 60 public provider: string = 'cpu';
@@ -338,7 +338,7 @@ void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() { @@ -338,7 +338,7 @@ void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
338 msg += 338 msg +=
339 "wget " 339 "wget "
340 "https://huggingface.co/csukuangfj/" 340 "https://huggingface.co/csukuangfj/"
341 - "sherpa-onnx-paraformer-zh-2023-09-14/resolve/main/model.onnx\r\n"; 341 + "sherpa-onnx-paraformer-zh-2023-09-14/resolve/main/model.int8.onnx\r\n";
342 msg += 342 msg +=
343 "wget " 343 "wget "
344 "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-09-14/" 344 "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-09-14/"
@@ -24,6 +24,7 @@ namespace SherpaOnnx @@ -24,6 +24,7 @@ namespace SherpaOnnx
24 BpeVocab = ""; 24 BpeVocab = "";
25 TokensBuf = ""; 25 TokensBuf = "";
26 TokensBufSize = 0; 26 TokensBufSize = 0;
  27 + NemoCtc = new OnlineNemoCtcModelConfig();
27 } 28 }
28 29
29 public OnlineTransducerModelConfig Transducer; 30 public OnlineTransducerModelConfig Transducer;
@@ -55,6 +56,8 @@ namespace SherpaOnnx @@ -55,6 +56,8 @@ namespace SherpaOnnx
55 public string TokensBuf; 56 public string TokensBuf;
56 57
57 public int TokensBufSize; 58 public int TokensBufSize;
  59 +
  60 + public OnlineNemoCtcModelConfig NemoCtc;
58 } 61 }
59 62
60 } 63 }
  1 +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + [StructLayout(LayoutKind.Sequential)]
  8 + public struct OnlineNemoCtcModelConfig
  9 + {
  10 + public OnlineNemoCtcModelConfig()
  11 + {
  12 + Model = "";
  13 + }
  14 +
  15 + [MarshalAs(UnmanagedType.LPStr)]
  16 + public string Model;
  17 + }
  18 +}
@@ -77,6 +77,10 @@ type OnlineZipformer2CtcModelConfig struct { @@ -77,6 +77,10 @@ type OnlineZipformer2CtcModelConfig struct {
77 Model string // Path to the onnx model 77 Model string // Path to the onnx model
78 } 78 }
79 79
  80 +type OnlineNemoCtcModelConfig struct {
  81 + Model string // Path to the onnx model
  82 +}
  83 +
80 // Configuration for online/streaming models 84 // Configuration for online/streaming models
81 // 85 //
82 // Please refer to 86 // Please refer to
@@ -87,6 +91,7 @@ type OnlineModelConfig struct { @@ -87,6 +91,7 @@ type OnlineModelConfig struct {
87 Transducer OnlineTransducerModelConfig 91 Transducer OnlineTransducerModelConfig
88 Paraformer OnlineParaformerModelConfig 92 Paraformer OnlineParaformerModelConfig
89 Zipformer2Ctc OnlineZipformer2CtcModelConfig 93 Zipformer2Ctc OnlineZipformer2CtcModelConfig
  94 + NemoCtc OnlineNemoCtcModelConfig
90 Tokens string // Path to tokens.txt 95 Tokens string // Path to tokens.txt
91 NumThreads int // Number of threads to use for neural network computation 96 NumThreads int // Number of threads to use for neural network computation
92 Provider string // Optional. Valid values are: cpu, cuda, coreml 97 Provider string // Optional. Valid values are: cpu, cuda, coreml
@@ -197,6 +202,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { @@ -197,6 +202,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
197 c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model) 202 c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model)
198 defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model)) 203 defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model))
199 204
  205 + c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model)
  206 + defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))
  207 +
200 c.model_config.tokens = C.CString(config.ModelConfig.Tokens) 208 c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
201 defer C.free(unsafe.Pointer(c.model_config.tokens)) 209 defer C.free(unsafe.Pointer(c.model_config.tokens))
202 210
@@ -1814,6 +1822,9 @@ func NewKeywordSpotter(config *KeywordSpotterConfig) *KeywordSpotter { @@ -1814,6 +1822,9 @@ func NewKeywordSpotter(config *KeywordSpotterConfig) *KeywordSpotter {
1814 c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model) 1822 c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model)
1815 defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model)) 1823 defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model))
1816 1824
  1825 + c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model)
  1826 + defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))
  1827 +
1817 c.model_config.tokens = C.CString(config.ModelConfig.Tokens) 1828 c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
1818 defer C.free(unsafe.Pointer(c.model_config.tokens)) 1829 defer C.free(unsafe.Pointer(c.model_config.tokens))
1819 1830
@@ -97,6 +97,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( @@ -97,6 +97,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig(
97 config->model_config.tokens_buf, config->model_config.tokens_buf_size); 97 config->model_config.tokens_buf, config->model_config.tokens_buf_size);
98 } 98 }
99 99
  100 + recognizer_config.model_config.nemo_ctc.model =
  101 + SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");
  102 +
100 recognizer_config.model_config.num_threads = 103 recognizer_config.model_config.num_threads =
101 SHERPA_ONNX_OR(config->model_config.num_threads, 1); 104 SHERPA_ONNX_OR(config->model_config.num_threads, 1);
102 recognizer_config.model_config.provider_config.provider = 105 recognizer_config.model_config.provider_config.provider =
@@ -108,8 +111,7 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( @@ -108,8 +111,7 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig(
108 111
109 recognizer_config.model_config.model_type = 112 recognizer_config.model_config.model_type =
110 SHERPA_ONNX_OR(config->model_config.model_type, ""); 113 SHERPA_ONNX_OR(config->model_config.model_type, "");
111 - recognizer_config.model_config.debug =  
112 - SHERPA_ONNX_OR(config->model_config.debug, 0); 114 + recognizer_config.model_config.debug = config->model_config.debug;
113 recognizer_config.model_config.modeling_unit = 115 recognizer_config.model_config.modeling_unit =
114 SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); 116 SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar");
115 117
@@ -431,8 +433,7 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( @@ -431,8 +433,7 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
431 SHERPA_ONNX_OR(config->model_config.tokens, ""); 433 SHERPA_ONNX_OR(config->model_config.tokens, "");
432 recognizer_config.model_config.num_threads = 434 recognizer_config.model_config.num_threads =
433 SHERPA_ONNX_OR(config->model_config.num_threads, 1); 435 SHERPA_ONNX_OR(config->model_config.num_threads, 1);
434 - recognizer_config.model_config.debug =  
435 - SHERPA_ONNX_OR(config->model_config.debug, 0); 436 + recognizer_config.model_config.debug = config->model_config.debug;
436 recognizer_config.model_config.provider = 437 recognizer_config.model_config.provider =
437 SHERPA_ONNX_OR(config->model_config.provider, "cpu"); 438 SHERPA_ONNX_OR(config->model_config.provider, "cpu");
438 if (recognizer_config.model_config.provider.empty()) { 439 if (recognizer_config.model_config.provider.empty()) {
@@ -759,6 +760,9 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig( @@ -759,6 +760,9 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig(
759 spotter_config.model_config.zipformer2_ctc.model = 760 spotter_config.model_config.zipformer2_ctc.model =
760 SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, ""); 761 SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, "");
761 762
  763 + spotter_config.model_config.nemo_ctc.model =
  764 + SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");
  765 +
762 spotter_config.model_config.tokens = 766 spotter_config.model_config.tokens =
763 SHERPA_ONNX_OR(config->model_config.tokens, ""); 767 SHERPA_ONNX_OR(config->model_config.tokens, "");
764 if (config->model_config.tokens_buf && 768 if (config->model_config.tokens_buf &&
@@ -777,8 +781,7 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig( @@ -777,8 +781,7 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig(
777 781
778 spotter_config.model_config.model_type = 782 spotter_config.model_config.model_type =
779 SHERPA_ONNX_OR(config->model_config.model_type, ""); 783 SHERPA_ONNX_OR(config->model_config.model_type, "");
780 - spotter_config.model_config.debug =  
781 - SHERPA_ONNX_OR(config->model_config.debug, 0); 784 + spotter_config.model_config.debug = config->model_config.debug;
782 785
783 spotter_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4); 786 spotter_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
784 787
@@ -1055,7 +1058,7 @@ sherpa_onnx::VadModelConfig GetVadModelConfig( @@ -1055,7 +1058,7 @@ sherpa_onnx::VadModelConfig GetVadModelConfig(
1055 vad_config.provider = "cpu"; 1058 vad_config.provider = "cpu";
1056 } 1059 }
1057 1060
1058 - vad_config.debug = SHERPA_ONNX_OR(config->debug, false); 1061 + vad_config.debug = config->debug;
1059 1062
1060 if (vad_config.debug) { 1063 if (vad_config.debug) {
1061 #if __OHOS__ 1064 #if __OHOS__
@@ -1542,7 +1545,7 @@ GetSpeakerEmbeddingExtractorConfig( @@ -1542,7 +1545,7 @@ GetSpeakerEmbeddingExtractorConfig(
1542 c.model = SHERPA_ONNX_OR(config->model, ""); 1545 c.model = SHERPA_ONNX_OR(config->model, "");
1543 1546
1544 c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); 1547 c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
1545 - c.debug = SHERPA_ONNX_OR(config->debug, 0); 1548 + c.debug = config->debug;
1546 c.provider = SHERPA_ONNX_OR(config->provider, "cpu"); 1549 c.provider = SHERPA_ONNX_OR(config->provider, "cpu");
1547 if (c.provider.empty()) { 1550 if (c.provider.empty()) {
1548 c.provider = "cpu"; 1551 c.provider = "cpu";
@@ -100,6 +100,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineZipformer2CtcModelConfig { @@ -100,6 +100,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineZipformer2CtcModelConfig {
100 const char *model; 100 const char *model;
101 } SherpaOnnxOnlineZipformer2CtcModelConfig; 101 } SherpaOnnxOnlineZipformer2CtcModelConfig;
102 102
  103 +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineNemoCtcModelConfig {
  104 + const char *model;
  105 +} SherpaOnnxOnlineNemoCtcModelConfig;
  106 +
103 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { 107 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig {
104 SherpaOnnxOnlineTransducerModelConfig transducer; 108 SherpaOnnxOnlineTransducerModelConfig transducer;
105 SherpaOnnxOnlineParaformerModelConfig paraformer; 109 SherpaOnnxOnlineParaformerModelConfig paraformer;
@@ -120,6 +124,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { @@ -120,6 +124,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig {
120 const char *tokens_buf; 124 const char *tokens_buf;
121 /// byte size excluding the trailing '\0' 125 /// byte size excluding the trailing '\0'
122 int32_t tokens_buf_size; 126 int32_t tokens_buf_size;
  127 + SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc;
123 } SherpaOnnxOnlineModelConfig; 128 } SherpaOnnxOnlineModelConfig;
124 129
125 /// It expects 16 kHz 16-bit single channel wave format. 130 /// It expects 16 kHz 16-bit single channel wave format.
@@ -69,6 +69,8 @@ OnlineRecognizer OnlineRecognizer::Create( @@ -69,6 +69,8 @@ OnlineRecognizer OnlineRecognizer::Create(
69 c.model_config.zipformer2_ctc.model = 69 c.model_config.zipformer2_ctc.model =
70 config.model_config.zipformer2_ctc.model.c_str(); 70 config.model_config.zipformer2_ctc.model.c_str();
71 71
  72 + c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str();
  73 +
72 c.model_config.tokens = config.model_config.tokens.c_str(); 74 c.model_config.tokens = config.model_config.tokens.c_str();
73 c.model_config.num_threads = config.model_config.num_threads; 75 c.model_config.num_threads = config.model_config.num_threads;
74 c.model_config.provider = config.model_config.provider.c_str(); 76 c.model_config.provider = config.model_config.provider.c_str();
@@ -473,6 +475,8 @@ KeywordSpotter KeywordSpotter::Create(const KeywordSpotterConfig &config) { @@ -473,6 +475,8 @@ KeywordSpotter KeywordSpotter::Create(const KeywordSpotterConfig &config) {
473 c.model_config.zipformer2_ctc.model = 475 c.model_config.zipformer2_ctc.model =
474 config.model_config.zipformer2_ctc.model.c_str(); 476 config.model_config.zipformer2_ctc.model.c_str();
475 477
  478 + c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str();
  479 +
476 c.model_config.tokens = config.model_config.tokens.c_str(); 480 c.model_config.tokens = config.model_config.tokens.c_str();
477 c.model_config.num_threads = config.model_config.num_threads; 481 c.model_config.num_threads = config.model_config.num_threads;
478 c.model_config.provider = config.model_config.provider.c_str(); 482 c.model_config.provider = config.model_config.provider.c_str();
@@ -32,10 +32,15 @@ struct OnlineZipformer2CtcModelConfig { @@ -32,10 +32,15 @@ struct OnlineZipformer2CtcModelConfig {
32 std::string model; 32 std::string model;
33 }; 33 };
34 34
  35 +struct OnlineNemoCtcModelConfig {
  36 + std::string model;
  37 +};
  38 +
35 struct OnlineModelConfig { 39 struct OnlineModelConfig {
36 OnlineTransducerModelConfig transducer; 40 OnlineTransducerModelConfig transducer;
37 OnlineParaformerModelConfig paraformer; 41 OnlineParaformerModelConfig paraformer;
38 OnlineZipformer2CtcModelConfig zipformer2_ctc; 42 OnlineZipformer2CtcModelConfig zipformer2_ctc;
  43 + OnlineNemoCtcModelConfig nemo_ctc;
39 std::string tokens; 44 std::string tokens;
40 int32_t num_threads = 1; 45 int32_t num_threads = 1;
41 std::string provider = "cpu"; 46 std::string provider = "cpu";
@@ -175,6 +175,77 @@ class SileroVadModelRknn::Impl { @@ -175,6 +175,77 @@ class SileroVadModelRknn::Impl {
175 config_.silero_vad.threshold = threshold; 175 config_.silero_vad.threshold = threshold;
176 } 176 }
177 177
  178 + float Run(const float *samples, int32_t n) {
  179 + std::vector<rknn_input> inputs(input_attrs_.size());
  180 +
  181 + for (int32_t i = 0; i < static_cast<int32_t>(inputs.size()); ++i) {
  182 + auto &input = inputs[i];
  183 + auto &attr = input_attrs_[i];
  184 + input.index = attr.index;
  185 +
  186 + if (attr.type == RKNN_TENSOR_FLOAT16) {
  187 + input.type = RKNN_TENSOR_FLOAT32;
  188 + } else if (attr.type == RKNN_TENSOR_INT64) {
  189 + input.type = RKNN_TENSOR_INT64;
  190 + } else {
  191 + SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
  192 + get_type_string(attr.type));
  193 + SHERPA_ONNX_EXIT(-1);
  194 + }
  195 +
  196 + input.fmt = attr.fmt;
  197 + if (i == 0) {
  198 + input.buf = reinterpret_cast<void *>(const_cast<float *>(samples));
  199 + input.size = n * sizeof(float);
  200 + } else {
  201 + input.buf = reinterpret_cast<void *>(states_[i - 1].data());
  202 + input.size = states_[i - 1].size() * sizeof(float);
  203 + }
  204 + }
  205 +
  206 + std::vector<float> out(output_attrs_[0].n_elems);
  207 +
  208 + auto &next_states = states_;
  209 +
  210 + std::vector<rknn_output> outputs(output_attrs_.size());
  211 +
  212 + for (int32_t i = 0; i < outputs.size(); ++i) {
  213 + auto &output = outputs[i];
  214 + auto &attr = output_attrs_[i];
  215 + output.index = attr.index;
  216 + output.is_prealloc = 1;
  217 +
  218 + if (attr.type == RKNN_TENSOR_FLOAT16) {
  219 + output.want_float = 1;
  220 + } else if (attr.type == RKNN_TENSOR_INT64) {
  221 + output.want_float = 0;
  222 + } else {
  223 + SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,
  224 + get_type_string(attr.type));
  225 + SHERPA_ONNX_EXIT(-1);
  226 + }
  227 +
  228 + if (i == 0) {
  229 + output.size = out.size() * sizeof(float);
  230 + output.buf = reinterpret_cast<void *>(out.data());
  231 + } else {
  232 + output.size = next_states[i - 1].size() * sizeof(float);
  233 + output.buf = reinterpret_cast<void *>(next_states[i - 1].data());
  234 + }
  235 + }
  236 +
  237 + auto ret = rknn_inputs_set(ctx_, inputs.size(), inputs.data());
  238 + SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set inputs");
  239 +
  240 + ret = rknn_run(ctx_, nullptr);
  241 + SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the model");
  242 +
  243 + ret = rknn_outputs_get(ctx_, outputs.size(), outputs.data(), nullptr);
  244 + SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get model output");
  245 +
  246 + return out[0];
  247 + }
  248 +
178 private: 249 private:
179 void Init(void *model_data, size_t model_data_length) { 250 void Init(void *model_data, size_t model_data_length) {
180 InitContext(model_data, model_data_length, config_.debug, &ctx_); 251 InitContext(model_data, model_data_length, config_.debug, &ctx_);
@@ -267,77 +338,6 @@ class SileroVadModelRknn::Impl { @@ -267,77 +338,6 @@ class SileroVadModelRknn::Impl {
267 Reset(); 338 Reset();
268 } 339 }
269 340
270 - float Run(const float *samples, int32_t n) {  
271 - std::vector<rknn_input> inputs(input_attrs_.size());  
272 -  
273 - for (int32_t i = 0; i < static_cast<int32_t>(inputs.size()); ++i) {  
274 - auto &input = inputs[i];  
275 - auto &attr = input_attrs_[i];  
276 - input.index = attr.index;  
277 -  
278 - if (attr.type == RKNN_TENSOR_FLOAT16) {  
279 - input.type = RKNN_TENSOR_FLOAT32;  
280 - } else if (attr.type == RKNN_TENSOR_INT64) {  
281 - input.type = RKNN_TENSOR_INT64;  
282 - } else {  
283 - SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,  
284 - get_type_string(attr.type));  
285 - SHERPA_ONNX_EXIT(-1);  
286 - }  
287 -  
288 - input.fmt = attr.fmt;  
289 - if (i == 0) {  
290 - input.buf = reinterpret_cast<void *>(const_cast<float *>(samples));  
291 - input.size = n * sizeof(float);  
292 - } else {  
293 - input.buf = reinterpret_cast<void *>(states_[i - 1].data());  
294 - input.size = states_[i - 1].size() * sizeof(float);  
295 - }  
296 - }  
297 -  
298 - std::vector<float> out(output_attrs_[0].n_elems);  
299 -  
300 - auto &next_states = states_;  
301 -  
302 - std::vector<rknn_output> outputs(output_attrs_.size());  
303 -  
304 - for (int32_t i = 0; i < outputs.size(); ++i) {  
305 - auto &output = outputs[i];  
306 - auto &attr = output_attrs_[i];  
307 - output.index = attr.index;  
308 - output.is_prealloc = 1;  
309 -  
310 - if (attr.type == RKNN_TENSOR_FLOAT16) {  
311 - output.want_float = 1;  
312 - } else if (attr.type == RKNN_TENSOR_INT64) {  
313 - output.want_float = 0;  
314 - } else {  
315 - SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type,  
316 - get_type_string(attr.type));  
317 - SHERPA_ONNX_EXIT(-1);  
318 - }  
319 -  
320 - if (i == 0) {  
321 - output.size = out.size() * sizeof(float);  
322 - output.buf = reinterpret_cast<void *>(out.data());  
323 - } else {  
324 - output.size = next_states[i - 1].size() * sizeof(float);  
325 - output.buf = reinterpret_cast<void *>(next_states[i - 1].data());  
326 - }  
327 - }  
328 -  
329 - auto ret = rknn_inputs_set(ctx_, inputs.size(), inputs.data());  
330 - SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set inputs");  
331 -  
332 - ret = rknn_run(ctx_, nullptr);  
333 - SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the model");  
334 -  
335 - ret = rknn_outputs_get(ctx_, outputs.size(), outputs.data(), nullptr);  
336 - SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get model output");  
337 -  
338 - return out[0];  
339 - }  
340 -  
341 private: 341 private:
342 VadModelConfig config_; 342 VadModelConfig config_;
343 rknn_context ctx_ = 0; 343 rknn_context ctx_ = 0;
@@ -395,6 +395,10 @@ void SileroVadModelRknn::SetThreshold(float threshold) { @@ -395,6 +395,10 @@ void SileroVadModelRknn::SetThreshold(float threshold) {
395 impl_->SetThreshold(threshold); 395 impl_->SetThreshold(threshold);
396 } 396 }
397 397
  398 +float SileroVadModelRknn::Compute(const float *samples, int32_t n) {
  399 + return impl_->Run(samples, n);
  400 +}
  401 +
398 #if __ANDROID_API__ >= 9 402 #if __ANDROID_API__ >= 9
399 template SileroVadModelRknn::SileroVadModelRknn(AAssetManager *mgr, 403 template SileroVadModelRknn::SileroVadModelRknn(AAssetManager *mgr,
400 const VadModelConfig &config); 404 const VadModelConfig &config);
@@ -32,6 +32,7 @@ class SileroVadModelRknn : public VadModel { @@ -32,6 +32,7 @@ class SileroVadModelRknn : public VadModel {
32 * @return Return true if speech is detected. Return false otherwise. 32 * @return Return true if speech is detected. Return false otherwise.
33 */ 33 */
34 bool IsSpeech(const float *samples, int32_t n) override; 34 bool IsSpeech(const float *samples, int32_t n) override;
  35 + float Compute(const float *samples, int32_t n) override;
35 36
36 // For silero vad V4, it is WindowShift(). 37 // For silero vad V4, it is WindowShift().
37 int32_t WindowSize() const override; 38 int32_t WindowSize() const override;
@@ -89,8 +89,8 @@ void SafeJNI(JNIEnv *env, const char *functionName, Func func) { @@ -89,8 +89,8 @@ void SafeJNI(JNIEnv *env, const char *functionName, Func func) {
89 } 89 }
90 90
91 // Helper function to validate JNI pointers 91 // Helper function to validate JNI pointers
92 -inline bool ValidatePointer(JNIEnv *env, jlong ptr,  
93 - const char *functionName, const char *message) { 92 +inline bool ValidatePointer(JNIEnv *env, jlong ptr, const char *functionName,
  93 + const char *message) {
94 if (ptr == 0) { 94 if (ptr == 0) {
95 jclass exClass = env->FindClass("java/lang/NullPointerException"); 95 jclass exClass = env->FindClass("java/lang/NullPointerException");
96 if (exClass != nullptr) { 96 if (exClass != nullptr) {
@@ -9,6 +9,9 @@ @@ -9,6 +9,9 @@
9 9
10 namespace sherpa_onnx { 10 namespace sherpa_onnx {
11 11
  12 +OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls,
  13 + jobject model_config);
  14 +
12 static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) { 15 static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) {
13 KeywordSpotterConfig ans; 16 KeywordSpotterConfig ans;
14 17
@@ -57,54 +60,7 @@ static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) { @@ -57,54 +60,7 @@ static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) {
57 "Lcom/k2fsa/sherpa/onnx/OnlineModelConfig;"); 60 "Lcom/k2fsa/sherpa/onnx/OnlineModelConfig;");
58 jobject model_config = env->GetObjectField(config, fid); 61 jobject model_config = env->GetObjectField(config, fid);
59 jclass model_config_cls = env->GetObjectClass(model_config); 62 jclass model_config_cls = env->GetObjectClass(model_config);
60 -  
61 - // transducer  
62 - fid = env->GetFieldID(model_config_cls, "transducer",  
63 - "Lcom/k2fsa/sherpa/onnx/OnlineTransducerModelConfig;");  
64 - jobject transducer_config = env->GetObjectField(model_config, fid);  
65 - jclass transducer_config_cls = env->GetObjectClass(transducer_config);  
66 -  
67 - fid = env->GetFieldID(transducer_config_cls, "encoder", "Ljava/lang/String;");  
68 - s = (jstring)env->GetObjectField(transducer_config, fid);  
69 - p = env->GetStringUTFChars(s, nullptr);  
70 - ans.model_config.transducer.encoder = p;  
71 - env->ReleaseStringUTFChars(s, p);  
72 -  
73 - fid = env->GetFieldID(transducer_config_cls, "decoder", "Ljava/lang/String;");  
74 - s = (jstring)env->GetObjectField(transducer_config, fid);  
75 - p = env->GetStringUTFChars(s, nullptr);  
76 - ans.model_config.transducer.decoder = p;  
77 - env->ReleaseStringUTFChars(s, p);  
78 -  
79 - fid = env->GetFieldID(transducer_config_cls, "joiner", "Ljava/lang/String;");  
80 - s = (jstring)env->GetObjectField(transducer_config, fid);  
81 - p = env->GetStringUTFChars(s, nullptr);  
82 - ans.model_config.transducer.joiner = p;  
83 - env->ReleaseStringUTFChars(s, p);  
84 -  
85 - fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;");  
86 - s = (jstring)env->GetObjectField(model_config, fid);  
87 - p = env->GetStringUTFChars(s, nullptr);  
88 - ans.model_config.tokens = p;  
89 - env->ReleaseStringUTFChars(s, p);  
90 -  
91 - fid = env->GetFieldID(model_config_cls, "numThreads", "I");  
92 - ans.model_config.num_threads = env->GetIntField(model_config, fid);  
93 -  
94 - fid = env->GetFieldID(model_config_cls, "debug", "Z");  
95 - ans.model_config.debug = env->GetBooleanField(model_config, fid);  
96 -  
97 - fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;");  
98 - s = (jstring)env->GetObjectField(model_config, fid);  
99 - p = env->GetStringUTFChars(s, nullptr);  
100 - ans.model_config.provider_config.provider = p;  
101 - env->ReleaseStringUTFChars(s, p);  
102 -  
103 - fid = env->GetFieldID(model_config_cls, "modelType", "Ljava/lang/String;");  
104 - s = (jstring)env->GetObjectField(model_config, fid);  
105 - p = env->GetStringUTFChars(s, nullptr);  
106 - ans.model_config.model_type = p;  
107 - env->ReleaseStringUTFChars(s, p); 63 + ans.model_config = GetOnlineModelConfig(env, model_config_cls, model_config);
108 64
109 return ans; 65 return ans;
110 } 66 }
@@ -10,6 +10,117 @@ @@ -10,6 +10,117 @@
10 10
11 namespace sherpa_onnx { 11 namespace sherpa_onnx {
12 12
  13 +OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls,
  14 + jobject model_config) {
  15 + OnlineModelConfig ans;
  16 + // transducer
  17 + auto fid =
  18 + env->GetFieldID(model_config_cls, "transducer",
  19 + "Lcom/k2fsa/sherpa/onnx/OnlineTransducerModelConfig;");
  20 + jobject transducer_config = env->GetObjectField(model_config, fid);
  21 + jclass transducer_config_cls = env->GetObjectClass(transducer_config);
  22 +
  23 + fid = env->GetFieldID(transducer_config_cls, "encoder", "Ljava/lang/String;");
  24 + auto s = (jstring)env->GetObjectField(transducer_config, fid);
  25 + auto p = env->GetStringUTFChars(s, nullptr);
  26 + ans.transducer.encoder = p;
  27 + env->ReleaseStringUTFChars(s, p);
  28 +
  29 + fid = env->GetFieldID(transducer_config_cls, "decoder", "Ljava/lang/String;");
  30 + s = (jstring)env->GetObjectField(transducer_config, fid);
  31 + p = env->GetStringUTFChars(s, nullptr);
  32 + ans.transducer.decoder = p;
  33 + env->ReleaseStringUTFChars(s, p);
  34 +
  35 + fid = env->GetFieldID(transducer_config_cls, "joiner", "Ljava/lang/String;");
  36 + s = (jstring)env->GetObjectField(transducer_config, fid);
  37 + p = env->GetStringUTFChars(s, nullptr);
  38 + ans.transducer.joiner = p;
  39 + env->ReleaseStringUTFChars(s, p);
  40 +
  41 + // paraformer
  42 + fid = env->GetFieldID(model_config_cls, "paraformer",
  43 + "Lcom/k2fsa/sherpa/onnx/OnlineParaformerModelConfig;");
  44 + jobject paraformer_config = env->GetObjectField(model_config, fid);
  45 + jclass paraformer_config_cls = env->GetObjectClass(paraformer_config);
  46 +
  47 + fid = env->GetFieldID(paraformer_config_cls, "encoder", "Ljava/lang/String;");
  48 + s = (jstring)env->GetObjectField(paraformer_config, fid);
  49 + p = env->GetStringUTFChars(s, nullptr);
  50 + ans.paraformer.encoder = p;
  51 + env->ReleaseStringUTFChars(s, p);
  52 +
  53 + fid = env->GetFieldID(paraformer_config_cls, "decoder", "Ljava/lang/String;");
  54 + s = (jstring)env->GetObjectField(paraformer_config, fid);
  55 + p = env->GetStringUTFChars(s, nullptr);
  56 + ans.paraformer.decoder = p;
  57 + env->ReleaseStringUTFChars(s, p);
  58 +
  59 + // streaming zipformer2 CTC
  60 + fid =
  61 + env->GetFieldID(model_config_cls, "zipformer2Ctc",
  62 + "Lcom/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig;");
  63 + jobject zipformer2_ctc_config = env->GetObjectField(model_config, fid);
  64 + jclass zipformer2_ctc_config_cls = env->GetObjectClass(zipformer2_ctc_config);
  65 +
  66 + fid =
  67 + env->GetFieldID(zipformer2_ctc_config_cls, "model", "Ljava/lang/String;");
  68 + s = (jstring)env->GetObjectField(zipformer2_ctc_config, fid);
  69 + p = env->GetStringUTFChars(s, nullptr);
  70 + ans.zipformer2_ctc.model = p;
  71 + env->ReleaseStringUTFChars(s, p);
  72 +
  73 + // streaming NeMo CTC
  74 + fid = env->GetFieldID(model_config_cls, "neMoCtc",
  75 + "Lcom/k2fsa/sherpa/onnx/OnlineNeMoCtcModelConfig;");
  76 + jobject nemo_ctc_config = env->GetObjectField(model_config, fid);
  77 + jclass nemo_ctc_config_cls = env->GetObjectClass(nemo_ctc_config);
  78 +
  79 + fid = env->GetFieldID(nemo_ctc_config_cls, "model", "Ljava/lang/String;");
  80 + s = (jstring)env->GetObjectField(nemo_ctc_config, fid);
  81 + p = env->GetStringUTFChars(s, nullptr);
  82 + ans.nemo_ctc.model = p;
  83 + env->ReleaseStringUTFChars(s, p);
  84 +
  85 + fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;");
  86 + s = (jstring)env->GetObjectField(model_config, fid);
  87 + p = env->GetStringUTFChars(s, nullptr);
  88 + ans.tokens = p;
  89 + env->ReleaseStringUTFChars(s, p);
  90 +
  91 + fid = env->GetFieldID(model_config_cls, "numThreads", "I");
  92 + ans.num_threads = env->GetIntField(model_config, fid);
  93 +
  94 + fid = env->GetFieldID(model_config_cls, "debug", "Z");
  95 + ans.debug = env->GetBooleanField(model_config, fid);
  96 +
  97 + fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;");
  98 + s = (jstring)env->GetObjectField(model_config, fid);
  99 + p = env->GetStringUTFChars(s, nullptr);
  100 + ans.provider_config.provider = p;
  101 + env->ReleaseStringUTFChars(s, p);
  102 +
  103 + fid = env->GetFieldID(model_config_cls, "modelType", "Ljava/lang/String;");
  104 + s = (jstring)env->GetObjectField(model_config, fid);
  105 + p = env->GetStringUTFChars(s, nullptr);
  106 + ans.model_type = p;
  107 + env->ReleaseStringUTFChars(s, p);
  108 +
  109 + fid = env->GetFieldID(model_config_cls, "modelingUnit", "Ljava/lang/String;");
  110 + s = (jstring)env->GetObjectField(model_config, fid);
  111 + p = env->GetStringUTFChars(s, nullptr);
  112 + ans.modeling_unit = p;
  113 + env->ReleaseStringUTFChars(s, p);
  114 +
  115 + fid = env->GetFieldID(model_config_cls, "bpeVocab", "Ljava/lang/String;");
  116 + s = (jstring)env->GetObjectField(model_config, fid);
  117 + p = env->GetStringUTFChars(s, nullptr);
  118 + ans.bpe_vocab = p;
  119 + env->ReleaseStringUTFChars(s, p);
  120 +
  121 + return ans;
  122 +}
  123 +
13 static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { 124 static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
14 OnlineRecognizerConfig ans; 125 OnlineRecognizerConfig ans;
15 126
@@ -122,109 +233,7 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { @@ -122,109 +233,7 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
122 jobject model_config = env->GetObjectField(config, fid); 233 jobject model_config = env->GetObjectField(config, fid);
123 jclass model_config_cls = env->GetObjectClass(model_config); 234 jclass model_config_cls = env->GetObjectClass(model_config);
124 235
125 - // transducer  
126 - fid = env->GetFieldID(model_config_cls, "transducer",  
127 - "Lcom/k2fsa/sherpa/onnx/OnlineTransducerModelConfig;");  
128 - jobject transducer_config = env->GetObjectField(model_config, fid);  
129 - jclass transducer_config_cls = env->GetObjectClass(transducer_config);  
130 -  
131 - fid = env->GetFieldID(transducer_config_cls, "encoder", "Ljava/lang/String;");  
132 - s = (jstring)env->GetObjectField(transducer_config, fid);  
133 - p = env->GetStringUTFChars(s, nullptr);  
134 - ans.model_config.transducer.encoder = p;  
135 - env->ReleaseStringUTFChars(s, p);  
136 -  
137 - fid = env->GetFieldID(transducer_config_cls, "decoder", "Ljava/lang/String;");  
138 - s = (jstring)env->GetObjectField(transducer_config, fid);  
139 - p = env->GetStringUTFChars(s, nullptr);  
140 - ans.model_config.transducer.decoder = p;  
141 - env->ReleaseStringUTFChars(s, p);  
142 -  
143 - fid = env->GetFieldID(transducer_config_cls, "joiner", "Ljava/lang/String;");  
144 - s = (jstring)env->GetObjectField(transducer_config, fid);  
145 - p = env->GetStringUTFChars(s, nullptr);  
146 - ans.model_config.transducer.joiner = p;  
147 - env->ReleaseStringUTFChars(s, p);  
148 -  
149 - // paraformer  
150 - fid = env->GetFieldID(model_config_cls, "paraformer",  
151 - "Lcom/k2fsa/sherpa/onnx/OnlineParaformerModelConfig;");  
152 - jobject paraformer_config = env->GetObjectField(model_config, fid);  
153 - jclass paraformer_config_cls = env->GetObjectClass(paraformer_config);  
154 -  
155 - fid = env->GetFieldID(paraformer_config_cls, "encoder", "Ljava/lang/String;");  
156 - s = (jstring)env->GetObjectField(paraformer_config, fid);  
157 - p = env->GetStringUTFChars(s, nullptr);  
158 - ans.model_config.paraformer.encoder = p;  
159 - env->ReleaseStringUTFChars(s, p);  
160 -  
161 - fid = env->GetFieldID(paraformer_config_cls, "decoder", "Ljava/lang/String;");  
162 - s = (jstring)env->GetObjectField(paraformer_config, fid);  
163 - p = env->GetStringUTFChars(s, nullptr);  
164 - ans.model_config.paraformer.decoder = p;  
165 - env->ReleaseStringUTFChars(s, p);  
166 -  
167 - // streaming zipformer2 CTC  
168 - fid =  
169 - env->GetFieldID(model_config_cls, "zipformer2Ctc",  
170 - "Lcom/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig;");  
171 - jobject zipformer2_ctc_config = env->GetObjectField(model_config, fid);  
172 - jclass zipformer2_ctc_config_cls = env->GetObjectClass(zipformer2_ctc_config);  
173 -  
174 - fid =  
175 - env->GetFieldID(zipformer2_ctc_config_cls, "model", "Ljava/lang/String;");  
176 - s = (jstring)env->GetObjectField(zipformer2_ctc_config, fid);  
177 - p = env->GetStringUTFChars(s, nullptr);  
178 - ans.model_config.zipformer2_ctc.model = p;  
179 - env->ReleaseStringUTFChars(s, p);  
180 -  
181 - // streaming NeMo CTC  
182 - fid = env->GetFieldID(model_config_cls, "neMoCtc",  
183 - "Lcom/k2fsa/sherpa/onnx/OnlineNeMoCtcModelConfig;");  
184 - jobject nemo_ctc_config = env->GetObjectField(model_config, fid);  
185 - jclass nemo_ctc_config_cls = env->GetObjectClass(nemo_ctc_config);  
186 -  
187 - fid = env->GetFieldID(nemo_ctc_config_cls, "model", "Ljava/lang/String;");  
188 - s = (jstring)env->GetObjectField(nemo_ctc_config, fid);  
189 - p = env->GetStringUTFChars(s, nullptr);  
190 - ans.model_config.nemo_ctc.model = p;  
191 - env->ReleaseStringUTFChars(s, p);  
192 -  
193 - fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;");  
194 - s = (jstring)env->GetObjectField(model_config, fid);  
195 - p = env->GetStringUTFChars(s, nullptr);  
196 - ans.model_config.tokens = p;  
197 - env->ReleaseStringUTFChars(s, p);  
198 -  
199 - fid = env->GetFieldID(model_config_cls, "numThreads", "I");  
200 - ans.model_config.num_threads = env->GetIntField(model_config, fid);  
201 -  
202 - fid = env->GetFieldID(model_config_cls, "debug", "Z");  
203 - ans.model_config.debug = env->GetBooleanField(model_config, fid);  
204 -  
205 - fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;");  
206 - s = (jstring)env->GetObjectField(model_config, fid);  
207 - p = env->GetStringUTFChars(s, nullptr);  
208 - ans.model_config.provider_config.provider = p;  
209 - env->ReleaseStringUTFChars(s, p);  
210 -  
211 - fid = env->GetFieldID(model_config_cls, "modelType", "Ljava/lang/String;");  
212 - s = (jstring)env->GetObjectField(model_config, fid);  
213 - p = env->GetStringUTFChars(s, nullptr);  
214 - ans.model_config.model_type = p;  
215 - env->ReleaseStringUTFChars(s, p);  
216 -  
217 - fid = env->GetFieldID(model_config_cls, "modelingUnit", "Ljava/lang/String;");  
218 - s = (jstring)env->GetObjectField(model_config, fid);  
219 - p = env->GetStringUTFChars(s, nullptr);  
220 - ans.model_config.modeling_unit = p;  
221 - env->ReleaseStringUTFChars(s, p);  
222 -  
223 - fid = env->GetFieldID(model_config_cls, "bpeVocab", "Ljava/lang/String;");  
224 - s = (jstring)env->GetObjectField(model_config, fid);  
225 - p = env->GetStringUTFChars(s, nullptr);  
226 - ans.model_config.bpe_vocab = p;  
227 - env->ReleaseStringUTFChars(s, p); 236 + ans.model_config = GetOnlineModelConfig(env, model_config_cls, model_config);
228 237
229 //---------- rnn lm model config ---------- 238 //---------- rnn lm model config ----------
230 fid = env->GetFieldID(cls, "lmConfig", 239 fid = env->GetFieldID(cls, "lmConfig",
@@ -165,6 +165,11 @@ type @@ -165,6 +165,11 @@ type
165 function ToString: AnsiString; 165 function ToString: AnsiString;
166 end; 166 end;
167 167
  168 + TSherpaOnnxOnlineNemoCtcModelConfig = record
  169 + Model: AnsiString;
  170 + function ToString: AnsiString;
  171 + end;
  172 +
168 TSherpaOnnxOnlineModelConfig = record 173 TSherpaOnnxOnlineModelConfig = record
169 Transducer: TSherpaOnnxOnlineTransducerModelConfig; 174 Transducer: TSherpaOnnxOnlineTransducerModelConfig;
170 Paraformer: TSherpaOnnxOnlineParaformerModelConfig; 175 Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
@@ -178,6 +183,7 @@ type @@ -178,6 +183,7 @@ type
178 BpeVocab: AnsiString; 183 BpeVocab: AnsiString;
179 TokensBuf: AnsiString; 184 TokensBuf: AnsiString;
180 TokensBufSize: Integer; 185 TokensBufSize: Integer;
  186 + NemoCtc: TSherpaOnnxOnlineNemoCtcModelConfig;
181 function ToString: AnsiString; 187 function ToString: AnsiString;
182 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig); 188 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig);
183 end; 189 end;
@@ -691,6 +697,10 @@ type @@ -691,6 +697,10 @@ type
691 Model: PAnsiChar; 697 Model: PAnsiChar;
692 end; 698 end;
693 699
  700 + SherpaOnnxOnlineNemoCtcModelConfig = record
  701 + Model: PAnsiChar;
  702 + end;
  703 +
694 SherpaOnnxOnlineModelConfig= record 704 SherpaOnnxOnlineModelConfig= record
695 Transducer: SherpaOnnxOnlineTransducerModelConfig; 705 Transducer: SherpaOnnxOnlineTransducerModelConfig;
696 Paraformer: SherpaOnnxOnlineParaformerModelConfig; 706 Paraformer: SherpaOnnxOnlineParaformerModelConfig;
@@ -704,6 +714,7 @@ type @@ -704,6 +714,7 @@ type
704 BpeVocab: PAnsiChar; 714 BpeVocab: PAnsiChar;
705 TokensBuf: PAnsiChar; 715 TokensBuf: PAnsiChar;
706 TokensBufSize: cint32; 716 TokensBufSize: cint32;
  717 + NemoCtc: SherpaOnnxOnlineNemoCtcModelConfig;
707 end; 718 end;
708 SherpaOnnxFeatureConfig = record 719 SherpaOnnxFeatureConfig = record
709 SampleRate: cint32; 720 SampleRate: cint32;
@@ -1311,6 +1322,12 @@ begin @@ -1311,6 +1322,12 @@ begin
1311 [Self.Model]); 1322 [Self.Model]);
1312 end; 1323 end;
1313 1324
  1325 +function TSherpaOnnxOnlineNemoCtcModelConfig.ToString: AnsiString;
  1326 +begin
  1327 + Result := Format('TSherpaOnnxOnlineNemoCtcModelConfig(Model := %s)',
  1328 + [Self.Model]);
  1329 +end;
  1330 +
1314 function TSherpaOnnxOnlineModelConfig.ToString: AnsiString; 1331 function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
1315 begin 1332 begin
1316 Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' + 1333 Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
@@ -1322,12 +1339,13 @@ begin @@ -1322,12 +1339,13 @@ begin
1322 'Debug := %s, ' + 1339 'Debug := %s, ' +
1323 'ModelType := %s, ' + 1340 'ModelType := %s, ' +
1324 'ModelingUnit := %s, ' + 1341 'ModelingUnit := %s, ' +
1325 - 'BpeVocab := %s)'  
1326 - , 1342 + 'BpeVocab := %s, ' +
  1343 + 'NemoCtc := %s',
1327 [Self.Transducer.ToString, Self.Paraformer.ToString, 1344 [Self.Transducer.ToString, Self.Paraformer.ToString,
1328 Self.Zipformer2Ctc.ToString, Self.Tokens, 1345 Self.Zipformer2Ctc.ToString, Self.Tokens,
1329 Self.NumThreads, Self.Provider, Self.Debug.ToString, 1346 Self.NumThreads, Self.Provider, Self.Debug.ToString,
1330 - Self.ModelType, Self.ModelingUnit, Self.BpeVocab 1347 + Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
  1348 + Self.NemoCtc.ToString
1331 ]); 1349 ]);
1332 end; 1350 end;
1333 1351
@@ -1426,6 +1444,7 @@ begin @@ -1426,6 +1444,7 @@ begin
1426 C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder); 1444 C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);
1427 1445
1428 C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model); 1446 C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);
  1447 + C.ModelConfig.NemoCtc.Model := PAnsiChar(Config.ModelConfig.NemoCtc.Model);
1429 1448
1430 C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens); 1449 C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
1431 C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads; 1450 C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
@@ -128,77 +128,69 @@ class TestOfflineRecognizer(unittest.TestCase): @@ -128,77 +128,69 @@ class TestOfflineRecognizer(unittest.TestCase):
128 print(s2.result.text) 128 print(s2.result.text)
129 129
130 def test_paraformer_single_file(self): 130 def test_paraformer_single_file(self):
131 - for use_int8 in [True, False]:  
132 - if use_int8:  
133 - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"  
134 - else:  
135 - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.onnx" 131 + model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"
136 132
137 - tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"  
138 - wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav" 133 + tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
  134 + wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav"
139 135
140 - if not Path(model).is_file():  
141 - print("skipping test_paraformer_single_file()")  
142 - return 136 + if not Path(model).is_file():
  137 + print("skipping test_paraformer_single_file()")
  138 + return
143 139
144 - recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(  
145 - paraformer=model,  
146 - tokens=tokens,  
147 - num_threads=1,  
148 - provider="cpu",  
149 - ) 140 + recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
  141 + paraformer=model,
  142 + tokens=tokens,
  143 + num_threads=1,
  144 + provider="cpu",
  145 + )
150 146
151 - s = recognizer.create_stream()  
152 - samples, sample_rate = read_wave(wave0)  
153 - s.accept_waveform(sample_rate, samples)  
154 - recognizer.decode_stream(s)  
155 - print(s.result.text) 147 + s = recognizer.create_stream()
  148 + samples, sample_rate = read_wave(wave0)
  149 + s.accept_waveform(sample_rate, samples)
  150 + recognizer.decode_stream(s)
  151 + print(s.result.text)
156 152
157 def test_paraformer_multiple_files(self): 153 def test_paraformer_multiple_files(self):
158 - for use_int8 in [True, False]:  
159 - if use_int8:  
160 - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"  
161 - else:  
162 - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.onnx"  
163 -  
164 - tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"  
165 - wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav"  
166 - wave1 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav"  
167 - wave2 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav"  
168 - wave3 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav"  
169 -  
170 - if not Path(model).is_file():  
171 - print("skipping test_paraformer_multiple_files()")  
172 - return  
173 -  
174 - recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(  
175 - paraformer=model,  
176 - tokens=tokens,  
177 - num_threads=1,  
178 - provider="cpu",  
179 - )  
180 -  
181 - s0 = recognizer.create_stream()  
182 - samples0, sample_rate0 = read_wave(wave0)  
183 - s0.accept_waveform(sample_rate0, samples0)  
184 -  
185 - s1 = recognizer.create_stream()  
186 - samples1, sample_rate1 = read_wave(wave1)  
187 - s1.accept_waveform(sample_rate1, samples1)  
188 -  
189 - s2 = recognizer.create_stream()  
190 - samples2, sample_rate2 = read_wave(wave2)  
191 - s2.accept_waveform(sample_rate2, samples2)  
192 -  
193 - s3 = recognizer.create_stream()  
194 - samples3, sample_rate3 = read_wave(wave3)  
195 - s3.accept_waveform(sample_rate3, samples3)  
196 -  
197 - recognizer.decode_streams([s0, s1, s2, s3])  
198 - print(s0.result.text)  
199 - print(s1.result.text)  
200 - print(s2.result.text)  
201 - print(s3.result.text) 154 + model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"
  155 +
  156 + tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
  157 + wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav"
  158 + wave1 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav"
  159 + wave2 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav"
  160 + wave3 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav"
  161 +
  162 + if not Path(model).is_file():
  163 + print("skipping test_paraformer_multiple_files()")
  164 + return
  165 +
  166 + recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer(
  167 + paraformer=model,
  168 + tokens=tokens,
  169 + num_threads=1,
  170 + provider="cpu",
  171 + )
  172 +
  173 + s0 = recognizer.create_stream()
  174 + samples0, sample_rate0 = read_wave(wave0)
  175 + s0.accept_waveform(sample_rate0, samples0)
  176 +
  177 + s1 = recognizer.create_stream()
  178 + samples1, sample_rate1 = read_wave(wave1)
  179 + s1.accept_waveform(sample_rate1, samples1)
  180 +
  181 + s2 = recognizer.create_stream()
  182 + samples2, sample_rate2 = read_wave(wave2)
  183 + s2.accept_waveform(sample_rate2, samples2)
  184 +
  185 + s3 = recognizer.create_stream()
  186 + samples3, sample_rate3 = read_wave(wave3)
  187 + s3.accept_waveform(sample_rate3, samples3)
  188 +
  189 + recognizer.decode_streams([s0, s1, s2, s3])
  190 + print(s0.result.text)
  191 + print(s1.result.text)
  192 + print(s2.result.text)
  193 + print(s3.result.text)
202 194
203 def test_nemo_ctc_single_file(self): 195 def test_nemo_ctc_single_file(self):
204 for use_int8 in [True, False]: 196 for use_int8 in [True, False]:
@@ -68,6 +68,14 @@ func sherpaOnnxOnlineZipformer2CtcModelConfig( @@ -68,6 +68,14 @@ func sherpaOnnxOnlineZipformer2CtcModelConfig(
68 ) 68 )
69 } 69 }
70 70
/// Return an instance of SherpaOnnxOnlineNemoCtcModelConfig.
///
/// - Parameters:
///   - model: String stored in the config's `model` field after conversion
///     with `toCPointer`. Presumably the path to the NeMo CTC ONNX model
///     file (TODO confirm). Defaults to the empty string.
///
/// - Returns: Return an instance of SherpaOnnxOnlineNemoCtcModelConfig
func sherpaOnnxOnlineNemoCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOnlineNemoCtcModelConfig {
  let modelPointer = toCPointer(model)
  return SherpaOnnxOnlineNemoCtcModelConfig(model: modelPointer)
}
  78 +
71 /// Return an instance of SherpaOnnxOnlineModelConfig. 79 /// Return an instance of SherpaOnnxOnlineModelConfig.
72 /// 80 ///
73 /// Please refer to 81 /// Please refer to
@@ -92,7 +100,8 @@ func sherpaOnnxOnlineModelConfig( @@ -92,7 +100,8 @@ func sherpaOnnxOnlineModelConfig(
92 modelingUnit: String = "cjkchar", 100 modelingUnit: String = "cjkchar",
93 bpeVocab: String = "", 101 bpeVocab: String = "",
94 tokensBuf: String = "", 102 tokensBuf: String = "",
95 - tokensBufSize: Int = 0 103 + tokensBufSize: Int = 0,
  104 + nemoCtc: SherpaOnnxOnlineNemoCtcModelConfig = sherpaOnnxOnlineNemoCtcModelConfig()
96 ) -> SherpaOnnxOnlineModelConfig { 105 ) -> SherpaOnnxOnlineModelConfig {
97 return SherpaOnnxOnlineModelConfig( 106 return SherpaOnnxOnlineModelConfig(
98 transducer: transducer, 107 transducer: transducer,
@@ -106,7 +115,8 @@ func sherpaOnnxOnlineModelConfig( @@ -106,7 +115,8 @@ func sherpaOnnxOnlineModelConfig(
106 modeling_unit: toCPointer(modelingUnit), 115 modeling_unit: toCPointer(modelingUnit),
107 bpe_vocab: toCPointer(bpeVocab), 116 bpe_vocab: toCPointer(bpeVocab),
108 tokens_buf: toCPointer(tokensBuf), 117 tokens_buf: toCPointer(tokensBuf),
109 - tokens_buf_size: Int32(tokensBufSize) 118 + tokens_buf_size: Int32(tokensBufSize),
  119 + nemo_ctc: nemoCtc
110 ) 120 )
111 } 121 }
112 122
@@ -15,8 +15,8 @@ function freeConfig(config, Module) { @@ -15,8 +15,8 @@ function freeConfig(config, Module) {
15 freeConfig(config.paraformer, Module) 15 freeConfig(config.paraformer, Module)
16 } 16 }
17 17
18 - if ('ctc' in config) {  
19 - freeConfig(config.ctc, Module) 18 + if ('zipformer2Ctc' in config) {
  19 + freeConfig(config.zipformer2Ctc, Module)
20 } 20 }
21 21
22 if ('feat' in config) { 22 if ('feat' in config) {
@@ -157,6 +157,22 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { @@ -157,6 +157,22 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
157 } 157 }
158 } 158 }
159 159
/**
 * Build the heap representation of a SherpaOnnxOnlineNemoCtcModelConfig.
 *
 * Two heap blocks are allocated through `Module._malloc`: one receives the
 * UTF-8 encoded model string, the other is the 4-byte struct whose single
 * pointer field is set to the address of that string.
 *
 * @param {{model?: string}} [config] - NeMo CTC settings. A missing object,
 *     or a missing/empty `model`, is encoded as the empty string.
 * @param {object} Module - Object providing `lengthBytesUTF8`, `_malloc`,
 *     `stringToUTF8` and `setValue` (the Emscripten module).
 * @returns {{buffer: number, ptr: number, len: number}} `buffer` is the
 *     address of the encoded string, `ptr` the address of the struct and
 *     `len` the struct size in bytes.
 */
function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) {
  // Tolerate a missing config object instead of throwing on `config.model`.
  const model = (config && config.model) || '';

  // One extra byte so the encoded string can be NUL-terminated.
  const modelLen = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
  175 +
160 function initSherpaOnnxOnlineModelConfig(config, Module) { 176 function initSherpaOnnxOnlineModelConfig(config, Module) {
161 if (!('transducer' in config)) { 177 if (!('transducer' in config)) {
162 config.transducer = { 178 config.transducer = {
@@ -179,6 +195,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -179,6 +195,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
179 }; 195 };
180 } 196 }
181 197
  198 + if (!('nemoCtc' in config)) {
  199 + config.nemoCtc = {
  200 + model: '',
  201 + };
  202 + }
  203 +
182 if (!('tokensBuf' in config)) { 204 if (!('tokensBuf' in config)) {
183 config.tokensBuf = ''; 205 config.tokensBuf = '';
184 } 206 }
@@ -193,10 +215,15 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -193,10 +215,15 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
193 const paraformer = 215 const paraformer =
194 initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module); 216 initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
195 217
196 - const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig( 218 + const zipformer2Ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
197 config.zipformer2Ctc, Module); 219 config.zipformer2Ctc, Module);
198 220
199 - const len = transducer.len + paraformer.len + ctc.len + 9 * 4; 221 + const nemoCtc =
  222 + initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module);
  223 +
  224 + const len =
  225 + transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 + nemoCtc.len;
  226 +
200 const ptr = Module._malloc(len); 227 const ptr = Module._malloc(len);
201 228
202 let offset = 0; 229 let offset = 0;
@@ -206,8 +233,8 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -206,8 +233,8 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
206 Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset); 233 Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
207 offset += paraformer.len; 234 offset += paraformer.len;
208 235
209 - Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset);  
210 - offset += ctc.len; 236 + Module._CopyHeap(zipformer2Ctc.ptr, zipformer2Ctc.len, ptr + offset);
  237 + offset += zipformer2Ctc.len;
211 238
212 const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; 239 const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
213 const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; 240 const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
@@ -240,7 +267,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -240,7 +267,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
240 Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen); 267 Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen);
241 offset += tokensBufLen; 268 offset += tokensBufLen;
242 269
243 - offset = transducer.len + paraformer.len + ctc.len; 270 + offset = transducer.len + paraformer.len + zipformer2Ctc.len;
244 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens 271 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
245 offset += 4; 272 offset += 4;
246 273
@@ -278,9 +305,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { @@ -278,9 +305,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) {
278 Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32'); 305 Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32');
279 offset += 4; 306 offset += 4;
280 307
  308 + Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset);
  309 + offset += nemoCtc.len;
  310 +
281 return { 311 return {
282 buffer: buffer, ptr: ptr, len: len, transducer: transducer, 312 buffer: buffer, ptr: ptr, len: len, transducer: transducer,
283 - paraformer: paraformer, ctc: ctc 313 + paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc
284 } 314 }
285 } 315 }
286 316
@@ -485,6 +515,10 @@ function createOnlineRecognizer(Module, myConfig) { @@ -485,6 +515,10 @@ function createOnlineRecognizer(Module, myConfig) {
485 model: '', 515 model: '',
486 }; 516 };
487 517
  518 + const onlineNemoCtcModelConfig = {
  519 + model: '',
  520 + };
  521 +
488 let type = 0; 522 let type = 0;
489 523
490 switch (type) { 524 switch (type) {
@@ -500,9 +534,13 @@ function createOnlineRecognizer(Module, myConfig) { @@ -500,9 +534,13 @@ function createOnlineRecognizer(Module, myConfig) {
500 onlineParaformerModelConfig.decoder = './decoder.onnx'; 534 onlineParaformerModelConfig.decoder = './decoder.onnx';
501 break; 535 break;
502 case 2: 536 case 2:
503 - // ctc 537 + // zipformer2Ctc
504 onlineZipformer2CtcModelConfig.model = './encoder.onnx'; 538 onlineZipformer2CtcModelConfig.model = './encoder.onnx';
505 break; 539 break;
  540 + case 3:
  541 + // nemoCtc
  542 + onlineNemoCtcModelConfig.model = './nemo-ctc.onnx';
  543 + break;
506 } 544 }
507 545
508 546
@@ -510,6 +548,7 @@ function createOnlineRecognizer(Module, myConfig) { @@ -510,6 +548,7 @@ function createOnlineRecognizer(Module, myConfig) {
510 transducer: onlineTransducerModelConfig, 548 transducer: onlineTransducerModelConfig,
511 paraformer: onlineParaformerModelConfig, 549 paraformer: onlineParaformerModelConfig,
512 zipformer2Ctc: onlineZipformer2CtcModelConfig, 550 zipformer2Ctc: onlineZipformer2CtcModelConfig,
  551 + nemoCtc: onlineNemoCtcModelConfig,
513 tokens: './tokens.txt', 552 tokens: './tokens.txt',
514 numThreads: 1, 553 numThreads: 1,
515 provider: 'cpu', 554 provider: 'cpu',
@@ -16,10 +16,12 @@ extern "C" { @@ -16,10 +16,12 @@ extern "C" {
16 static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, ""); 16 static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, "");
17 static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, ""); 17 static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, "");
18 static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, ""); 18 static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, "");
  19 +static_assert(sizeof(SherpaOnnxOnlineNemoCtcModelConfig) == 1 * 4, "");
19 static_assert(sizeof(SherpaOnnxOnlineModelConfig) == 20 static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
20 sizeof(SherpaOnnxOnlineTransducerModelConfig) + 21 sizeof(SherpaOnnxOnlineTransducerModelConfig) +
21 sizeof(SherpaOnnxOnlineParaformerModelConfig) + 22 sizeof(SherpaOnnxOnlineParaformerModelConfig) +
22 - sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4, 23 + sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 +
  24 + sizeof(SherpaOnnxOnlineNemoCtcModelConfig),
23 ""); 25 "");
24 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); 26 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
25 static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); 27 static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
@@ -36,6 +38,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { @@ -36,6 +38,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
36 auto transducer_model_config = &model_config->transducer; 38 auto transducer_model_config = &model_config->transducer;
37 auto paraformer_model_config = &model_config->paraformer; 39 auto paraformer_model_config = &model_config->paraformer;
38 auto ctc_model_config = &model_config->zipformer2_ctc; 40 auto ctc_model_config = &model_config->zipformer2_ctc;
  41 + auto nemo_ctc = &model_config->nemo_ctc;
39 42
40 fprintf(stdout, "----------online transducer model config----------\n"); 43 fprintf(stdout, "----------online transducer model config----------\n");
41 fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder); 44 fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder);
@@ -46,8 +49,12 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { @@ -46,8 +49,12 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
46 fprintf(stdout, "encoder: %s\n", paraformer_model_config->encoder); 49 fprintf(stdout, "encoder: %s\n", paraformer_model_config->encoder);
47 fprintf(stdout, "decoder: %s\n", paraformer_model_config->decoder); 50 fprintf(stdout, "decoder: %s\n", paraformer_model_config->decoder);
48 51
49 - fprintf(stdout, "----------online ctc model config----------\n"); 52 + fprintf(stdout, "----------online zipformer2 ctc model config----------\n");
50 fprintf(stdout, "model: %s\n", ctc_model_config->model); 53 fprintf(stdout, "model: %s\n", ctc_model_config->model);
  54 +
  55 + fprintf(stdout, "----------online nemo ctc model config----------\n");
  56 + fprintf(stdout, "model: %s\n", nemo_ctc->model);
  57 +
51 fprintf(stdout, "tokens: %s\n", model_config->tokens); 58 fprintf(stdout, "tokens: %s\n", model_config->tokens);
52 fprintf(stdout, "num_threads: %d\n", model_config->num_threads); 59 fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
53 fprintf(stdout, "provider: %s\n", model_config->provider); 60 fprintf(stdout, "provider: %s\n", model_config->provider);
@@ -73,9 +73,12 @@ function initModelConfig(config, Module) { @@ -73,9 +73,12 @@ function initModelConfig(config, Module) {
73 const transducer = 73 const transducer =
74 initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); 74 initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
75 const paraformer_len = 2 * 4 75 const paraformer_len = 2 * 4
76 - const ctc_len = 1 * 4 76 + const zipfomer2_ctc_len = 1 * 4
  77 + const nemo_ctc_len = 1 * 4
  78 +
  79 + const len = transducer.len + paraformer_len + zipfomer2_ctc_len + 9 * 4 +
  80 + nemo_ctc_len;
77 81
78 - const len = transducer.len + paraformer_len + ctc_len + 9 * 4;  
79 const ptr = Module._malloc(len); 82 const ptr = Module._malloc(len);
80 Module.HEAPU8.fill(0, ptr, ptr + len); 83 Module.HEAPU8.fill(0, ptr, ptr + len);
81 84
@@ -112,7 +115,7 @@ function initModelConfig(config, Module) { @@ -112,7 +115,7 @@ function initModelConfig(config, Module) {
112 Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen); 115 Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen);
113 offset += tokensBufLen; 116 offset += tokensBufLen;
114 117
115 - offset = transducer.len + paraformer_len + ctc_len; 118 + offset = transducer.len + paraformer_len + zipfomer2_ctc_len;
116 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens 119 Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
117 offset += 4; 120 offset += 4;
118 121