Committed by
GitHub
Add APIs for Online NeMo CTC models (#2454)
正在显示
31 个修改的文件
包含
518 行增加
和
381 行删除
| @@ -9,6 +9,49 @@ git status | @@ -9,6 +9,49 @@ git status | ||
| 9 | ls -lh | 9 | ls -lh |
| 10 | ls -lh node_modules | 10 | ls -lh node_modules |
| 11 | 11 | ||
| 12 | +# online asr | ||
| 13 | +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 14 | +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 15 | +rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 16 | +node ./test-online-paraformer.js | ||
| 17 | +rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en | ||
| 18 | + | ||
| 19 | +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 20 | +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 21 | +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 22 | + | ||
| 23 | +rm -f itn* | ||
| 24 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav | ||
| 25 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 26 | + | ||
| 27 | +node ./test-online-transducer-itn.js | ||
| 28 | + | ||
| 29 | +node ./test-online-transducer.js | ||
| 30 | + | ||
| 31 | +rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 32 | + | ||
| 33 | +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 34 | +tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 35 | +rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 36 | + | ||
| 37 | +node ./test-online-zipformer2-ctc.js | ||
| 38 | +rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 | ||
| 39 | + | ||
| 40 | +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 41 | +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 42 | +rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 43 | +node ./test-online-zipformer2-ctc-hlg.js | ||
| 44 | +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 | ||
| 45 | + | ||
| 46 | +echo "----------keyword spotting----------" | ||
| 47 | + | ||
| 48 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 49 | +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 50 | +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 51 | + | ||
| 52 | +node ./test-keyword-spotter-transducer.js | ||
| 53 | +rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 | ||
| 54 | + | ||
| 12 | # asr with offline nemo canary | 55 | # asr with offline nemo canary |
| 13 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | 56 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 |
| 14 | tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | 57 | tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 |
| @@ -145,15 +188,6 @@ rm Obama.wav | @@ -145,15 +188,6 @@ rm Obama.wav | ||
| 145 | rm silero_vad.onnx | 188 | rm silero_vad.onnx |
| 146 | rm -rf sherpa-onnx-whisper-tiny.en | 189 | rm -rf sherpa-onnx-whisper-tiny.en |
| 147 | 190 | ||
| 148 | -echo "----------keyword spotting----------" | ||
| 149 | - | ||
| 150 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 151 | -tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 152 | -rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 153 | - | ||
| 154 | -node ./test-keyword-spotter-transducer.js | ||
| 155 | -rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 | ||
| 156 | - | ||
| 157 | # offline asr | 191 | # offline asr |
| 158 | # | 192 | # |
| 159 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | 193 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 |
| @@ -218,37 +252,3 @@ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 | @@ -218,37 +252,3 @@ rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2 | ||
| 218 | 252 | ||
| 219 | node ./test-offline-moonshine.js | 253 | node ./test-offline-moonshine.js |
| 220 | rm -rf sherpa-onnx-moonshine-* | 254 | rm -rf sherpa-onnx-moonshine-* |
| 221 | - | ||
| 222 | -# online asr | ||
| 223 | -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 224 | -tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 225 | -rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 226 | -node ./test-online-paraformer.js | ||
| 227 | -rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en | ||
| 228 | - | ||
| 229 | -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 230 | -tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 231 | -rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 232 | - | ||
| 233 | -rm -f itn* | ||
| 234 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav | ||
| 235 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 236 | - | ||
| 237 | -node ./test-online-transducer-itn.js | ||
| 238 | - | ||
| 239 | -node ./test-online-transducer.js | ||
| 240 | - | ||
| 241 | -rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 242 | - | ||
| 243 | -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 244 | -tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 245 | -rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 246 | - | ||
| 247 | -node ./test-online-zipformer2-ctc.js | ||
| 248 | -rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 | ||
| 249 | - | ||
| 250 | -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 251 | -tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 252 | -rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 253 | -node ./test-online-zipformer2-ctc-hlg.js | ||
| 254 | -rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 |
| @@ -148,7 +148,7 @@ to download pre-trained non-streaming zipformer models. | @@ -148,7 +148,7 @@ to download pre-trained non-streaming zipformer models. | ||
| 148 | 148 | ||
| 149 | dotnet run \ | 149 | dotnet run \ |
| 150 | --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ | 150 | --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ |
| 151 | - --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.onnx \ | 151 | + --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \ |
| 152 | --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \ | 152 | --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \ |
| 153 | ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \ | 153 | ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \ |
| 154 | ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \ | 154 | ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \ |
| @@ -18,7 +18,7 @@ fi | @@ -18,7 +18,7 @@ fi | ||
| 18 | 18 | ||
| 19 | dotnet run \ | 19 | dotnet run \ |
| 20 | --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ | 20 | --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ |
| 21 | - --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.onnx \ | 21 | + --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \ |
| 22 | --rule-fsts=./itn_zh_number.fst \ | 22 | --rule-fsts=./itn_zh_number.fst \ |
| 23 | --num-threads=2 \ | 23 | --num-threads=2 \ |
| 24 | --files ./itn-zh-number.wav | 24 | --files ./itn-zh-number.wav |
| @@ -10,7 +10,7 @@ fi | @@ -10,7 +10,7 @@ fi | ||
| 10 | 10 | ||
| 11 | dotnet run \ | 11 | dotnet run \ |
| 12 | --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ | 12 | --tokens=./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \ |
| 13 | - --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.onnx \ | 13 | + --paraformer=./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \ |
| 14 | --num-threads=2 \ | 14 | --num-threads=2 \ |
| 15 | --files ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \ | 15 | --files ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav \ |
| 16 | ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \ | 16 | ./sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav \ |
| @@ -13,6 +13,4 @@ dotnet run \ | @@ -13,6 +13,4 @@ dotnet run \ | ||
| 13 | --tokens=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \ | 13 | --tokens=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/tokens.txt \ |
| 14 | --zipformer-ctc=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \ | 14 | --zipformer-ctc=./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/model.int8.onnx \ |
| 15 | --num-threads=1 \ | 15 | --num-threads=1 \ |
| 16 | - --files ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav \ | ||
| 17 | - ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/1.wav \ | ||
| 18 | - ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/8k.wav | 16 | + --files ./sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03/test_wavs/0.wav |
| @@ -121,6 +121,9 @@ class KeywordSpotter { | @@ -121,6 +121,9 @@ class KeywordSpotter { | ||
| 121 | c.ref.model.zipformer2Ctc.model = | 121 | c.ref.model.zipformer2Ctc.model = |
| 122 | config.model.zipformer2Ctc.model.toNativeUtf8(); | 122 | config.model.zipformer2Ctc.model.toNativeUtf8(); |
| 123 | 123 | ||
| 124 | + // nemoCtc | ||
| 125 | + c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8(); | ||
| 126 | + | ||
| 124 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); | 127 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); |
| 125 | c.ref.model.numThreads = config.model.numThreads; | 128 | c.ref.model.numThreads = config.model.numThreads; |
| 126 | c.ref.model.provider = config.model.provider.toNativeUtf8(); | 129 | c.ref.model.provider = config.model.provider.toNativeUtf8(); |
| @@ -146,6 +149,7 @@ class KeywordSpotter { | @@ -146,6 +149,7 @@ class KeywordSpotter { | ||
| 146 | calloc.free(c.ref.model.modelType); | 149 | calloc.free(c.ref.model.modelType); |
| 147 | calloc.free(c.ref.model.provider); | 150 | calloc.free(c.ref.model.provider); |
| 148 | calloc.free(c.ref.model.tokens); | 151 | calloc.free(c.ref.model.tokens); |
| 152 | + calloc.free(c.ref.model.nemoCtc.model); | ||
| 149 | calloc.free(c.ref.model.zipformer2Ctc.model); | 153 | calloc.free(c.ref.model.zipformer2Ctc.model); |
| 150 | calloc.free(c.ref.model.paraformer.encoder); | 154 | calloc.free(c.ref.model.paraformer.encoder); |
| 151 | calloc.free(c.ref.model.paraformer.decoder); | 155 | calloc.free(c.ref.model.paraformer.decoder); |
| @@ -86,11 +86,33 @@ class OnlineZipformer2CtcModelConfig { | @@ -86,11 +86,33 @@ class OnlineZipformer2CtcModelConfig { | ||
| 86 | final String model; | 86 | final String model; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | +class OnlineNemoCtcModelConfig { | ||
| 90 | + const OnlineNemoCtcModelConfig({this.model = ''}); | ||
| 91 | + | ||
| 92 | + factory OnlineNemoCtcModelConfig.fromJson(Map<String, dynamic> json) { | ||
| 93 | + return OnlineNemoCtcModelConfig( | ||
| 94 | + model: json['model'] as String? ?? '', | ||
| 95 | + ); | ||
| 96 | + } | ||
| 97 | + | ||
| 98 | + @override | ||
| 99 | + String toString() { | ||
| 100 | + return 'OnlineNemoCtcModelConfig(model: $model)'; | ||
| 101 | + } | ||
| 102 | + | ||
| 103 | + Map<String, dynamic> toJson() => { | ||
| 104 | + 'model': model, | ||
| 105 | + }; | ||
| 106 | + | ||
| 107 | + final String model; | ||
| 108 | +} | ||
| 109 | + | ||
| 89 | class OnlineModelConfig { | 110 | class OnlineModelConfig { |
| 90 | const OnlineModelConfig({ | 111 | const OnlineModelConfig({ |
| 91 | this.transducer = const OnlineTransducerModelConfig(), | 112 | this.transducer = const OnlineTransducerModelConfig(), |
| 92 | this.paraformer = const OnlineParaformerModelConfig(), | 113 | this.paraformer = const OnlineParaformerModelConfig(), |
| 93 | this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(), | 114 | this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(), |
| 115 | + this.nemoCtc = const OnlineNemoCtcModelConfig(), | ||
| 94 | required this.tokens, | 116 | required this.tokens, |
| 95 | this.numThreads = 1, | 117 | this.numThreads = 1, |
| 96 | this.provider = 'cpu', | 118 | this.provider = 'cpu', |
| @@ -108,6 +130,8 @@ class OnlineModelConfig { | @@ -108,6 +130,8 @@ class OnlineModelConfig { | ||
| 108 | json['paraformer'] as Map<String, dynamic>? ?? const {}), | 130 | json['paraformer'] as Map<String, dynamic>? ?? const {}), |
| 109 | zipformer2Ctc: OnlineZipformer2CtcModelConfig.fromJson( | 131 | zipformer2Ctc: OnlineZipformer2CtcModelConfig.fromJson( |
| 110 | json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}), | 132 | json['zipformer2Ctc'] as Map<String, dynamic>? ?? const {}), |
| 133 | + nemoCtc: OnlineNemoCtcModelConfig.fromJson( | ||
| 134 | + json['nemoCtc'] as Map<String, dynamic>? ?? const {}), | ||
| 111 | tokens: json['tokens'] as String, | 135 | tokens: json['tokens'] as String, |
| 112 | numThreads: json['numThreads'] as int? ?? 1, | 136 | numThreads: json['numThreads'] as int? ?? 1, |
| 113 | provider: json['provider'] as String? ?? 'cpu', | 137 | provider: json['provider'] as String? ?? 'cpu', |
| @@ -120,13 +144,14 @@ class OnlineModelConfig { | @@ -120,13 +144,14 @@ class OnlineModelConfig { | ||
| 120 | 144 | ||
| 121 | @override | 145 | @override |
| 122 | String toString() { | 146 | String toString() { |
| 123 | - return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)'; | 147 | + return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, nemoCtc: $nemoCtc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)'; |
| 124 | } | 148 | } |
| 125 | 149 | ||
| 126 | Map<String, dynamic> toJson() => { | 150 | Map<String, dynamic> toJson() => { |
| 127 | 'transducer': transducer.toJson(), | 151 | 'transducer': transducer.toJson(), |
| 128 | 'paraformer': paraformer.toJson(), | 152 | 'paraformer': paraformer.toJson(), |
| 129 | 'zipformer2Ctc': zipformer2Ctc.toJson(), | 153 | 'zipformer2Ctc': zipformer2Ctc.toJson(), |
| 154 | + 'nemoCtc': nemoCtc.toJson(), | ||
| 130 | 'tokens': tokens, | 155 | 'tokens': tokens, |
| 131 | 'numThreads': numThreads, | 156 | 'numThreads': numThreads, |
| 132 | 'provider': provider, | 157 | 'provider': provider, |
| @@ -139,6 +164,7 @@ class OnlineModelConfig { | @@ -139,6 +164,7 @@ class OnlineModelConfig { | ||
| 139 | final OnlineTransducerModelConfig transducer; | 164 | final OnlineTransducerModelConfig transducer; |
| 140 | final OnlineParaformerModelConfig paraformer; | 165 | final OnlineParaformerModelConfig paraformer; |
| 141 | final OnlineZipformer2CtcModelConfig zipformer2Ctc; | 166 | final OnlineZipformer2CtcModelConfig zipformer2Ctc; |
| 167 | + final OnlineNemoCtcModelConfig nemoCtc; | ||
| 142 | 168 | ||
| 143 | final String tokens; | 169 | final String tokens; |
| 144 | 170 | ||
| @@ -333,6 +359,9 @@ class OnlineRecognizer { | @@ -333,6 +359,9 @@ class OnlineRecognizer { | ||
| 333 | c.ref.model.zipformer2Ctc.model = | 359 | c.ref.model.zipformer2Ctc.model = |
| 334 | config.model.zipformer2Ctc.model.toNativeUtf8(); | 360 | config.model.zipformer2Ctc.model.toNativeUtf8(); |
| 335 | 361 | ||
| 362 | + // nemoCtc | ||
| 363 | + c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8(); | ||
| 364 | + | ||
| 336 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); | 365 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); |
| 337 | c.ref.model.numThreads = config.model.numThreads; | 366 | c.ref.model.numThreads = config.model.numThreads; |
| 338 | c.ref.model.provider = config.model.provider.toNativeUtf8(); | 367 | c.ref.model.provider = config.model.provider.toNativeUtf8(); |
| @@ -377,6 +406,7 @@ class OnlineRecognizer { | @@ -377,6 +406,7 @@ class OnlineRecognizer { | ||
| 377 | calloc.free(c.ref.model.modelType); | 406 | calloc.free(c.ref.model.modelType); |
| 378 | calloc.free(c.ref.model.provider); | 407 | calloc.free(c.ref.model.provider); |
| 379 | calloc.free(c.ref.model.tokens); | 408 | calloc.free(c.ref.model.tokens); |
| 409 | + calloc.free(c.ref.model.nemoCtc.model); | ||
| 380 | calloc.free(c.ref.model.zipformer2Ctc.model); | 410 | calloc.free(c.ref.model.zipformer2Ctc.model); |
| 381 | calloc.free(c.ref.model.paraformer.encoder); | 411 | calloc.free(c.ref.model.paraformer.encoder); |
| 382 | calloc.free(c.ref.model.paraformer.decoder); | 412 | calloc.free(c.ref.model.paraformer.decoder); |
| @@ -388,6 +388,10 @@ final class SherpaOnnxOnlineZipformer2CtcModelConfig extends Struct { | @@ -388,6 +388,10 @@ final class SherpaOnnxOnlineZipformer2CtcModelConfig extends Struct { | ||
| 388 | external Pointer<Utf8> model; | 388 | external Pointer<Utf8> model; |
| 389 | } | 389 | } |
| 390 | 390 | ||
| 391 | +final class SherpaOnnxOnlineNemoCtcModelConfig extends Struct { | ||
| 392 | + external Pointer<Utf8> model; | ||
| 393 | +} | ||
| 394 | + | ||
| 391 | final class SherpaOnnxOnlineModelConfig extends Struct { | 395 | final class SherpaOnnxOnlineModelConfig extends Struct { |
| 392 | external SherpaOnnxOnlineTransducerModelConfig transducer; | 396 | external SherpaOnnxOnlineTransducerModelConfig transducer; |
| 393 | external SherpaOnnxOnlineParaformerModelConfig paraformer; | 397 | external SherpaOnnxOnlineParaformerModelConfig paraformer; |
| @@ -413,6 +417,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct { | @@ -413,6 +417,8 @@ final class SherpaOnnxOnlineModelConfig extends Struct { | ||
| 413 | 417 | ||
| 414 | @Int32() | 418 | @Int32() |
| 415 | external int tokensBufSize; | 419 | external int tokensBufSize; |
| 420 | + | ||
| 421 | + external SherpaOnnxOnlineNemoCtcModelConfig nemoCtc; | ||
| 416 | } | 422 | } |
| 417 | 423 | ||
| 418 | final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct { | 424 | final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct { |
| 1 | module non-streaming-canary-decode-files | 1 | module non-streaming-canary-decode-files |
| 2 | 2 | ||
| 3 | go 1.17 | 3 | go 1.17 |
| 4 | - | ||
| 5 | -require ( | ||
| 6 | - github.com/k2-fsa/sherpa-onnx-go v1.12.4 | ||
| 7 | - github.com/spf13/pflag v1.0.6 | ||
| 8 | - github.com/youpy/go-wav v0.3.2 | ||
| 9 | -) | ||
| 10 | - | ||
| 11 | -require ( | ||
| 12 | - github.com/k2-fsa/sherpa-onnx-go-linux v1.12.4 // indirect | ||
| 13 | - github.com/k2-fsa/sherpa-onnx-go-macos v1.12.4 // indirect | ||
| 14 | - github.com/k2-fsa/sherpa-onnx-go-windows v1.12.4 // indirect | ||
| 15 | - github.com/youpy/go-riff v0.1.0 // indirect | ||
| 16 | - github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect | ||
| 17 | -) |
| @@ -25,6 +25,7 @@ export { Samples, | @@ -25,6 +25,7 @@ export { Samples, | ||
| 25 | } from './src/main/ets/components/NonStreamingAsr'; | 25 | } from './src/main/ets/components/NonStreamingAsr'; |
| 26 | 26 | ||
| 27 | export { OnlineStream, | 27 | export { OnlineStream, |
| 28 | + OnlineNemoCtcModelConfig, | ||
| 28 | OnlineTransducerModelConfig, | 29 | OnlineTransducerModelConfig, |
| 29 | OnlineParaformerModelConfig, | 30 | OnlineParaformerModelConfig, |
| 30 | OnlineZipformer2CtcModelConfig, | 31 | OnlineZipformer2CtcModelConfig, |
| @@ -73,6 +73,22 @@ GetOnlineZipformer2CtcModelConfig(Napi::Object obj) { | @@ -73,6 +73,22 @@ GetOnlineZipformer2CtcModelConfig(Napi::Object obj) { | ||
| 73 | return c; | 73 | return c; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | +static SherpaOnnxOnlineNemoCtcModelConfig GetOnlineNemoCtcModelConfig( | ||
| 77 | + Napi::Object obj) { | ||
| 78 | + SherpaOnnxOnlineNemoCtcModelConfig c; | ||
| 79 | + memset(&c, 0, sizeof(c)); | ||
| 80 | + | ||
| 81 | + if (!obj.Has("nemoCtc") || !obj.Get("nemoCtc").IsObject()) { | ||
| 82 | + return c; | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + Napi::Object o = obj.Get("nemoCtc").As<Napi::Object>(); | ||
| 86 | + | ||
| 87 | + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model); | ||
| 88 | + | ||
| 89 | + return c; | ||
| 90 | +} | ||
| 91 | + | ||
| 76 | static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( | 92 | static SherpaOnnxOnlineParaformerModelConfig GetOnlineParaformerModelConfig( |
| 77 | Napi::Object obj) { | 93 | Napi::Object obj) { |
| 78 | SherpaOnnxOnlineParaformerModelConfig c; | 94 | SherpaOnnxOnlineParaformerModelConfig c; |
| @@ -103,6 +119,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { | @@ -103,6 +119,7 @@ SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { | ||
| 103 | c.transducer = GetOnlineTransducerModelConfig(o); | 119 | c.transducer = GetOnlineTransducerModelConfig(o); |
| 104 | c.paraformer = GetOnlineParaformerModelConfig(o); | 120 | c.paraformer = GetOnlineParaformerModelConfig(o); |
| 105 | c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o); | 121 | c.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o); |
| 122 | + c.nemo_ctc = GetOnlineNemoCtcModelConfig(o); | ||
| 106 | 123 | ||
| 107 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); | 124 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); |
| 108 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); | 125 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); |
| @@ -248,6 +265,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | @@ -248,6 +265,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | ||
| 248 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder); | 265 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.encoder); |
| 249 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder); | 266 | SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.decoder); |
| 250 | 267 | ||
| 268 | + SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model); | ||
| 251 | SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model); | 269 | SHERPA_ONNX_DELETE_C_STR(c.model_config.zipformer2_ctc.model); |
| 252 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); | 270 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); |
| 253 | SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); | 271 | SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); |
| @@ -46,10 +46,15 @@ export class OnlineZipformer2CtcModelConfig { | @@ -46,10 +46,15 @@ export class OnlineZipformer2CtcModelConfig { | ||
| 46 | public model: string = ''; | 46 | public model: string = ''; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | +export class OnlineNemoCtcModelConfig { | ||
| 50 | + public model: string = ''; | ||
| 51 | +} | ||
| 52 | + | ||
| 49 | export class OnlineModelConfig { | 53 | export class OnlineModelConfig { |
| 50 | public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig(); | 54 | public transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig(); |
| 51 | public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig(); | 55 | public paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig(); |
| 52 | public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig(); | 56 | public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig(); |
| 57 | + public nemo_ctc: OnlineNemoCtcModelConfig = new OnlineNemoCtcModelConfig(); | ||
| 53 | public tokens: string = ''; | 58 | public tokens: string = ''; |
| 54 | public numThreads: number = 1; | 59 | public numThreads: number = 1; |
| 55 | public provider: string = 'cpu'; | 60 | public provider: string = 'cpu'; |
| @@ -338,7 +338,7 @@ void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() { | @@ -338,7 +338,7 @@ void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() { | ||
| 338 | msg += | 338 | msg += |
| 339 | "wget " | 339 | "wget " |
| 340 | "https://huggingface.co/csukuangfj/" | 340 | "https://huggingface.co/csukuangfj/" |
| 341 | - "sherpa-onnx-paraformer-zh-2023-09-14/resolve/main/model.onnx\r\n"; | 341 | + "sherpa-onnx-paraformer-zh-2023-09-14/resolve/main/model.int8.onnx\r\n"; |
| 342 | msg += | 342 | msg += |
| 343 | "wget " | 343 | "wget " |
| 344 | "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-09-14/" | 344 | "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-09-14/" |
| @@ -24,6 +24,7 @@ namespace SherpaOnnx | @@ -24,6 +24,7 @@ namespace SherpaOnnx | ||
| 24 | BpeVocab = ""; | 24 | BpeVocab = ""; |
| 25 | TokensBuf = ""; | 25 | TokensBuf = ""; |
| 26 | TokensBufSize = 0; | 26 | TokensBufSize = 0; |
| 27 | + NemoCtc = new OnlineNemoCtcModelConfig(); | ||
| 27 | } | 28 | } |
| 28 | 29 | ||
| 29 | public OnlineTransducerModelConfig Transducer; | 30 | public OnlineTransducerModelConfig Transducer; |
| @@ -55,6 +56,8 @@ namespace SherpaOnnx | @@ -55,6 +56,8 @@ namespace SherpaOnnx | ||
| 55 | public string TokensBuf; | 56 | public string TokensBuf; |
| 56 | 57 | ||
| 57 | public int TokensBufSize; | 58 | public int TokensBufSize; |
| 59 | + | ||
| 60 | + public OnlineNemoCtcModelConfig NemoCtc; | ||
| 58 | } | 61 | } |
| 59 | 62 | ||
| 60 | } | 63 | } |
scripts/dotnet/OnlineNemoCtcModelConfig.cs
0 → 100644
| 1 | +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
| 5 | +namespace SherpaOnnx | ||
| 6 | +{ | ||
| 7 | + [StructLayout(LayoutKind.Sequential)] | ||
| 8 | + public struct OnlineNemoCtcModelConfig | ||
| 9 | + { | ||
| 10 | + public OnlineNemoCtcModelConfig() | ||
| 11 | + { | ||
| 12 | + Model = ""; | ||
| 13 | + } | ||
| 14 | + | ||
| 15 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 16 | + public string Model; | ||
| 17 | + } | ||
| 18 | +} |
| @@ -77,6 +77,10 @@ type OnlineZipformer2CtcModelConfig struct { | @@ -77,6 +77,10 @@ type OnlineZipformer2CtcModelConfig struct { | ||
| 77 | Model string // Path to the onnx model | 77 | Model string // Path to the onnx model |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | +type OnlineNemoCtcModelConfig struct { | ||
| 81 | + Model string // Path to the onnx model | ||
| 82 | +} | ||
| 83 | + | ||
| 80 | // Configuration for online/streaming models | 84 | // Configuration for online/streaming models |
| 81 | // | 85 | // |
| 82 | // Please refer to | 86 | // Please refer to |
| @@ -87,6 +91,7 @@ type OnlineModelConfig struct { | @@ -87,6 +91,7 @@ type OnlineModelConfig struct { | ||
| 87 | Transducer OnlineTransducerModelConfig | 91 | Transducer OnlineTransducerModelConfig |
| 88 | Paraformer OnlineParaformerModelConfig | 92 | Paraformer OnlineParaformerModelConfig |
| 89 | Zipformer2Ctc OnlineZipformer2CtcModelConfig | 93 | Zipformer2Ctc OnlineZipformer2CtcModelConfig |
| 94 | + NemoCtc OnlineNemoCtcModelConfig | ||
| 90 | Tokens string // Path to tokens.txt | 95 | Tokens string // Path to tokens.txt |
| 91 | NumThreads int // Number of threads to use for neural network computation | 96 | NumThreads int // Number of threads to use for neural network computation |
| 92 | Provider string // Optional. Valid values are: cpu, cuda, coreml | 97 | Provider string // Optional. Valid values are: cpu, cuda, coreml |
| @@ -197,6 +202,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | @@ -197,6 +202,9 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | ||
| 197 | c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model) | 202 | c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model) |
| 198 | defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model)) | 203 | defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model)) |
| 199 | 204 | ||
| 205 | + c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model) | ||
| 206 | + defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) | ||
| 207 | + | ||
| 200 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | 208 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) |
| 201 | defer C.free(unsafe.Pointer(c.model_config.tokens)) | 209 | defer C.free(unsafe.Pointer(c.model_config.tokens)) |
| 202 | 210 | ||
| @@ -1814,6 +1822,9 @@ func NewKeywordSpotter(config *KeywordSpotterConfig) *KeywordSpotter { | @@ -1814,6 +1822,9 @@ func NewKeywordSpotter(config *KeywordSpotterConfig) *KeywordSpotter { | ||
| 1814 | c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model) | 1822 | c.model_config.zipformer2_ctc.model = C.CString(config.ModelConfig.Zipformer2Ctc.Model) |
| 1815 | defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model)) | 1823 | defer C.free(unsafe.Pointer(c.model_config.zipformer2_ctc.model)) |
| 1816 | 1824 | ||
| 1825 | + c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCtc.Model) | ||
| 1826 | + defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) | ||
| 1827 | + | ||
| 1817 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | 1828 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) |
| 1818 | defer C.free(unsafe.Pointer(c.model_config.tokens)) | 1829 | defer C.free(unsafe.Pointer(c.model_config.tokens)) |
| 1819 | 1830 |
| @@ -97,6 +97,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | @@ -97,6 +97,9 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | ||
| 97 | config->model_config.tokens_buf, config->model_config.tokens_buf_size); | 97 | config->model_config.tokens_buf, config->model_config.tokens_buf_size); |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | + recognizer_config.model_config.nemo_ctc.model = | ||
| 101 | + SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, ""); | ||
| 102 | + | ||
| 100 | recognizer_config.model_config.num_threads = | 103 | recognizer_config.model_config.num_threads = |
| 101 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); | 104 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); |
| 102 | recognizer_config.model_config.provider_config.provider = | 105 | recognizer_config.model_config.provider_config.provider = |
| @@ -108,8 +111,7 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | @@ -108,8 +111,7 @@ static sherpa_onnx::OnlineRecognizerConfig GetOnlineRecognizerConfig( | ||
| 108 | 111 | ||
| 109 | recognizer_config.model_config.model_type = | 112 | recognizer_config.model_config.model_type = |
| 110 | SHERPA_ONNX_OR(config->model_config.model_type, ""); | 113 | SHERPA_ONNX_OR(config->model_config.model_type, ""); |
| 111 | - recognizer_config.model_config.debug = | ||
| 112 | - SHERPA_ONNX_OR(config->model_config.debug, 0); | 114 | + recognizer_config.model_config.debug = config->model_config.debug; |
| 113 | recognizer_config.model_config.modeling_unit = | 115 | recognizer_config.model_config.modeling_unit = |
| 114 | SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); | 116 | SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); |
| 115 | 117 | ||
| @@ -431,8 +433,7 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | @@ -431,8 +433,7 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | ||
| 431 | SHERPA_ONNX_OR(config->model_config.tokens, ""); | 433 | SHERPA_ONNX_OR(config->model_config.tokens, ""); |
| 432 | recognizer_config.model_config.num_threads = | 434 | recognizer_config.model_config.num_threads = |
| 433 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); | 435 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); |
| 434 | - recognizer_config.model_config.debug = | ||
| 435 | - SHERPA_ONNX_OR(config->model_config.debug, 0); | 436 | + recognizer_config.model_config.debug = config->model_config.debug; |
| 436 | recognizer_config.model_config.provider = | 437 | recognizer_config.model_config.provider = |
| 437 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); | 438 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); |
| 438 | if (recognizer_config.model_config.provider.empty()) { | 439 | if (recognizer_config.model_config.provider.empty()) { |
| @@ -759,6 +760,9 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig( | @@ -759,6 +760,9 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig( | ||
| 759 | spotter_config.model_config.zipformer2_ctc.model = | 760 | spotter_config.model_config.zipformer2_ctc.model = |
| 760 | SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, ""); | 761 | SHERPA_ONNX_OR(config->model_config.zipformer2_ctc.model, ""); |
| 761 | 762 | ||
| 763 | + spotter_config.model_config.nemo_ctc.model = | ||
| 764 | + SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, ""); | ||
| 765 | + | ||
| 762 | spotter_config.model_config.tokens = | 766 | spotter_config.model_config.tokens = |
| 763 | SHERPA_ONNX_OR(config->model_config.tokens, ""); | 767 | SHERPA_ONNX_OR(config->model_config.tokens, ""); |
| 764 | if (config->model_config.tokens_buf && | 768 | if (config->model_config.tokens_buf && |
| @@ -777,8 +781,7 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig( | @@ -777,8 +781,7 @@ static sherpa_onnx::KeywordSpotterConfig GetKeywordSpotterConfig( | ||
| 777 | 781 | ||
| 778 | spotter_config.model_config.model_type = | 782 | spotter_config.model_config.model_type = |
| 779 | SHERPA_ONNX_OR(config->model_config.model_type, ""); | 783 | SHERPA_ONNX_OR(config->model_config.model_type, ""); |
| 780 | - spotter_config.model_config.debug = | ||
| 781 | - SHERPA_ONNX_OR(config->model_config.debug, 0); | 784 | + spotter_config.model_config.debug = config->model_config.debug; |
| 782 | 785 | ||
| 783 | spotter_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4); | 786 | spotter_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4); |
| 784 | 787 | ||
| @@ -1055,7 +1058,7 @@ sherpa_onnx::VadModelConfig GetVadModelConfig( | @@ -1055,7 +1058,7 @@ sherpa_onnx::VadModelConfig GetVadModelConfig( | ||
| 1055 | vad_config.provider = "cpu"; | 1058 | vad_config.provider = "cpu"; |
| 1056 | } | 1059 | } |
| 1057 | 1060 | ||
| 1058 | - vad_config.debug = SHERPA_ONNX_OR(config->debug, false); | 1061 | + vad_config.debug = config->debug; |
| 1059 | 1062 | ||
| 1060 | if (vad_config.debug) { | 1063 | if (vad_config.debug) { |
| 1061 | #if __OHOS__ | 1064 | #if __OHOS__ |
| @@ -1542,7 +1545,7 @@ GetSpeakerEmbeddingExtractorConfig( | @@ -1542,7 +1545,7 @@ GetSpeakerEmbeddingExtractorConfig( | ||
| 1542 | c.model = SHERPA_ONNX_OR(config->model, ""); | 1545 | c.model = SHERPA_ONNX_OR(config->model, ""); |
| 1543 | 1546 | ||
| 1544 | c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); | 1547 | c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); |
| 1545 | - c.debug = SHERPA_ONNX_OR(config->debug, 0); | 1548 | + c.debug = config->debug; |
| 1546 | c.provider = SHERPA_ONNX_OR(config->provider, "cpu"); | 1549 | c.provider = SHERPA_ONNX_OR(config->provider, "cpu"); |
| 1547 | if (c.provider.empty()) { | 1550 | if (c.provider.empty()) { |
| 1548 | c.provider = "cpu"; | 1551 | c.provider = "cpu"; |
| @@ -100,6 +100,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineZipformer2CtcModelConfig { | @@ -100,6 +100,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineZipformer2CtcModelConfig { | ||
| 100 | const char *model; | 100 | const char *model; |
| 101 | } SherpaOnnxOnlineZipformer2CtcModelConfig; | 101 | } SherpaOnnxOnlineZipformer2CtcModelConfig; |
| 102 | 102 | ||
| 103 | +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineNemoCtcModelConfig { | ||
| 104 | + const char *model; | ||
| 105 | +} SherpaOnnxOnlineNemoCtcModelConfig; | ||
| 106 | + | ||
| 103 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { | 107 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { |
| 104 | SherpaOnnxOnlineTransducerModelConfig transducer; | 108 | SherpaOnnxOnlineTransducerModelConfig transducer; |
| 105 | SherpaOnnxOnlineParaformerModelConfig paraformer; | 109 | SherpaOnnxOnlineParaformerModelConfig paraformer; |
| @@ -120,6 +124,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { | @@ -120,6 +124,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineModelConfig { | ||
| 120 | const char *tokens_buf; | 124 | const char *tokens_buf; |
| 121 | /// byte size excluding the trailing '\0' | 125 | /// byte size excluding the trailing '\0' |
| 122 | int32_t tokens_buf_size; | 126 | int32_t tokens_buf_size; |
| 127 | + SherpaOnnxOnlineNemoCtcModelConfig nemo_ctc; | ||
| 123 | } SherpaOnnxOnlineModelConfig; | 128 | } SherpaOnnxOnlineModelConfig; |
| 124 | 129 | ||
| 125 | /// It expects 16 kHz 16-bit single channel wave format. | 130 | /// It expects 16 kHz 16-bit single channel wave format. |
| @@ -69,6 +69,8 @@ OnlineRecognizer OnlineRecognizer::Create( | @@ -69,6 +69,8 @@ OnlineRecognizer OnlineRecognizer::Create( | ||
| 69 | c.model_config.zipformer2_ctc.model = | 69 | c.model_config.zipformer2_ctc.model = |
| 70 | config.model_config.zipformer2_ctc.model.c_str(); | 70 | config.model_config.zipformer2_ctc.model.c_str(); |
| 71 | 71 | ||
| 72 | + c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str(); | ||
| 73 | + | ||
| 72 | c.model_config.tokens = config.model_config.tokens.c_str(); | 74 | c.model_config.tokens = config.model_config.tokens.c_str(); |
| 73 | c.model_config.num_threads = config.model_config.num_threads; | 75 | c.model_config.num_threads = config.model_config.num_threads; |
| 74 | c.model_config.provider = config.model_config.provider.c_str(); | 76 | c.model_config.provider = config.model_config.provider.c_str(); |
| @@ -473,6 +475,8 @@ KeywordSpotter KeywordSpotter::Create(const KeywordSpotterConfig &config) { | @@ -473,6 +475,8 @@ KeywordSpotter KeywordSpotter::Create(const KeywordSpotterConfig &config) { | ||
| 473 | c.model_config.zipformer2_ctc.model = | 475 | c.model_config.zipformer2_ctc.model = |
| 474 | config.model_config.zipformer2_ctc.model.c_str(); | 476 | config.model_config.zipformer2_ctc.model.c_str(); |
| 475 | 477 | ||
| 478 | + c.model_config.nemo_ctc.model = config.model_config.nemo_ctc.model.c_str(); | ||
| 479 | + | ||
| 476 | c.model_config.tokens = config.model_config.tokens.c_str(); | 480 | c.model_config.tokens = config.model_config.tokens.c_str(); |
| 477 | c.model_config.num_threads = config.model_config.num_threads; | 481 | c.model_config.num_threads = config.model_config.num_threads; |
| 478 | c.model_config.provider = config.model_config.provider.c_str(); | 482 | c.model_config.provider = config.model_config.provider.c_str(); |
| @@ -32,10 +32,15 @@ struct OnlineZipformer2CtcModelConfig { | @@ -32,10 +32,15 @@ struct OnlineZipformer2CtcModelConfig { | ||
| 32 | std::string model; | 32 | std::string model; |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | +struct OnlineNemoCtcModelConfig { | ||
| 36 | + std::string model; | ||
| 37 | +}; | ||
| 38 | + | ||
| 35 | struct OnlineModelConfig { | 39 | struct OnlineModelConfig { |
| 36 | OnlineTransducerModelConfig transducer; | 40 | OnlineTransducerModelConfig transducer; |
| 37 | OnlineParaformerModelConfig paraformer; | 41 | OnlineParaformerModelConfig paraformer; |
| 38 | OnlineZipformer2CtcModelConfig zipformer2_ctc; | 42 | OnlineZipformer2CtcModelConfig zipformer2_ctc; |
| 43 | + OnlineNemoCtcModelConfig nemo_ctc; | ||
| 39 | std::string tokens; | 44 | std::string tokens; |
| 40 | int32_t num_threads = 1; | 45 | int32_t num_threads = 1; |
| 41 | std::string provider = "cpu"; | 46 | std::string provider = "cpu"; |
| @@ -175,6 +175,77 @@ class SileroVadModelRknn::Impl { | @@ -175,6 +175,77 @@ class SileroVadModelRknn::Impl { | ||
| 175 | config_.silero_vad.threshold = threshold; | 175 | config_.silero_vad.threshold = threshold; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| 178 | + float Run(const float *samples, int32_t n) { | ||
| 179 | + std::vector<rknn_input> inputs(input_attrs_.size()); | ||
| 180 | + | ||
| 181 | + for (int32_t i = 0; i < static_cast<int32_t>(inputs.size()); ++i) { | ||
| 182 | + auto &input = inputs[i]; | ||
| 183 | + auto &attr = input_attrs_[i]; | ||
| 184 | + input.index = attr.index; | ||
| 185 | + | ||
| 186 | + if (attr.type == RKNN_TENSOR_FLOAT16) { | ||
| 187 | + input.type = RKNN_TENSOR_FLOAT32; | ||
| 188 | + } else if (attr.type == RKNN_TENSOR_INT64) { | ||
| 189 | + input.type = RKNN_TENSOR_INT64; | ||
| 190 | + } else { | ||
| 191 | + SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type, | ||
| 192 | + get_type_string(attr.type)); | ||
| 193 | + SHERPA_ONNX_EXIT(-1); | ||
| 194 | + } | ||
| 195 | + | ||
| 196 | + input.fmt = attr.fmt; | ||
| 197 | + if (i == 0) { | ||
| 198 | + input.buf = reinterpret_cast<void *>(const_cast<float *>(samples)); | ||
| 199 | + input.size = n * sizeof(float); | ||
| 200 | + } else { | ||
| 201 | + input.buf = reinterpret_cast<void *>(states_[i - 1].data()); | ||
| 202 | + input.size = states_[i - 1].size() * sizeof(float); | ||
| 203 | + } | ||
| 204 | + } | ||
| 205 | + | ||
| 206 | + std::vector<float> out(output_attrs_[0].n_elems); | ||
| 207 | + | ||
| 208 | + auto &next_states = states_; | ||
| 209 | + | ||
| 210 | + std::vector<rknn_output> outputs(output_attrs_.size()); | ||
| 211 | + | ||
| 212 | + for (int32_t i = 0; i < outputs.size(); ++i) { | ||
| 213 | + auto &output = outputs[i]; | ||
| 214 | + auto &attr = output_attrs_[i]; | ||
| 215 | + output.index = attr.index; | ||
| 216 | + output.is_prealloc = 1; | ||
| 217 | + | ||
| 218 | + if (attr.type == RKNN_TENSOR_FLOAT16) { | ||
| 219 | + output.want_float = 1; | ||
| 220 | + } else if (attr.type == RKNN_TENSOR_INT64) { | ||
| 221 | + output.want_float = 0; | ||
| 222 | + } else { | ||
| 223 | + SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type, | ||
| 224 | + get_type_string(attr.type)); | ||
| 225 | + SHERPA_ONNX_EXIT(-1); | ||
| 226 | + } | ||
| 227 | + | ||
| 228 | + if (i == 0) { | ||
| 229 | + output.size = out.size() * sizeof(float); | ||
| 230 | + output.buf = reinterpret_cast<void *>(out.data()); | ||
| 231 | + } else { | ||
| 232 | + output.size = next_states[i - 1].size() * sizeof(float); | ||
| 233 | + output.buf = reinterpret_cast<void *>(next_states[i - 1].data()); | ||
| 234 | + } | ||
| 235 | + } | ||
| 236 | + | ||
| 237 | + auto ret = rknn_inputs_set(ctx_, inputs.size(), inputs.data()); | ||
| 238 | + SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set inputs"); | ||
| 239 | + | ||
| 240 | + ret = rknn_run(ctx_, nullptr); | ||
| 241 | + SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the model"); | ||
| 242 | + | ||
| 243 | + ret = rknn_outputs_get(ctx_, outputs.size(), outputs.data(), nullptr); | ||
| 244 | + SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get model output"); | ||
| 245 | + | ||
| 246 | + return out[0]; | ||
| 247 | + } | ||
| 248 | + | ||
| 178 | private: | 249 | private: |
| 179 | void Init(void *model_data, size_t model_data_length) { | 250 | void Init(void *model_data, size_t model_data_length) { |
| 180 | InitContext(model_data, model_data_length, config_.debug, &ctx_); | 251 | InitContext(model_data, model_data_length, config_.debug, &ctx_); |
| @@ -267,77 +338,6 @@ class SileroVadModelRknn::Impl { | @@ -267,77 +338,6 @@ class SileroVadModelRknn::Impl { | ||
| 267 | Reset(); | 338 | Reset(); |
| 268 | } | 339 | } |
| 269 | 340 | ||
| 270 | - float Run(const float *samples, int32_t n) { | ||
| 271 | - std::vector<rknn_input> inputs(input_attrs_.size()); | ||
| 272 | - | ||
| 273 | - for (int32_t i = 0; i < static_cast<int32_t>(inputs.size()); ++i) { | ||
| 274 | - auto &input = inputs[i]; | ||
| 275 | - auto &attr = input_attrs_[i]; | ||
| 276 | - input.index = attr.index; | ||
| 277 | - | ||
| 278 | - if (attr.type == RKNN_TENSOR_FLOAT16) { | ||
| 279 | - input.type = RKNN_TENSOR_FLOAT32; | ||
| 280 | - } else if (attr.type == RKNN_TENSOR_INT64) { | ||
| 281 | - input.type = RKNN_TENSOR_INT64; | ||
| 282 | - } else { | ||
| 283 | - SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type, | ||
| 284 | - get_type_string(attr.type)); | ||
| 285 | - SHERPA_ONNX_EXIT(-1); | ||
| 286 | - } | ||
| 287 | - | ||
| 288 | - input.fmt = attr.fmt; | ||
| 289 | - if (i == 0) { | ||
| 290 | - input.buf = reinterpret_cast<void *>(const_cast<float *>(samples)); | ||
| 291 | - input.size = n * sizeof(float); | ||
| 292 | - } else { | ||
| 293 | - input.buf = reinterpret_cast<void *>(states_[i - 1].data()); | ||
| 294 | - input.size = states_[i - 1].size() * sizeof(float); | ||
| 295 | - } | ||
| 296 | - } | ||
| 297 | - | ||
| 298 | - std::vector<float> out(output_attrs_[0].n_elems); | ||
| 299 | - | ||
| 300 | - auto &next_states = states_; | ||
| 301 | - | ||
| 302 | - std::vector<rknn_output> outputs(output_attrs_.size()); | ||
| 303 | - | ||
| 304 | - for (int32_t i = 0; i < outputs.size(); ++i) { | ||
| 305 | - auto &output = outputs[i]; | ||
| 306 | - auto &attr = output_attrs_[i]; | ||
| 307 | - output.index = attr.index; | ||
| 308 | - output.is_prealloc = 1; | ||
| 309 | - | ||
| 310 | - if (attr.type == RKNN_TENSOR_FLOAT16) { | ||
| 311 | - output.want_float = 1; | ||
| 312 | - } else if (attr.type == RKNN_TENSOR_INT64) { | ||
| 313 | - output.want_float = 0; | ||
| 314 | - } else { | ||
| 315 | - SHERPA_ONNX_LOGE("Unsupported tensor type %d, %s", attr.type, | ||
| 316 | - get_type_string(attr.type)); | ||
| 317 | - SHERPA_ONNX_EXIT(-1); | ||
| 318 | - } | ||
| 319 | - | ||
| 320 | - if (i == 0) { | ||
| 321 | - output.size = out.size() * sizeof(float); | ||
| 322 | - output.buf = reinterpret_cast<void *>(out.data()); | ||
| 323 | - } else { | ||
| 324 | - output.size = next_states[i - 1].size() * sizeof(float); | ||
| 325 | - output.buf = reinterpret_cast<void *>(next_states[i - 1].data()); | ||
| 326 | - } | ||
| 327 | - } | ||
| 328 | - | ||
| 329 | - auto ret = rknn_inputs_set(ctx_, inputs.size(), inputs.data()); | ||
| 330 | - SHERPA_ONNX_RKNN_CHECK(ret, "Failed to set inputs"); | ||
| 331 | - | ||
| 332 | - ret = rknn_run(ctx_, nullptr); | ||
| 333 | - SHERPA_ONNX_RKNN_CHECK(ret, "Failed to run the model"); | ||
| 334 | - | ||
| 335 | - ret = rknn_outputs_get(ctx_, outputs.size(), outputs.data(), nullptr); | ||
| 336 | - SHERPA_ONNX_RKNN_CHECK(ret, "Failed to get model output"); | ||
| 337 | - | ||
| 338 | - return out[0]; | ||
| 339 | - } | ||
| 340 | - | ||
| 341 | private: | 341 | private: |
| 342 | VadModelConfig config_; | 342 | VadModelConfig config_; |
| 343 | rknn_context ctx_ = 0; | 343 | rknn_context ctx_ = 0; |
| @@ -395,6 +395,10 @@ void SileroVadModelRknn::SetThreshold(float threshold) { | @@ -395,6 +395,10 @@ void SileroVadModelRknn::SetThreshold(float threshold) { | ||
| 395 | impl_->SetThreshold(threshold); | 395 | impl_->SetThreshold(threshold); |
| 396 | } | 396 | } |
| 397 | 397 | ||
| 398 | +float SileroVadModelRknn::Compute(const float *samples, int32_t n) { | ||
| 399 | + return impl_->Run(samples, n); | ||
| 400 | +} | ||
| 401 | + | ||
| 398 | #if __ANDROID_API__ >= 9 | 402 | #if __ANDROID_API__ >= 9 |
| 399 | template SileroVadModelRknn::SileroVadModelRknn(AAssetManager *mgr, | 403 | template SileroVadModelRknn::SileroVadModelRknn(AAssetManager *mgr, |
| 400 | const VadModelConfig &config); | 404 | const VadModelConfig &config); |
| @@ -32,6 +32,7 @@ class SileroVadModelRknn : public VadModel { | @@ -32,6 +32,7 @@ class SileroVadModelRknn : public VadModel { | ||
| 32 | * @return Return true if speech is detected. Return false otherwise. | 32 | * @return Return true if speech is detected. Return false otherwise. |
| 33 | */ | 33 | */ |
| 34 | bool IsSpeech(const float *samples, int32_t n) override; | 34 | bool IsSpeech(const float *samples, int32_t n) override; |
| 35 | + float Compute(const float *samples, int32_t n) override; | ||
| 35 | 36 | ||
| 36 | // For silero vad V4, it is WindowShift(). | 37 | // For silero vad V4, it is WindowShift(). |
| 37 | int32_t WindowSize() const override; | 38 | int32_t WindowSize() const override; |
| @@ -89,8 +89,8 @@ void SafeJNI(JNIEnv *env, const char *functionName, Func func) { | @@ -89,8 +89,8 @@ void SafeJNI(JNIEnv *env, const char *functionName, Func func) { | ||
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | // Helper function to validate JNI pointers | 91 | // Helper function to validate JNI pointers |
| 92 | -inline bool ValidatePointer(JNIEnv *env, jlong ptr, | ||
| 93 | - const char *functionName, const char *message) { | 92 | +inline bool ValidatePointer(JNIEnv *env, jlong ptr, const char *functionName, |
| 93 | + const char *message) { | ||
| 94 | if (ptr == 0) { | 94 | if (ptr == 0) { |
| 95 | jclass exClass = env->FindClass("java/lang/NullPointerException"); | 95 | jclass exClass = env->FindClass("java/lang/NullPointerException"); |
| 96 | if (exClass != nullptr) { | 96 | if (exClass != nullptr) { |
| @@ -9,6 +9,9 @@ | @@ -9,6 +9,9 @@ | ||
| 9 | 9 | ||
| 10 | namespace sherpa_onnx { | 10 | namespace sherpa_onnx { |
| 11 | 11 | ||
| 12 | +OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls, | ||
| 13 | + jobject model_config); | ||
| 14 | + | ||
| 12 | static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) { | 15 | static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) { |
| 13 | KeywordSpotterConfig ans; | 16 | KeywordSpotterConfig ans; |
| 14 | 17 | ||
| @@ -57,54 +60,7 @@ static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) { | @@ -57,54 +60,7 @@ static KeywordSpotterConfig GetKwsConfig(JNIEnv *env, jobject config) { | ||
| 57 | "Lcom/k2fsa/sherpa/onnx/OnlineModelConfig;"); | 60 | "Lcom/k2fsa/sherpa/onnx/OnlineModelConfig;"); |
| 58 | jobject model_config = env->GetObjectField(config, fid); | 61 | jobject model_config = env->GetObjectField(config, fid); |
| 59 | jclass model_config_cls = env->GetObjectClass(model_config); | 62 | jclass model_config_cls = env->GetObjectClass(model_config); |
| 60 | - | ||
| 61 | - // transducer | ||
| 62 | - fid = env->GetFieldID(model_config_cls, "transducer", | ||
| 63 | - "Lcom/k2fsa/sherpa/onnx/OnlineTransducerModelConfig;"); | ||
| 64 | - jobject transducer_config = env->GetObjectField(model_config, fid); | ||
| 65 | - jclass transducer_config_cls = env->GetObjectClass(transducer_config); | ||
| 66 | - | ||
| 67 | - fid = env->GetFieldID(transducer_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 68 | - s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 69 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 70 | - ans.model_config.transducer.encoder = p; | ||
| 71 | - env->ReleaseStringUTFChars(s, p); | ||
| 72 | - | ||
| 73 | - fid = env->GetFieldID(transducer_config_cls, "decoder", "Ljava/lang/String;"); | ||
| 74 | - s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 75 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 76 | - ans.model_config.transducer.decoder = p; | ||
| 77 | - env->ReleaseStringUTFChars(s, p); | ||
| 78 | - | ||
| 79 | - fid = env->GetFieldID(transducer_config_cls, "joiner", "Ljava/lang/String;"); | ||
| 80 | - s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 81 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 82 | - ans.model_config.transducer.joiner = p; | ||
| 83 | - env->ReleaseStringUTFChars(s, p); | ||
| 84 | - | ||
| 85 | - fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;"); | ||
| 86 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 87 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 88 | - ans.model_config.tokens = p; | ||
| 89 | - env->ReleaseStringUTFChars(s, p); | ||
| 90 | - | ||
| 91 | - fid = env->GetFieldID(model_config_cls, "numThreads", "I"); | ||
| 92 | - ans.model_config.num_threads = env->GetIntField(model_config, fid); | ||
| 93 | - | ||
| 94 | - fid = env->GetFieldID(model_config_cls, "debug", "Z"); | ||
| 95 | - ans.model_config.debug = env->GetBooleanField(model_config, fid); | ||
| 96 | - | ||
| 97 | - fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;"); | ||
| 98 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 99 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 100 | - ans.model_config.provider_config.provider = p; | ||
| 101 | - env->ReleaseStringUTFChars(s, p); | ||
| 102 | - | ||
| 103 | - fid = env->GetFieldID(model_config_cls, "modelType", "Ljava/lang/String;"); | ||
| 104 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 105 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 106 | - ans.model_config.model_type = p; | ||
| 107 | - env->ReleaseStringUTFChars(s, p); | 63 | + ans.model_config = GetOnlineModelConfig(env, model_config_cls, model_config); |
| 108 | 64 | ||
| 109 | return ans; | 65 | return ans; |
| 110 | } | 66 | } |
| @@ -10,6 +10,117 @@ | @@ -10,6 +10,117 @@ | ||
| 10 | 10 | ||
| 11 | namespace sherpa_onnx { | 11 | namespace sherpa_onnx { |
| 12 | 12 | ||
| 13 | +OnlineModelConfig GetOnlineModelConfig(JNIEnv *env, jclass model_config_cls, | ||
| 14 | + jobject model_config) { | ||
| 15 | + OnlineModelConfig ans; | ||
| 16 | + // transducer | ||
| 17 | + auto fid = | ||
| 18 | + env->GetFieldID(model_config_cls, "transducer", | ||
| 19 | + "Lcom/k2fsa/sherpa/onnx/OnlineTransducerModelConfig;"); | ||
| 20 | + jobject transducer_config = env->GetObjectField(model_config, fid); | ||
| 21 | + jclass transducer_config_cls = env->GetObjectClass(transducer_config); | ||
| 22 | + | ||
| 23 | + fid = env->GetFieldID(transducer_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 24 | + auto s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 25 | + auto p = env->GetStringUTFChars(s, nullptr); | ||
| 26 | + ans.transducer.encoder = p; | ||
| 27 | + env->ReleaseStringUTFChars(s, p); | ||
| 28 | + | ||
| 29 | + fid = env->GetFieldID(transducer_config_cls, "decoder", "Ljava/lang/String;"); | ||
| 30 | + s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 31 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 32 | + ans.transducer.decoder = p; | ||
| 33 | + env->ReleaseStringUTFChars(s, p); | ||
| 34 | + | ||
| 35 | + fid = env->GetFieldID(transducer_config_cls, "joiner", "Ljava/lang/String;"); | ||
| 36 | + s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 37 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 38 | + ans.transducer.joiner = p; | ||
| 39 | + env->ReleaseStringUTFChars(s, p); | ||
| 40 | + | ||
| 41 | + // paraformer | ||
| 42 | + fid = env->GetFieldID(model_config_cls, "paraformer", | ||
| 43 | + "Lcom/k2fsa/sherpa/onnx/OnlineParaformerModelConfig;"); | ||
| 44 | + jobject paraformer_config = env->GetObjectField(model_config, fid); | ||
| 45 | + jclass paraformer_config_cls = env->GetObjectClass(paraformer_config); | ||
| 46 | + | ||
| 47 | + fid = env->GetFieldID(paraformer_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 48 | + s = (jstring)env->GetObjectField(paraformer_config, fid); | ||
| 49 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 50 | + ans.paraformer.encoder = p; | ||
| 51 | + env->ReleaseStringUTFChars(s, p); | ||
| 52 | + | ||
| 53 | + fid = env->GetFieldID(paraformer_config_cls, "decoder", "Ljava/lang/String;"); | ||
| 54 | + s = (jstring)env->GetObjectField(paraformer_config, fid); | ||
| 55 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 56 | + ans.paraformer.decoder = p; | ||
| 57 | + env->ReleaseStringUTFChars(s, p); | ||
| 58 | + | ||
| 59 | + // streaming zipformer2 CTC | ||
| 60 | + fid = | ||
| 61 | + env->GetFieldID(model_config_cls, "zipformer2Ctc", | ||
| 62 | + "Lcom/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig;"); | ||
| 63 | + jobject zipformer2_ctc_config = env->GetObjectField(model_config, fid); | ||
| 64 | + jclass zipformer2_ctc_config_cls = env->GetObjectClass(zipformer2_ctc_config); | ||
| 65 | + | ||
| 66 | + fid = | ||
| 67 | + env->GetFieldID(zipformer2_ctc_config_cls, "model", "Ljava/lang/String;"); | ||
| 68 | + s = (jstring)env->GetObjectField(zipformer2_ctc_config, fid); | ||
| 69 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 70 | + ans.zipformer2_ctc.model = p; | ||
| 71 | + env->ReleaseStringUTFChars(s, p); | ||
| 72 | + | ||
| 73 | + // streaming NeMo CTC | ||
| 74 | + fid = env->GetFieldID(model_config_cls, "neMoCtc", | ||
| 75 | + "Lcom/k2fsa/sherpa/onnx/OnlineNeMoCtcModelConfig;"); | ||
| 76 | + jobject nemo_ctc_config = env->GetObjectField(model_config, fid); | ||
| 77 | + jclass nemo_ctc_config_cls = env->GetObjectClass(nemo_ctc_config); | ||
| 78 | + | ||
| 79 | + fid = env->GetFieldID(nemo_ctc_config_cls, "model", "Ljava/lang/String;"); | ||
| 80 | + s = (jstring)env->GetObjectField(nemo_ctc_config, fid); | ||
| 81 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 82 | + ans.nemo_ctc.model = p; | ||
| 83 | + env->ReleaseStringUTFChars(s, p); | ||
| 84 | + | ||
| 85 | + fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;"); | ||
| 86 | + s = (jstring)env->GetObjectField(model_config, fid); | ||
| 87 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 88 | + ans.tokens = p; | ||
| 89 | + env->ReleaseStringUTFChars(s, p); | ||
| 90 | + | ||
| 91 | + fid = env->GetFieldID(model_config_cls, "numThreads", "I"); | ||
| 92 | + ans.num_threads = env->GetIntField(model_config, fid); | ||
| 93 | + | ||
| 94 | + fid = env->GetFieldID(model_config_cls, "debug", "Z"); | ||
| 95 | + ans.debug = env->GetBooleanField(model_config, fid); | ||
| 96 | + | ||
| 97 | + fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;"); | ||
| 98 | + s = (jstring)env->GetObjectField(model_config, fid); | ||
| 99 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 100 | + ans.provider_config.provider = p; | ||
| 101 | + env->ReleaseStringUTFChars(s, p); | ||
| 102 | + | ||
| 103 | + fid = env->GetFieldID(model_config_cls, "modelType", "Ljava/lang/String;"); | ||
| 104 | + s = (jstring)env->GetObjectField(model_config, fid); | ||
| 105 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 106 | + ans.model_type = p; | ||
| 107 | + env->ReleaseStringUTFChars(s, p); | ||
| 108 | + | ||
| 109 | + fid = env->GetFieldID(model_config_cls, "modelingUnit", "Ljava/lang/String;"); | ||
| 110 | + s = (jstring)env->GetObjectField(model_config, fid); | ||
| 111 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 112 | + ans.modeling_unit = p; | ||
| 113 | + env->ReleaseStringUTFChars(s, p); | ||
| 114 | + | ||
| 115 | + fid = env->GetFieldID(model_config_cls, "bpeVocab", "Ljava/lang/String;"); | ||
| 116 | + s = (jstring)env->GetObjectField(model_config, fid); | ||
| 117 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 118 | + ans.bpe_vocab = p; | ||
| 119 | + env->ReleaseStringUTFChars(s, p); | ||
| 120 | + | ||
| 121 | + return ans; | ||
| 122 | +} | ||
| 123 | + | ||
| 13 | static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { | 124 | static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { |
| 14 | OnlineRecognizerConfig ans; | 125 | OnlineRecognizerConfig ans; |
| 15 | 126 | ||
| @@ -122,109 +233,7 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { | @@ -122,109 +233,7 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { | ||
| 122 | jobject model_config = env->GetObjectField(config, fid); | 233 | jobject model_config = env->GetObjectField(config, fid); |
| 123 | jclass model_config_cls = env->GetObjectClass(model_config); | 234 | jclass model_config_cls = env->GetObjectClass(model_config); |
| 124 | 235 | ||
| 125 | - // transducer | ||
| 126 | - fid = env->GetFieldID(model_config_cls, "transducer", | ||
| 127 | - "Lcom/k2fsa/sherpa/onnx/OnlineTransducerModelConfig;"); | ||
| 128 | - jobject transducer_config = env->GetObjectField(model_config, fid); | ||
| 129 | - jclass transducer_config_cls = env->GetObjectClass(transducer_config); | ||
| 130 | - | ||
| 131 | - fid = env->GetFieldID(transducer_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 132 | - s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 133 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 134 | - ans.model_config.transducer.encoder = p; | ||
| 135 | - env->ReleaseStringUTFChars(s, p); | ||
| 136 | - | ||
| 137 | - fid = env->GetFieldID(transducer_config_cls, "decoder", "Ljava/lang/String;"); | ||
| 138 | - s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 139 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 140 | - ans.model_config.transducer.decoder = p; | ||
| 141 | - env->ReleaseStringUTFChars(s, p); | ||
| 142 | - | ||
| 143 | - fid = env->GetFieldID(transducer_config_cls, "joiner", "Ljava/lang/String;"); | ||
| 144 | - s = (jstring)env->GetObjectField(transducer_config, fid); | ||
| 145 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 146 | - ans.model_config.transducer.joiner = p; | ||
| 147 | - env->ReleaseStringUTFChars(s, p); | ||
| 148 | - | ||
| 149 | - // paraformer | ||
| 150 | - fid = env->GetFieldID(model_config_cls, "paraformer", | ||
| 151 | - "Lcom/k2fsa/sherpa/onnx/OnlineParaformerModelConfig;"); | ||
| 152 | - jobject paraformer_config = env->GetObjectField(model_config, fid); | ||
| 153 | - jclass paraformer_config_cls = env->GetObjectClass(paraformer_config); | ||
| 154 | - | ||
| 155 | - fid = env->GetFieldID(paraformer_config_cls, "encoder", "Ljava/lang/String;"); | ||
| 156 | - s = (jstring)env->GetObjectField(paraformer_config, fid); | ||
| 157 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 158 | - ans.model_config.paraformer.encoder = p; | ||
| 159 | - env->ReleaseStringUTFChars(s, p); | ||
| 160 | - | ||
| 161 | - fid = env->GetFieldID(paraformer_config_cls, "decoder", "Ljava/lang/String;"); | ||
| 162 | - s = (jstring)env->GetObjectField(paraformer_config, fid); | ||
| 163 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 164 | - ans.model_config.paraformer.decoder = p; | ||
| 165 | - env->ReleaseStringUTFChars(s, p); | ||
| 166 | - | ||
| 167 | - // streaming zipformer2 CTC | ||
| 168 | - fid = | ||
| 169 | - env->GetFieldID(model_config_cls, "zipformer2Ctc", | ||
| 170 | - "Lcom/k2fsa/sherpa/onnx/OnlineZipformer2CtcModelConfig;"); | ||
| 171 | - jobject zipformer2_ctc_config = env->GetObjectField(model_config, fid); | ||
| 172 | - jclass zipformer2_ctc_config_cls = env->GetObjectClass(zipformer2_ctc_config); | ||
| 173 | - | ||
| 174 | - fid = | ||
| 175 | - env->GetFieldID(zipformer2_ctc_config_cls, "model", "Ljava/lang/String;"); | ||
| 176 | - s = (jstring)env->GetObjectField(zipformer2_ctc_config, fid); | ||
| 177 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 178 | - ans.model_config.zipformer2_ctc.model = p; | ||
| 179 | - env->ReleaseStringUTFChars(s, p); | ||
| 180 | - | ||
| 181 | - // streaming NeMo CTC | ||
| 182 | - fid = env->GetFieldID(model_config_cls, "neMoCtc", | ||
| 183 | - "Lcom/k2fsa/sherpa/onnx/OnlineNeMoCtcModelConfig;"); | ||
| 184 | - jobject nemo_ctc_config = env->GetObjectField(model_config, fid); | ||
| 185 | - jclass nemo_ctc_config_cls = env->GetObjectClass(nemo_ctc_config); | ||
| 186 | - | ||
| 187 | - fid = env->GetFieldID(nemo_ctc_config_cls, "model", "Ljava/lang/String;"); | ||
| 188 | - s = (jstring)env->GetObjectField(nemo_ctc_config, fid); | ||
| 189 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 190 | - ans.model_config.nemo_ctc.model = p; | ||
| 191 | - env->ReleaseStringUTFChars(s, p); | ||
| 192 | - | ||
| 193 | - fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;"); | ||
| 194 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 195 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 196 | - ans.model_config.tokens = p; | ||
| 197 | - env->ReleaseStringUTFChars(s, p); | ||
| 198 | - | ||
| 199 | - fid = env->GetFieldID(model_config_cls, "numThreads", "I"); | ||
| 200 | - ans.model_config.num_threads = env->GetIntField(model_config, fid); | ||
| 201 | - | ||
| 202 | - fid = env->GetFieldID(model_config_cls, "debug", "Z"); | ||
| 203 | - ans.model_config.debug = env->GetBooleanField(model_config, fid); | ||
| 204 | - | ||
| 205 | - fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;"); | ||
| 206 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 207 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 208 | - ans.model_config.provider_config.provider = p; | ||
| 209 | - env->ReleaseStringUTFChars(s, p); | ||
| 210 | - | ||
| 211 | - fid = env->GetFieldID(model_config_cls, "modelType", "Ljava/lang/String;"); | ||
| 212 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 213 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 214 | - ans.model_config.model_type = p; | ||
| 215 | - env->ReleaseStringUTFChars(s, p); | ||
| 216 | - | ||
| 217 | - fid = env->GetFieldID(model_config_cls, "modelingUnit", "Ljava/lang/String;"); | ||
| 218 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 219 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 220 | - ans.model_config.modeling_unit = p; | ||
| 221 | - env->ReleaseStringUTFChars(s, p); | ||
| 222 | - | ||
| 223 | - fid = env->GetFieldID(model_config_cls, "bpeVocab", "Ljava/lang/String;"); | ||
| 224 | - s = (jstring)env->GetObjectField(model_config, fid); | ||
| 225 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 226 | - ans.model_config.bpe_vocab = p; | ||
| 227 | - env->ReleaseStringUTFChars(s, p); | 236 | + ans.model_config = GetOnlineModelConfig(env, model_config_cls, model_config); |
| 228 | 237 | ||
| 229 | //---------- rnn lm model config ---------- | 238 | //---------- rnn lm model config ---------- |
| 230 | fid = env->GetFieldID(cls, "lmConfig", | 239 | fid = env->GetFieldID(cls, "lmConfig", |
| @@ -165,6 +165,11 @@ type | @@ -165,6 +165,11 @@ type | ||
| 165 | function ToString: AnsiString; | 165 | function ToString: AnsiString; |
| 166 | end; | 166 | end; |
| 167 | 167 | ||
| 168 | + TSherpaOnnxOnlineNemoCtcModelConfig = record | ||
| 169 | + Model: AnsiString; | ||
| 170 | + function ToString: AnsiString; | ||
| 171 | + end; | ||
| 172 | + | ||
| 168 | TSherpaOnnxOnlineModelConfig = record | 173 | TSherpaOnnxOnlineModelConfig = record |
| 169 | Transducer: TSherpaOnnxOnlineTransducerModelConfig; | 174 | Transducer: TSherpaOnnxOnlineTransducerModelConfig; |
| 170 | Paraformer: TSherpaOnnxOnlineParaformerModelConfig; | 175 | Paraformer: TSherpaOnnxOnlineParaformerModelConfig; |
| @@ -178,6 +183,7 @@ type | @@ -178,6 +183,7 @@ type | ||
| 178 | BpeVocab: AnsiString; | 183 | BpeVocab: AnsiString; |
| 179 | TokensBuf: AnsiString; | 184 | TokensBuf: AnsiString; |
| 180 | TokensBufSize: Integer; | 185 | TokensBufSize: Integer; |
| 186 | + NemoCtc: TSherpaOnnxOnlineNemoCtcModelConfig; | ||
| 181 | function ToString: AnsiString; | 187 | function ToString: AnsiString; |
| 182 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig); | 188 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig); |
| 183 | end; | 189 | end; |
| @@ -691,6 +697,10 @@ type | @@ -691,6 +697,10 @@ type | ||
| 691 | Model: PAnsiChar; | 697 | Model: PAnsiChar; |
| 692 | end; | 698 | end; |
| 693 | 699 | ||
| 700 | + SherpaOnnxOnlineNemoCtcModelConfig = record | ||
| 701 | + Model: PAnsiChar; | ||
| 702 | + end; | ||
| 703 | + | ||
| 694 | SherpaOnnxOnlineModelConfig= record | 704 | SherpaOnnxOnlineModelConfig= record |
| 695 | Transducer: SherpaOnnxOnlineTransducerModelConfig; | 705 | Transducer: SherpaOnnxOnlineTransducerModelConfig; |
| 696 | Paraformer: SherpaOnnxOnlineParaformerModelConfig; | 706 | Paraformer: SherpaOnnxOnlineParaformerModelConfig; |
| @@ -704,6 +714,7 @@ type | @@ -704,6 +714,7 @@ type | ||
| 704 | BpeVocab: PAnsiChar; | 714 | BpeVocab: PAnsiChar; |
| 705 | TokensBuf: PAnsiChar; | 715 | TokensBuf: PAnsiChar; |
| 706 | TokensBufSize: cint32; | 716 | TokensBufSize: cint32; |
| 717 | + NemoCtc: SherpaOnnxOnlineNemoCtcModelConfig; | ||
| 707 | end; | 718 | end; |
| 708 | SherpaOnnxFeatureConfig = record | 719 | SherpaOnnxFeatureConfig = record |
| 709 | SampleRate: cint32; | 720 | SampleRate: cint32; |
| @@ -1311,6 +1322,12 @@ begin | @@ -1311,6 +1322,12 @@ begin | ||
| 1311 | [Self.Model]); | 1322 | [Self.Model]); |
| 1312 | end; | 1323 | end; |
| 1313 | 1324 | ||
| 1325 | +function TSherpaOnnxOnlineNemoCtcModelConfig.ToString: AnsiString; | ||
| 1326 | +begin | ||
| 1327 | + Result := Format('TSherpaOnnxOnlineNemoCtcModelConfig(Model := %s)', | ||
| 1328 | + [Self.Model]); | ||
| 1329 | +end; | ||
| 1330 | + | ||
| 1314 | function TSherpaOnnxOnlineModelConfig.ToString: AnsiString; | 1331 | function TSherpaOnnxOnlineModelConfig.ToString: AnsiString; |
| 1315 | begin | 1332 | begin |
| 1316 | Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' + | 1333 | Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' + |
| @@ -1322,12 +1339,13 @@ begin | @@ -1322,12 +1339,13 @@ begin | ||
| 1322 | 'Debug := %s, ' + | 1339 | 'Debug := %s, ' + |
| 1323 | 'ModelType := %s, ' + | 1340 | 'ModelType := %s, ' + |
| 1324 | 'ModelingUnit := %s, ' + | 1341 | 'ModelingUnit := %s, ' + |
| 1325 | - 'BpeVocab := %s)' | ||
| 1326 | - , | 1342 | + 'BpeVocab := %s, ' + |
| 1343 | + 'NemoCtc := %s)', | ||
| 1327 | [Self.Transducer.ToString, Self.Paraformer.ToString, | 1344 | [Self.Transducer.ToString, Self.Paraformer.ToString, |
| 1328 | Self.Zipformer2Ctc.ToString, Self.Tokens, | 1345 | Self.Zipformer2Ctc.ToString, Self.Tokens, |
| 1329 | Self.NumThreads, Self.Provider, Self.Debug.ToString, | 1346 | Self.NumThreads, Self.Provider, Self.Debug.ToString, |
| 1330 | - Self.ModelType, Self.ModelingUnit, Self.BpeVocab | 1347 | + Self.ModelType, Self.ModelingUnit, Self.BpeVocab, |
| 1348 | + Self.NemoCtc.ToString | ||
| 1331 | ]); | 1349 | ]); |
| 1332 | end; | 1350 | end; |
| 1333 | 1351 | ||
| @@ -1426,6 +1444,7 @@ begin | @@ -1426,6 +1444,7 @@ begin | ||
| 1426 | C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder); | 1444 | C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder); |
| 1427 | 1445 | ||
| 1428 | C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model); | 1446 | C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model); |
| 1447 | + C.ModelConfig.NemoCtc.Model := PAnsiChar(Config.ModelConfig.NemoCtc.Model); | ||
| 1429 | 1448 | ||
| 1430 | C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens); | 1449 | C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens); |
| 1431 | C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads; | 1450 | C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads; |
| @@ -128,77 +128,69 @@ class TestOfflineRecognizer(unittest.TestCase): | @@ -128,77 +128,69 @@ class TestOfflineRecognizer(unittest.TestCase): | ||
| 128 | print(s2.result.text) | 128 | print(s2.result.text) |
| 129 | 129 | ||
| 130 | def test_paraformer_single_file(self): | 130 | def test_paraformer_single_file(self): |
| 131 | - for use_int8 in [True, False]: | ||
| 132 | - if use_int8: | ||
| 133 | - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx" | ||
| 134 | - else: | ||
| 135 | - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.onnx" | 131 | + model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx" |
| 136 | 132 | ||
| 137 | - tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt" | ||
| 138 | - wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav" | 133 | + tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt" |
| 134 | + wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav" | ||
| 139 | 135 | ||
| 140 | - if not Path(model).is_file(): | ||
| 141 | - print("skipping test_paraformer_single_file()") | ||
| 142 | - return | 136 | + if not Path(model).is_file(): |
| 137 | + print("skipping test_paraformer_single_file()") | ||
| 138 | + return | ||
| 143 | 139 | ||
| 144 | - recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer( | ||
| 145 | - paraformer=model, | ||
| 146 | - tokens=tokens, | ||
| 147 | - num_threads=1, | ||
| 148 | - provider="cpu", | ||
| 149 | - ) | 140 | + recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer( |
| 141 | + paraformer=model, | ||
| 142 | + tokens=tokens, | ||
| 143 | + num_threads=1, | ||
| 144 | + provider="cpu", | ||
| 145 | + ) | ||
| 150 | 146 | ||
| 151 | - s = recognizer.create_stream() | ||
| 152 | - samples, sample_rate = read_wave(wave0) | ||
| 153 | - s.accept_waveform(sample_rate, samples) | ||
| 154 | - recognizer.decode_stream(s) | ||
| 155 | - print(s.result.text) | 147 | + s = recognizer.create_stream() |
| 148 | + samples, sample_rate = read_wave(wave0) | ||
| 149 | + s.accept_waveform(sample_rate, samples) | ||
| 150 | + recognizer.decode_stream(s) | ||
| 151 | + print(s.result.text) | ||
| 156 | 152 | ||
| 157 | def test_paraformer_multiple_files(self): | 153 | def test_paraformer_multiple_files(self): |
| 158 | - for use_int8 in [True, False]: | ||
| 159 | - if use_int8: | ||
| 160 | - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx" | ||
| 161 | - else: | ||
| 162 | - model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.onnx" | ||
| 163 | - | ||
| 164 | - tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt" | ||
| 165 | - wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav" | ||
| 166 | - wave1 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav" | ||
| 167 | - wave2 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav" | ||
| 168 | - wave3 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav" | ||
| 169 | - | ||
| 170 | - if not Path(model).is_file(): | ||
| 171 | - print("skipping test_paraformer_multiple_files()") | ||
| 172 | - return | ||
| 173 | - | ||
| 174 | - recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer( | ||
| 175 | - paraformer=model, | ||
| 176 | - tokens=tokens, | ||
| 177 | - num_threads=1, | ||
| 178 | - provider="cpu", | ||
| 179 | - ) | ||
| 180 | - | ||
| 181 | - s0 = recognizer.create_stream() | ||
| 182 | - samples0, sample_rate0 = read_wave(wave0) | ||
| 183 | - s0.accept_waveform(sample_rate0, samples0) | ||
| 184 | - | ||
| 185 | - s1 = recognizer.create_stream() | ||
| 186 | - samples1, sample_rate1 = read_wave(wave1) | ||
| 187 | - s1.accept_waveform(sample_rate1, samples1) | ||
| 188 | - | ||
| 189 | - s2 = recognizer.create_stream() | ||
| 190 | - samples2, sample_rate2 = read_wave(wave2) | ||
| 191 | - s2.accept_waveform(sample_rate2, samples2) | ||
| 192 | - | ||
| 193 | - s3 = recognizer.create_stream() | ||
| 194 | - samples3, sample_rate3 = read_wave(wave3) | ||
| 195 | - s3.accept_waveform(sample_rate3, samples3) | ||
| 196 | - | ||
| 197 | - recognizer.decode_streams([s0, s1, s2, s3]) | ||
| 198 | - print(s0.result.text) | ||
| 199 | - print(s1.result.text) | ||
| 200 | - print(s2.result.text) | ||
| 201 | - print(s3.result.text) | 154 | + model = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx" |
| 155 | + | ||
| 156 | + tokens = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt" | ||
| 157 | + wave0 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/0.wav" | ||
| 158 | + wave1 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/1.wav" | ||
| 159 | + wave2 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/2.wav" | ||
| 160 | + wave3 = f"{d}/sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/8k.wav" | ||
| 161 | + | ||
| 162 | + if not Path(model).is_file(): | ||
| 163 | + print("skipping test_paraformer_multiple_files()") | ||
| 164 | + return | ||
| 165 | + | ||
| 166 | + recognizer = sherpa_onnx.OfflineRecognizer.from_paraformer( | ||
| 167 | + paraformer=model, | ||
| 168 | + tokens=tokens, | ||
| 169 | + num_threads=1, | ||
| 170 | + provider="cpu", | ||
| 171 | + ) | ||
| 172 | + | ||
| 173 | + s0 = recognizer.create_stream() | ||
| 174 | + samples0, sample_rate0 = read_wave(wave0) | ||
| 175 | + s0.accept_waveform(sample_rate0, samples0) | ||
| 176 | + | ||
| 177 | + s1 = recognizer.create_stream() | ||
| 178 | + samples1, sample_rate1 = read_wave(wave1) | ||
| 179 | + s1.accept_waveform(sample_rate1, samples1) | ||
| 180 | + | ||
| 181 | + s2 = recognizer.create_stream() | ||
| 182 | + samples2, sample_rate2 = read_wave(wave2) | ||
| 183 | + s2.accept_waveform(sample_rate2, samples2) | ||
| 184 | + | ||
| 185 | + s3 = recognizer.create_stream() | ||
| 186 | + samples3, sample_rate3 = read_wave(wave3) | ||
| 187 | + s3.accept_waveform(sample_rate3, samples3) | ||
| 188 | + | ||
| 189 | + recognizer.decode_streams([s0, s1, s2, s3]) | ||
| 190 | + print(s0.result.text) | ||
| 191 | + print(s1.result.text) | ||
| 192 | + print(s2.result.text) | ||
| 193 | + print(s3.result.text) | ||
| 202 | 194 | ||
| 203 | def test_nemo_ctc_single_file(self): | 195 | def test_nemo_ctc_single_file(self): |
| 204 | for use_int8 in [True, False]: | 196 | for use_int8 in [True, False]: |
| @@ -68,6 +68,14 @@ func sherpaOnnxOnlineZipformer2CtcModelConfig( | @@ -68,6 +68,14 @@ func sherpaOnnxOnlineZipformer2CtcModelConfig( | ||
| 68 | ) | 68 | ) |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | +func sherpaOnnxOnlineNemoCtcModelConfig( | ||
| 72 | + model: String = "" | ||
| 73 | +) -> SherpaOnnxOnlineNemoCtcModelConfig { | ||
| 74 | + return SherpaOnnxOnlineNemoCtcModelConfig( | ||
| 75 | + model: toCPointer(model) | ||
| 76 | + ) | ||
| 77 | +} | ||
| 78 | + | ||
| 71 | /// Return an instance of SherpaOnnxOnlineModelConfig. | 79 | /// Return an instance of SherpaOnnxOnlineModelConfig. |
| 72 | /// | 80 | /// |
| 73 | /// Please refer to | 81 | /// Please refer to |
| @@ -92,7 +100,8 @@ func sherpaOnnxOnlineModelConfig( | @@ -92,7 +100,8 @@ func sherpaOnnxOnlineModelConfig( | ||
| 92 | modelingUnit: String = "cjkchar", | 100 | modelingUnit: String = "cjkchar", |
| 93 | bpeVocab: String = "", | 101 | bpeVocab: String = "", |
| 94 | tokensBuf: String = "", | 102 | tokensBuf: String = "", |
| 95 | - tokensBufSize: Int = 0 | 103 | + tokensBufSize: Int = 0, |
| 104 | + nemoCtc: SherpaOnnxOnlineNemoCtcModelConfig = sherpaOnnxOnlineNemoCtcModelConfig() | ||
| 96 | ) -> SherpaOnnxOnlineModelConfig { | 105 | ) -> SherpaOnnxOnlineModelConfig { |
| 97 | return SherpaOnnxOnlineModelConfig( | 106 | return SherpaOnnxOnlineModelConfig( |
| 98 | transducer: transducer, | 107 | transducer: transducer, |
| @@ -106,7 +115,8 @@ func sherpaOnnxOnlineModelConfig( | @@ -106,7 +115,8 @@ func sherpaOnnxOnlineModelConfig( | ||
| 106 | modeling_unit: toCPointer(modelingUnit), | 115 | modeling_unit: toCPointer(modelingUnit), |
| 107 | bpe_vocab: toCPointer(bpeVocab), | 116 | bpe_vocab: toCPointer(bpeVocab), |
| 108 | tokens_buf: toCPointer(tokensBuf), | 117 | tokens_buf: toCPointer(tokensBuf), |
| 109 | - tokens_buf_size: Int32(tokensBufSize) | 118 | + tokens_buf_size: Int32(tokensBufSize), |
| 119 | + nemo_ctc: nemoCtc | ||
| 110 | ) | 120 | ) |
| 111 | } | 121 | } |
| 112 | 122 |
| @@ -15,8 +15,8 @@ function freeConfig(config, Module) { | @@ -15,8 +15,8 @@ function freeConfig(config, Module) { | ||
| 15 | freeConfig(config.paraformer, Module) | 15 | freeConfig(config.paraformer, Module) |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | - if ('ctc' in config) { | ||
| 19 | - freeConfig(config.ctc, Module) | 18 | + if ('zipformer2Ctc' in config) { |
| 19 | + freeConfig(config.zipformer2Ctc, Module) | ||
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | if ('feat' in config) { | 22 | if ('feat' in config) { |
| @@ -157,6 +157,22 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { | @@ -157,6 +157,22 @@ function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) { | ||
| 157 | } | 157 | } |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | +function initSherpaOnnxOnlineNemoCtcModelConfig(config, Module) { | ||
| 161 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; | ||
| 162 | + const buffer = Module._malloc(n); | ||
| 163 | + | ||
| 164 | + const len = 1 * 4; // 1 pointer | ||
| 165 | + const ptr = Module._malloc(len); | ||
| 166 | + | ||
| 167 | + Module.stringToUTF8(config.model || '', buffer, n); | ||
| 168 | + | ||
| 169 | + Module.setValue(ptr, buffer, 'i8*'); | ||
| 170 | + | ||
| 171 | + return { | ||
| 172 | + buffer: buffer, ptr: ptr, len: len, | ||
| 173 | + } | ||
| 174 | +} | ||
| 175 | + | ||
| 160 | function initSherpaOnnxOnlineModelConfig(config, Module) { | 176 | function initSherpaOnnxOnlineModelConfig(config, Module) { |
| 161 | if (!('transducer' in config)) { | 177 | if (!('transducer' in config)) { |
| 162 | config.transducer = { | 178 | config.transducer = { |
| @@ -179,6 +195,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -179,6 +195,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 179 | }; | 195 | }; |
| 180 | } | 196 | } |
| 181 | 197 | ||
| 198 | + if (!('nemoCtc' in config)) { | ||
| 199 | + config.nemoCtc = { | ||
| 200 | + model: '', | ||
| 201 | + }; | ||
| 202 | + } | ||
| 203 | + | ||
| 182 | if (!('tokensBuf' in config)) { | 204 | if (!('tokensBuf' in config)) { |
| 183 | config.tokensBuf = ''; | 205 | config.tokensBuf = ''; |
| 184 | } | 206 | } |
| @@ -193,10 +215,15 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -193,10 +215,15 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 193 | const paraformer = | 215 | const paraformer = |
| 194 | initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module); | 216 | initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module); |
| 195 | 217 | ||
| 196 | - const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig( | 218 | + const zipformer2Ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig( |
| 197 | config.zipformer2Ctc, Module); | 219 | config.zipformer2Ctc, Module); |
| 198 | 220 | ||
| 199 | - const len = transducer.len + paraformer.len + ctc.len + 9 * 4; | 221 | + const nemoCtc = |
| 222 | + initSherpaOnnxOnlineNemoCtcModelConfig(config.nemoCtc, Module); | ||
| 223 | + | ||
| 224 | + const len = | ||
| 225 | + transducer.len + paraformer.len + zipformer2Ctc.len + 9 * 4 + nemoCtc.len; | ||
| 226 | + | ||
| 200 | const ptr = Module._malloc(len); | 227 | const ptr = Module._malloc(len); |
| 201 | 228 | ||
| 202 | let offset = 0; | 229 | let offset = 0; |
| @@ -206,8 +233,8 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -206,8 +233,8 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 206 | Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset); | 233 | Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset); |
| 207 | offset += paraformer.len; | 234 | offset += paraformer.len; |
| 208 | 235 | ||
| 209 | - Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset); | ||
| 210 | - offset += ctc.len; | 236 | + Module._CopyHeap(zipformer2Ctc.ptr, zipformer2Ctc.len, ptr + offset); |
| 237 | + offset += zipformer2Ctc.len; | ||
| 211 | 238 | ||
| 212 | const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; | 239 | const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; |
| 213 | const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; | 240 | const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; |
| @@ -240,7 +267,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -240,7 +267,7 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 240 | Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen); | 267 | Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen); |
| 241 | offset += tokensBufLen; | 268 | offset += tokensBufLen; |
| 242 | 269 | ||
| 243 | - offset = transducer.len + paraformer.len + ctc.len; | 270 | + offset = transducer.len + paraformer.len + zipformer2Ctc.len; |
| 244 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens | 271 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens |
| 245 | offset += 4; | 272 | offset += 4; |
| 246 | 273 | ||
| @@ -278,9 +305,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | @@ -278,9 +305,12 @@ function initSherpaOnnxOnlineModelConfig(config, Module) { | ||
| 278 | Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32'); | 305 | Module.setValue(ptr + offset, config.tokensBufSize || 0, 'i32'); |
| 279 | offset += 4; | 306 | offset += 4; |
| 280 | 307 | ||
| 308 | + Module._CopyHeap(nemoCtc.ptr, nemoCtc.len, ptr + offset); | ||
| 309 | + offset += nemoCtc.len; | ||
| 310 | + | ||
| 281 | return { | 311 | return { |
| 282 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, | 312 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, |
| 283 | - paraformer: paraformer, ctc: ctc | 313 | + paraformer: paraformer, zipformer2Ctc: zipformer2Ctc, nemoCtc: nemoCtc |
| 284 | } | 314 | } |
| 285 | } | 315 | } |
| 286 | 316 | ||
| @@ -485,6 +515,10 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -485,6 +515,10 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 485 | model: '', | 515 | model: '', |
| 486 | }; | 516 | }; |
| 487 | 517 | ||
| 518 | + const onlineNemoCtcModelConfig = { | ||
| 519 | + model: '', | ||
| 520 | + }; | ||
| 521 | + | ||
| 488 | let type = 0; | 522 | let type = 0; |
| 489 | 523 | ||
| 490 | switch (type) { | 524 | switch (type) { |
| @@ -500,9 +534,13 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -500,9 +534,13 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 500 | onlineParaformerModelConfig.decoder = './decoder.onnx'; | 534 | onlineParaformerModelConfig.decoder = './decoder.onnx'; |
| 501 | break; | 535 | break; |
| 502 | case 2: | 536 | case 2: |
| 503 | - // ctc | 537 | + // zipformer2Ctc |
| 504 | onlineZipformer2CtcModelConfig.model = './encoder.onnx'; | 538 | onlineZipformer2CtcModelConfig.model = './encoder.onnx'; |
| 505 | break; | 539 | break; |
| 540 | + case 3: | ||
| 541 | + // nemoCtc | ||
| 542 | + onlineNemoCtcModelConfig.model = './nemo-ctc.onnx'; | ||
| 543 | + break; | ||
| 506 | } | 544 | } |
| 507 | 545 | ||
| 508 | 546 | ||
| @@ -510,6 +548,7 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -510,6 +548,7 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 510 | transducer: onlineTransducerModelConfig, | 548 | transducer: onlineTransducerModelConfig, |
| 511 | paraformer: onlineParaformerModelConfig, | 549 | paraformer: onlineParaformerModelConfig, |
| 512 | zipformer2Ctc: onlineZipformer2CtcModelConfig, | 550 | zipformer2Ctc: onlineZipformer2CtcModelConfig, |
| 551 | + nemoCtc: onlineNemoCtcModelConfig, | ||
| 513 | tokens: './tokens.txt', | 552 | tokens: './tokens.txt', |
| 514 | numThreads: 1, | 553 | numThreads: 1, |
| 515 | provider: 'cpu', | 554 | provider: 'cpu', |
| @@ -16,10 +16,12 @@ extern "C" { | @@ -16,10 +16,12 @@ extern "C" { | ||
| 16 | static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, ""); | 16 | static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, ""); |
| 17 | static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, ""); | 17 | static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, ""); |
| 18 | static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, ""); | 18 | static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, ""); |
| 19 | +static_assert(sizeof(SherpaOnnxOnlineNemoCtcModelConfig) == 1 * 4, ""); | ||
| 19 | static_assert(sizeof(SherpaOnnxOnlineModelConfig) == | 20 | static_assert(sizeof(SherpaOnnxOnlineModelConfig) == |
| 20 | sizeof(SherpaOnnxOnlineTransducerModelConfig) + | 21 | sizeof(SherpaOnnxOnlineTransducerModelConfig) + |
| 21 | sizeof(SherpaOnnxOnlineParaformerModelConfig) + | 22 | sizeof(SherpaOnnxOnlineParaformerModelConfig) + |
| 22 | - sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4, | 23 | + sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4 + |
| 24 | + sizeof(SherpaOnnxOnlineNemoCtcModelConfig), | ||
| 23 | ""); | 25 | ""); |
| 24 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 26 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| 25 | static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); | 27 | static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); |
| @@ -36,6 +38,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | @@ -36,6 +38,7 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | ||
| 36 | auto transducer_model_config = &model_config->transducer; | 38 | auto transducer_model_config = &model_config->transducer; |
| 37 | auto paraformer_model_config = &model_config->paraformer; | 39 | auto paraformer_model_config = &model_config->paraformer; |
| 38 | auto ctc_model_config = &model_config->zipformer2_ctc; | 40 | auto ctc_model_config = &model_config->zipformer2_ctc; |
| 41 | + auto nemo_ctc = &model_config->nemo_ctc; | ||
| 39 | 42 | ||
| 40 | fprintf(stdout, "----------online transducer model config----------\n"); | 43 | fprintf(stdout, "----------online transducer model config----------\n"); |
| 41 | fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder); | 44 | fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder); |
| @@ -46,8 +49,12 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | @@ -46,8 +49,12 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | ||
| 46 | fprintf(stdout, "encoder: %s\n", paraformer_model_config->encoder); | 49 | fprintf(stdout, "encoder: %s\n", paraformer_model_config->encoder); |
| 47 | fprintf(stdout, "decoder: %s\n", paraformer_model_config->decoder); | 50 | fprintf(stdout, "decoder: %s\n", paraformer_model_config->decoder); |
| 48 | 51 | ||
| 49 | - fprintf(stdout, "----------online ctc model config----------\n"); | 52 | + fprintf(stdout, "----------online zipformer2 ctc model config----------\n"); |
| 50 | fprintf(stdout, "model: %s\n", ctc_model_config->model); | 53 | fprintf(stdout, "model: %s\n", ctc_model_config->model); |
| 54 | + | ||
| 55 | + fprintf(stdout, "----------online nemo ctc model config----------\n"); | ||
| 56 | + fprintf(stdout, "model: %s\n", nemo_ctc->model); | ||
| 57 | + | ||
| 51 | fprintf(stdout, "tokens: %s\n", model_config->tokens); | 58 | fprintf(stdout, "tokens: %s\n", model_config->tokens); |
| 52 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); | 59 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); |
| 53 | fprintf(stdout, "provider: %s\n", model_config->provider); | 60 | fprintf(stdout, "provider: %s\n", model_config->provider); |
| @@ -73,9 +73,12 @@ function initModelConfig(config, Module) { | @@ -73,9 +73,12 @@ function initModelConfig(config, Module) { | ||
| 73 | const transducer = | 73 | const transducer = |
| 74 | initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); | 74 | initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); |
| 75 | const paraformer_len = 2 * 4 | 75 | const paraformer_len = 2 * 4 |
| 76 | - const ctc_len = 1 * 4 | 76 | + const zipformer2_ctc_len = 1 * 4 |
| 77 | + const nemo_ctc_len = 1 * 4 | ||
| 78 | + | ||
| 79 | + const len = transducer.len + paraformer_len + zipformer2_ctc_len + 9 * 4 + | ||
| 80 | + nemo_ctc_len; | ||
| 77 | 81 | ||
| 78 | - const len = transducer.len + paraformer_len + ctc_len + 9 * 4; | ||
| 79 | const ptr = Module._malloc(len); | 82 | const ptr = Module._malloc(len); |
| 80 | Module.HEAPU8.fill(0, ptr, ptr + len); | 83 | Module.HEAPU8.fill(0, ptr, ptr + len); |
| 81 | 84 | ||
| @@ -112,7 +115,7 @@ function initModelConfig(config, Module) { | @@ -112,7 +115,7 @@ function initModelConfig(config, Module) { | ||
| 112 | Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen); | 115 | Module.stringToUTF8(config.tokensBuf || '', buffer + offset, tokensBufLen); |
| 113 | offset += tokensBufLen; | 116 | offset += tokensBufLen; |
| 114 | 117 | ||
| 115 | - offset = transducer.len + paraformer_len + ctc_len; | 118 | + offset = transducer.len + paraformer_len + zipformer2_ctc_len; |
| 116 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens | 119 | Module.setValue(ptr + offset, buffer, 'i8*'); // tokens |
| 117 | offset += 4; | 120 | offset += 4; |
| 118 | 121 |
-
请 注册 或 登录 后发表评论