Committed by
GitHub
Add JavaScript API (node-addon) for homophone replacer (#2158)
Showing
7 changed files
with
214 additions
and
0 deletions
| @@ -92,7 +92,18 @@ if [[ $arch != "ia32" && $platform != "win32" ]]; then | @@ -92,7 +92,18 @@ if [[ $arch != "ia32" && $platform != "win32" ]]; then | ||
| 92 | rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | 92 | rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 |
| 93 | 93 | ||
| 94 | node ./test_asr_non_streaming_sense_voice.js | 94 | node ./test_asr_non_streaming_sense_voice.js |
| 95 | + | ||
| 96 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 97 | + tar xf dict.tar.bz2 | ||
| 98 | + | ||
| 99 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 100 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 101 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 102 | + | ||
| 103 | + node ./test_asr_non_streaming_sense_voice_with_hr.js | ||
| 104 | + | ||
| 95 | rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 | 105 | rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 |
| 106 | + rm -rf dict replace.fst test-hr.wav lexicon.txt | ||
| 96 | 107 | ||
| 97 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | 108 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 |
| 98 | tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | 109 | tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 |
| @@ -253,12 +264,21 @@ rm -f itn* | @@ -253,12 +264,21 @@ rm -f itn* | ||
| 253 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | 264 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst |
| 254 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav | 265 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav |
| 255 | 266 | ||
| 267 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 268 | +tar xf dict.tar.bz2 | ||
| 269 | + | ||
| 270 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 271 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 272 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 273 | + | ||
| 256 | if [[ $arch != "ia32" && $platform != "win32" ]]; then | 274 | if [[ $arch != "ia32" && $platform != "win32" ]]; then |
| 257 | node test_asr_streaming_transducer_itn.js | 275 | node test_asr_streaming_transducer_itn.js |
| 258 | node test_asr_streaming_transducer.js | 276 | node test_asr_streaming_transducer.js |
| 277 | + node test_asr_streaming_transducer_with_hr.js | ||
| 259 | fi | 278 | fi |
| 260 | 279 | ||
| 261 | rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | 280 | rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 |
| 281 | +rm -rf dict lexicon.txt replace.fst test-hr.wav | ||
| 262 | 282 | ||
| 263 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | 283 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 |
| 264 | tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | 284 | tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 |
| @@ -9,6 +9,7 @@ | @@ -9,6 +9,7 @@ | ||
| 9 | 9 | ||
| 10 | // defined in ./streaming-asr.cc | 10 | // defined in ./streaming-asr.cc |
| 11 | SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj); | 11 | SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj); |
| 12 | +SherpaOnnxHomophoneReplacerConfig GetHomophoneReplacerConfig(Napi::Object obj); | ||
| 12 | 13 | ||
| 13 | static SherpaOnnxOfflineTransducerModelConfig GetOfflineTransducerModelConfig( | 14 | static SherpaOnnxOfflineTransducerModelConfig GetOfflineTransducerModelConfig( |
| 14 | Napi::Object obj) { | 15 | Napi::Object obj) { |
| @@ -261,6 +262,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | @@ -261,6 +262,7 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 261 | c.feat_config = GetFeatureConfig(o); | 262 | c.feat_config = GetFeatureConfig(o); |
| 262 | c.model_config = GetOfflineModelConfig(o); | 263 | c.model_config = GetOfflineModelConfig(o); |
| 263 | c.lm_config = GetOfflineLMConfig(o); | 264 | c.lm_config = GetOfflineLMConfig(o); |
| 265 | + c.hr = GetHomophoneReplacerConfig(o); | ||
| 264 | 266 | ||
| 265 | SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod); | 267 | SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod); |
| 266 | SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths); | 268 | SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths); |
| @@ -324,6 +326,9 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | @@ -324,6 +326,9 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { | ||
| 324 | SHERPA_ONNX_DELETE_C_STR(c.hotwords_file); | 326 | SHERPA_ONNX_DELETE_C_STR(c.hotwords_file); |
| 325 | SHERPA_ONNX_DELETE_C_STR(c.rule_fsts); | 327 | SHERPA_ONNX_DELETE_C_STR(c.rule_fsts); |
| 326 | SHERPA_ONNX_DELETE_C_STR(c.rule_fars); | 328 | SHERPA_ONNX_DELETE_C_STR(c.rule_fars); |
| 329 | + SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir); | ||
| 330 | + SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon); | ||
| 331 | + SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts); | ||
| 327 | 332 | ||
| 328 | if (!recognizer) { | 333 | if (!recognizer) { |
| 329 | Napi::TypeError::New(env, "Please check your config!") | 334 | Napi::TypeError::New(env, "Please check your config!") |
| @@ -144,6 +144,24 @@ static SherpaOnnxOnlineCtcFstDecoderConfig GetCtcFstDecoderConfig( | @@ -144,6 +144,24 @@ static SherpaOnnxOnlineCtcFstDecoderConfig GetCtcFstDecoderConfig( | ||
| 144 | return c; | 144 | return c; |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | +// Also used in ./non-streaming-asr.cc | ||
| 148 | +SherpaOnnxHomophoneReplacerConfig GetHomophoneReplacerConfig(Napi::Object obj) { | ||
| 149 | + SherpaOnnxHomophoneReplacerConfig c; | ||
| 150 | + memset(&c, 0, sizeof(c)); | ||
| 151 | + | ||
| 152 | + if (!obj.Has("hr") || !obj.Get("hr").IsObject()) { | ||
| 153 | + return c; | ||
| 154 | + } | ||
| 155 | + | ||
| 156 | + Napi::Object o = obj.Get("hr").As<Napi::Object>(); | ||
| 157 | + | ||
| 158 | + SHERPA_ONNX_ASSIGN_ATTR_STR(dict_dir, dictDir); | ||
| 159 | + SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon); | ||
| 160 | + SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts); | ||
| 161 | + | ||
| 162 | + return c; | ||
| 163 | +} | ||
| 164 | + | ||
| 147 | static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | 165 | static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( |
| 148 | const Napi::CallbackInfo &info) { | 166 | const Napi::CallbackInfo &info) { |
| 149 | Napi::Env env = info.Env(); | 167 | Napi::Env env = info.Env(); |
| @@ -179,6 +197,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | @@ -179,6 +197,7 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | ||
| 179 | memset(&c, 0, sizeof(c)); | 197 | memset(&c, 0, sizeof(c)); |
| 180 | c.feat_config = GetFeatureConfig(o); | 198 | c.feat_config = GetFeatureConfig(o); |
| 181 | c.model_config = GetOnlineModelConfig(o); | 199 | c.model_config = GetOnlineModelConfig(o); |
| 200 | + c.hr = GetHomophoneReplacerConfig(o); | ||
| 182 | 201 | ||
| 183 | SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod); | 202 | SHERPA_ONNX_ASSIGN_ATTR_STR(decoding_method, decodingMethod); |
| 184 | SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths); | 203 | SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths); |
| @@ -243,6 +262,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | @@ -243,6 +262,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper( | ||
| 243 | SHERPA_ONNX_DELETE_C_STR(c.hotwords_buf); | 262 | SHERPA_ONNX_DELETE_C_STR(c.hotwords_buf); |
| 244 | SHERPA_ONNX_DELETE_C_STR(c.ctc_fst_decoder_config.graph); | 263 | SHERPA_ONNX_DELETE_C_STR(c.ctc_fst_decoder_config.graph); |
| 245 | 264 | ||
| 265 | + SHERPA_ONNX_DELETE_C_STR(c.hr.dict_dir); | ||
| 266 | + SHERPA_ONNX_DELETE_C_STR(c.hr.lexicon); | ||
| 267 | + SHERPA_ONNX_DELETE_C_STR(c.hr.rule_fsts); | ||
| 268 | + | ||
| 246 | if (!recognizer) { | 269 | if (!recognizer) { |
| 247 | Napi::TypeError::New(env, "Please check your config!") | 270 | Napi::TypeError::New(env, "Please check your config!") |
| 248 | .ThrowAsJavaScriptException(); | 271 | .ThrowAsJavaScriptException(); |
| @@ -98,6 +98,7 @@ The following tables list the examples in this folder. | @@ -98,6 +98,7 @@ The following tables list the examples in this folder. | ||
| 98 | |File| Description| | 98 | |File| Description| |
| 99 | |---|---| | 99 | |---|---| |
| 100 | |[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model| | 100 | |[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model| |
| 101 | +|[./test_asr_streaming_transducer_with_hr.js](./test_asr_streaming_transducer_with_hr.js)| Streaming speech recognition from a file using a Zipformer transducer model with homophone replacer| | ||
| 101 | |[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search| | 102 | |[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search| |
| 102 | |[./test_asr_streaming_ctc_hlg.js](./test_asr_streaming_ctc_hlg.js)| Streaming speech recognition from a file using a Zipformer CTC model with HLG decoding| | 103 | |[./test_asr_streaming_ctc_hlg.js](./test_asr_streaming_ctc_hlg.js)| Streaming speech recognition from a file using a Zipformer CTC model with HLG decoding| |
| 103 | |[./test_asr_streaming_paraformer.js](./test_asr_streaming_paraformer.js)|Streaming speech recognition from a file using a [Paraformer](https://github.com/alibaba-damo-academy/FunASR) model| | 104 | |[./test_asr_streaming_paraformer.js](./test_asr_streaming_paraformer.js)|Streaming speech recognition from a file using a [Paraformer](https://github.com/alibaba-damo-academy/FunASR) model| |
| @@ -125,6 +126,7 @@ The following tables list the examples in this folder. | @@ -125,6 +126,7 @@ The following tables list the examples in this folder. | ||
| 125 | |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search| | 126 | |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search| |
| 126 | |[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)| | 127 | |[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)| |
| 127 | |[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)| | 128 | |[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)| |
| 129 | +|[./test_asr_non_streaming_sense_voice_with_hr.js](./test_asr_non_streaming_sense_voice_with_hr.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) with homophone replacer| | ||
| 128 | 130 | ||
| 129 | ## Non-Streaming speech-to-text from a microphone with VAD | 131 | ## Non-Streaming speech-to-text from a microphone with VAD |
| 130 | 132 | ||
| @@ -207,6 +209,22 @@ rm sherpa-onnx-ced-mini-audio-tagging-2024-09-14.tar.bz2 | @@ -207,6 +209,22 @@ rm sherpa-onnx-ced-mini-audio-tagging-2024-09-14.tar.bz2 | ||
| 207 | node ./test_audio_tagging_ced.js | 209 | node ./test_audio_tagging_ced.js |
| 208 | ``` | 210 | ``` |
| 209 | 211 | ||
| 212 | +### Streaming speech recognition with Zipformer transducer with homophone replacer | ||
| 213 | +```bash | ||
| 214 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 215 | +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 216 | +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 217 | + | ||
| 218 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 219 | +tar xf dict.tar.bz2 | ||
| 220 | + | ||
| 221 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 222 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 223 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 224 | + | ||
| 225 | +node ./test_asr_streaming_transducer_with_hr.js | ||
| 226 | +``` | ||
| 227 | + | ||
| 210 | ### Streaming speech recognition with Zipformer transducer | 228 | ### Streaming speech recognition with Zipformer transducer |
| 211 | 229 | ||
| 212 | ```bash | 230 | ```bash |
| @@ -371,6 +389,22 @@ npm install naudiodon2 | @@ -371,6 +389,22 @@ npm install naudiodon2 | ||
| 371 | node ./test_vad_asr_non_streaming_paraformer_microphone.js | 389 | node ./test_vad_asr_non_streaming_paraformer_microphone.js |
| 372 | ``` | 390 | ``` |
| 373 | 391 | ||
| 392 | +### Non-streaming speech recognition with SenseVoice with homophone replacer | ||
| 393 | +```bash | ||
| 394 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 395 | +tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 396 | +rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 397 | + | ||
| 398 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 399 | +tar xf dict.tar.bz2 | ||
| 400 | + | ||
| 401 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 402 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 403 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 404 | + | ||
| 405 | +node ./test_asr_non_streaming_sense_voice_with_hr.js | ||
| 406 | +``` | ||
| 407 | + | ||
| 374 | ### Non-streaming speech recognition with SenseVoice | 408 | ### Non-streaming speech recognition with SenseVoice |
| 375 | 409 | ||
| 376 | ```bash | 410 | ```bash |
// Copyright (c) 2025 Xiaomi Corporation
//
// Non-streaming speech recognition from a file using SenseVoice, with a
// homophone replacer (hr) that post-processes the recognized text.
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files


// If your path contains non-ascii characters, e.g., Chinese, you can use
// the following code
//

// let encoder = new TextEncoder();
// let tokens = encoder.encode(
//     './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.txt');
// let model = encoder.encode(
//     './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/测试.int8.onnx');


const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'senseVoice': {
      'model':
          './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
      // 'model': model,
      'useInverseTextNormalization': 1,
    },
    'tokens': './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
    // 'tokens': tokens,
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'hr': {
    // Please download files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
    'dictDir': './dict',
    'lexicon': './lexicon.txt',
    'ruleFsts': './replace.fst',
  }
};

const waveFilename = './test-hr.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
const start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
// Fix: `result` was an implicit global (no declaration keyword).
const result = recognizer.getResult(stream);
const stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
// Copyright (c) 2025 Xiaomi Corporation
//
// Streaming speech recognition from a file using a Zipformer transducer,
// with a homophone replacer (hr) that post-processes the recognized text.
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
      'decoder':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
      'joiner':
          './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'hr': {
    // Please download files from
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
    'dictDir': './dict',
    'lexicon': './lexicon.txt',
    'ruleFsts': './replace.fst',
  },
};

const waveFilename = './test-hr.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
const start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Flush the streaming decoder with 0.4 s of trailing silence so the
// final words are not cut off.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
// Fix: `result` was an implicit global (no declaration keyword).
const result = recognizer.getResult(stream);
const stop = Date.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
-
Please register or sign in to comment