Add Javascript (node-addon) API for Dolphin CTC models (#2094)
Committed by GitHub
Showing 6 changed files with 95 additions and 1 deletion
````diff
@@ -10,6 +10,16 @@ arch=$(node -p "require('os').arch()")
 platform=$(node -p "require('os').platform()")
 node_version=$(node -p "process.versions.node.split('.')[0]")
 
+echo "----------non-streaming ASR dolphin CTC----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+
+node ./test_asr_non_streaming_dolphin_ctc.js
+
+rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
+
 echo "----------non-streaming speech denoiser----------"
 
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
````
````diff
@@ -6,6 +6,7 @@ export { CircularBuffer, SileroVadConfig, SpeechSegment, Vad, VadConfig, } from
 export { Samples,
   OfflineStream,
   FeatureConfig,
+  OfflineDolphinModelConfig,
   OfflineTransducerModelConfig,
   OfflineParaformerModelConfig,
   OfflineNemoEncDecCtcModelConfig,
````
````diff
@@ -44,6 +44,22 @@ static SherpaOnnxOfflineParaformerModelConfig GetOfflineParaformerModelConfig(
   return c;
 }
 
+static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig(
+    Napi::Object obj) {
+  SherpaOnnxOfflineDolphinModelConfig c;
+  memset(&c, 0, sizeof(c));
+
+  if (!obj.Has("dolphin") || !obj.Get("dolphin").IsObject()) {
+    return c;
+  }
+
+  Napi::Object o = obj.Get("dolphin").As<Napi::Object>();
+
+  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
+
+  return c;
+}
+
 static SherpaOnnxOfflineNemoEncDecCtcModelConfig GetOfflineNeMoCtcModelConfig(
     Napi::Object obj) {
   SherpaOnnxOfflineNemoEncDecCtcModelConfig c;
@@ -168,6 +184,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
   c.sense_voice = GetOfflineSenseVoiceModelConfig(o);
   c.moonshine = GetOfflineMoonshineModelConfig(o);
   c.fire_red_asr = GetOfflineFireRedAsrModelConfig(o);
+  c.dolphin = GetOfflineDolphinModelConfig(o);
 
   SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
   SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -292,6 +309,8 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
   SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.encoder);
   SHERPA_ONNX_DELETE_C_STR(c.model_config.fire_red_asr.decoder);
 
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.dolphin.model);
+
   SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
   SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
   SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
````
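For orientation, here is a minimal sketch of the plain JavaScript object that this binding code reads. The key names are taken from the hunks above and from the test script at the end of this diff; nothing beyond that is implied about the API.

```js
// Sketch only: the shape of modelConfig consumed by the node-addon layer.
// GetOfflineDolphinModelConfig() copies modelConfig.dolphin.model into
// SherpaOnnxOfflineDolphinModelConfig.model; 'tokens' and 'numThreads' are
// picked up by the SHERPA_ONNX_ASSIGN_ATTR_* macros shown in the context
// lines above.
const modelConfig = {
  dolphin: {
    model:
        './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx',
  },
  tokens: './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt',
  numThreads: 2,
  provider: 'cpu',
  debug: 1,
};
```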
````diff
@@ -45,6 +45,10 @@ export class OfflineNemoEncDecCtcModelConfig {
   public model: string = '';
 }
 
+export class OfflineDolphinModelConfig {
+  public model: string = '';
+}
+
 export class OfflineWhisperModelConfig {
   public encoder: string = '';
   public decoder: string = '';
@@ -86,6 +90,7 @@ export class OfflineModelConfig {
   public telespeechCtc: string = '';
   public senseVoice: OfflineSenseVoiceModelConfig = new OfflineSenseVoiceModelConfig();
   public moonshine: OfflineMoonshineModelConfig = new OfflineMoonshineModelConfig();
+  public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
 }
 
 export class OfflineLMConfig {
@@ -159,4 +164,4 @@ export class OfflineRecognizer {
 
     return r;
   }
-}
+}
````
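As a quick illustration of the class added above, here is a sketch only; it assumes `OfflineDolphinModelConfig` is exported by `sherpa-onnx-node` at runtime, which is what the declaration-file hunk earlier in this diff suggests.

```js
// Assumption: OfflineDolphinModelConfig is re-exported by sherpa-onnx-node at
// runtime, mirroring the .d.ts change in this PR.
const {OfflineDolphinModelConfig} = require('sherpa-onnx-node');

const dolphin = new OfflineDolphinModelConfig();  // model defaults to ''
dolphin.model =
    './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx';

// This typed object can stand in for the plain {model: '...'} literal used in
// the test script below.
```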
````diff
@@ -122,6 +122,7 @@ The following tables list the examples in this folder.
 |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
 |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
 |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
+|[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search|
 |[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
 |[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
@@ -332,6 +333,16 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_v
 node ./test_vad_with_non_streaming_asr_whisper.js
 ```
 
+### Non-streaming speech recognition with Dolphin CTC models
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+
+node ./test_asr_non_streaming_dolphin_ctc.js
+```
+
 ### Non-streaming speech recognition with NeMo CTC models
 
 ```bash
````
New file `test_asr_non_streaming_dolphin_ctc.js` (referenced by the CI script and README entries above):

```js
// Copyright (c) 2025 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'dolphin': {
      'model':
          './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started');
let start = Date.now();

// One offline stream per utterance: feed the whole file, then decode.
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done');

// Real-time factor = processing time / audio duration.
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
```
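If you need to transcribe several files, the same recognizer can be reused with one stream per file. The following is a sketch built only from the calls used in the test script above; `moreWaveFiles` is a hypothetical list of paths.

```js
// Sketch: reuse one OfflineRecognizer for several files.
// (Continues the script above; 'recognizer' and 'sherpa_onnx' are defined there.)
// 'moreWaveFiles' is hypothetical; each file gets its own stream, following
// the same createStream -> acceptWaveform -> decode -> getResult sequence.
const moreWaveFiles = [
  './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav',
];

for (const f of moreWaveFiles) {
  const s = recognizer.createStream();
  const w = sherpa_onnx.readWave(f);
  s.acceptWaveform({sampleRate: w.sampleRate, samples: w.samples});
  recognizer.decode(s);
  console.log(f, recognizer.getResult(s));
}
```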