Add JavaScript API for Moonshine models (#1480)
Committed by GitHub

Showing 13 changed files with 719 additions and 88 deletions.
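For orientation, here is the new JavaScript API in condensed form, distilled from the `test_asr_non_streaming_moonshine.js` example added in this PR (the model paths assume the extracted sherpa-onnx-moonshine-tiny-en-int8 release archive):

```js
// Condensed sketch of the API added by this PR; see the full test file below.
const sherpa_onnx = require('sherpa-onnx-node');

const recognizer = new sherpa_onnx.OfflineRecognizer({
  featConfig: {sampleRate: 16000, featureDim: 80},
  modelConfig: {
    moonshine: {
      preprocessor: './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
      encoder: './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
      uncachedDecoder: './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
      cachedDecoder: './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
    },
    tokens: './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
  },
});

const wave = sherpa_onnx.readWave('./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav');
const stream = recognizer.createStream();
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
recognizer.decode(stream);
console.log(recognizer.getResult(stream).text);
```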
@@ -10,6 +10,19 @@ arch=$(node -p "require('os').arch()")
 platform=$(node -p "require('os').platform()")
 node_version=$(node -p "process.versions.node.split('.')[0]")

+echo "----------non-streaming asr moonshine + vad----------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+node ./test_vad_with_non_streaming_asr_moonshine.js
+rm -rf sherpa-onnx-*
+rm *.wav
+rm *.onnx
+
 echo "----------non-streaming speaker diarization----------"

 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
@@ -24,7 +37,7 @@ node ./test_offline_speaker_diarization.js

 rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*

-echo "----------non-streaming asr + vad----------"
+echo "----------non-streaming asr whisper + vad----------"
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
 tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
 rm sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -218,6 +231,11 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
 node ./test_asr_non_streaming_whisper.js
 rm -rf sherpa-onnx-whisper-tiny.en

+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+node ./test_asr_non_streaming_moonshine.js
+rm -rf sherpa-onnx-*

 ls -lh
@@ -21,6 +21,23 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segm
 node ./test-offline-speaker-diarization.js
 rm -rfv *.wav *.onnx sherpa-onnx-pyannote-*

+echo '-----vad+moonshine----------'
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+rm sherpa-onnx-whisper-tiny.en.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+node ./test-vad-with-non-streaming-asr-moonshine.js
+rm Obama.wav
+rm silero_vad.onnx
+rm -rf sherpa-onnx-moonshine-*
+
 echo '-----vad+whisper----------'

 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -90,6 +107,13 @@ rm sherpa-onnx-whisper-tiny.en.tar.bz2
 node ./test-offline-whisper.js
 rm -rf sherpa-onnx-whisper-tiny.en

+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+node ./test-offline-moonshine.js
+rm -rf sherpa-onnx-moonshine-*
+
 # online asr
 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
 tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
@@ -112,6 +112,8 @@ The following tables list the examples in this folder.
 |[./test_asr_non_streaming_transducer.js](./test_asr_non_streaming_transducer.js)|Non-streaming speech recognition from a file with a Zipformer transducer model|
 |[./test_asr_non_streaming_whisper.js](./test_asr_non_streaming_whisper.js)|Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper)|
 |[./test_vad_with_non_streaming_asr_whisper.js](./test_vad_with_non_streaming_asr_whisper.js)|Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper) + [Silero VAD](https://github.com/snakers4/silero-vad)|
+|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
+|[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
 |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
 |[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
 |[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
@@ -122,6 +124,7 @@ The following tables list the examples in this folder.
 |---|---|
 |[./test_vad_asr_non_streaming_transducer_microphone.js](./test_vad_asr_non_streaming_transducer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a Zipformer transducer model|
 |[./test_vad_asr_non_streaming_whisper_microphone.js](./test_vad_asr_non_streaming_whisper_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Whisper](https://github.com/openai/whisper)|
+|[./test_vad_asr_non_streaming_moonshine_microphone.js](./test_vad_asr_non_streaming_moonshine_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Moonshine](https://github.com/usefulsensors/moonshine)|
 |[./test_vad_asr_non_streaming_nemo_ctc_microphone.js](./test_vad_asr_non_streaming_nemo_ctc_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
 |[./test_vad_asr_non_streaming_paraformer_microphone.js](./test_vad_asr_non_streaming_paraformer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
 |[./test_vad_asr_non_streaming_sense_voice_microphone.js](./test_vad_asr_non_streaming_sense_voice_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)|
@@ -260,6 +263,33 @@ npm install naudiodon2
 node ./test_vad_asr_non_streaming_whisper_microphone.js
 ```

+### Non-streaming speech recognition with Moonshine
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+node ./test_asr_non_streaming_moonshine.js
+
+# To run VAD + non-streaming ASR with Moonshine using a microphone
+npm install naudiodon2
+node ./test_vad_asr_non_streaming_moonshine_microphone.js
+```
+
+### Non-streaming speech recognition with Moonshine + VAD
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+node ./test_vad_with_non_streaming_asr_moonshine.js
+```
+
 ### Non-streaming speech recognition with Whisper + VAD

 ```bash
(new file: test_asr_non_streaming_moonshine.js)

+// Copyright (c) 2024 Xiaomi Corporation
+const sherpa_onnx = require('sherpa-onnx-node');
+
+// Please download test files from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const config = {
+  'featConfig': {
+    'sampleRate': 16000,
+    'featureDim': 80,
+  },
+  'modelConfig': {
+    'moonshine': {
+      'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
+      'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
+      'uncachedDecoder':
+          './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
+      'cachedDecoder':
+          './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
+    },
+    'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
+    'numThreads': 2,
+    'provider': 'cpu',
+    'debug': 1,
+  }
+};
+
+const waveFilename = './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';
+
+const recognizer = new sherpa_onnx.OfflineRecognizer(config);
+console.log('Started')
+let start = Date.now();
+const stream = recognizer.createStream();
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
+
+recognizer.decode(stream);
+const result = recognizer.getResult(stream);
+let stop = Date.now();
+console.log('Done')
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3))
+console.log(waveFilename)
+console.log('result\n', result)
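For reference, the real-time factor printed above is elapsed decoding time divided by audio duration, so values below 1 mean faster than real time; for example, decoding a 6-second wave in 3 seconds gives RTF = 3/6 = 0.5.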
(new file: test_vad_asr_non_streaming_moonshine_microphone.js)

+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+//
+const portAudio = require('naudiodon2');
+// console.log(portAudio.getDevices());
+
+const sherpa_onnx = require('sherpa-onnx-node');
+
+function createRecognizer() {
+  // Please download test files from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+  const config = {
+    'featConfig': {
+      'sampleRate': 16000,
+      'featureDim': 80,
+    },
+    'modelConfig': {
+      'moonshine': {
+        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
+        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
+        'uncachedDecoder':
+            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
+        'cachedDecoder':
+            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
+      },
+      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
+      'numThreads': 2,
+      'provider': 'cpu',
+      'debug': 1,
+    }
+  };
+
+  return new sherpa_onnx.OfflineRecognizer(config);
+}
+
+function createVad() {
+  // please download silero_vad.onnx from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+  const config = {
+    sileroVad: {
+      model: './silero_vad.onnx',
+      threshold: 0.5,
+      minSpeechDuration: 0.25,
+      minSilenceDuration: 0.5,
+      windowSize: 512,
+    },
+    sampleRate: 16000,
+    debug: true,
+    numThreads: 1,
+  };
+
+  const bufferSizeInSeconds = 60;
+
+  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
+}
+
+const recognizer = createRecognizer();
+const vad = createVad();
+
+const bufferSizeInSeconds = 30;
+const buffer =
+    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
+
+const ai = new portAudio.AudioIO({
+  inOptions: {
+    channelCount: 1,
+    closeOnError: true,  // Close the stream if an audio error is detected;
+                         // if set to false, just log the error
+    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
+    sampleFormat: portAudio.SampleFormatFloat32,
+    sampleRate: vad.config.sampleRate
+  }
+});
+
+let printed = false;
+let index = 0;
+ai.on('data', data => {
+  const windowSize = vad.config.sileroVad.windowSize;
+  buffer.push(new Float32Array(data.buffer));
+  while (buffer.size() > windowSize) {
+    const samples = buffer.get(buffer.head(), windowSize);
+    buffer.pop(windowSize);
+    vad.acceptWaveform(samples);
+  }
+
+  while (!vad.isEmpty()) {
+    const segment = vad.front();
+    vad.pop();
+    const stream = recognizer.createStream();
+    stream.acceptWaveform({
+      samples: segment.samples,
+      sampleRate: recognizer.config.featConfig.sampleRate
+    });
+    recognizer.decode(stream);
+    const r = recognizer.getResult(stream);
+    if (r.text.length > 0) {
+      const text = r.text.toLowerCase().trim();
+      console.log(`${index}: ${text}`);
+
+      const filename = `${index}-${text}-${
+          new Date()
+              .toLocaleTimeString('en-US', {hour12: false})
+              .split(' ')[0]}.wav`;
+      sherpa_onnx.writeWave(
+          filename,
+          {samples: segment.samples, sampleRate: vad.config.sampleRate});
+
+      index += 1;
+    }
+  }
+});
+
+ai.start();
+console.log('Started! Please speak')
(new file: test_vad_with_non_streaming_asr_moonshine.js)

+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+const sherpa_onnx = require('sherpa-onnx-node');
+
+function createRecognizer() {
+  // Please download test files from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+  const config = {
+    'featConfig': {
+      'sampleRate': 16000,
+      'featureDim': 80,
+    },
+    'modelConfig': {
+      'moonshine': {
+        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
+        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
+        'uncachedDecoder':
+            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
+        'cachedDecoder':
+            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
+      },
+      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
+      'numThreads': 2,
+      'provider': 'cpu',
+      'debug': 1,
+    }
+  };
+
+  return new sherpa_onnx.OfflineRecognizer(config);
+}
+
+function createVad() {
+  // please download silero_vad.onnx from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+  const config = {
+    sileroVad: {
+      model: './silero_vad.onnx',
+      threshold: 0.5,
+      minSpeechDuration: 0.25,
+      minSilenceDuration: 0.5,
+      maxSpeechDuration: 5,
+      windowSize: 512,
+    },
+    sampleRate: 16000,
+    debug: true,
+    numThreads: 1,
+  };
+
+  const bufferSizeInSeconds = 60;
+
+  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
+}
+
+const recognizer = createRecognizer();
+const vad = createVad();
+
+// please download ./Obama.wav from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const waveFilename = './Obama.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+
+if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
+  throw new Error(
+      `Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}`);
+}
+
+console.log('Started')
+let start = Date.now();
+
+const windowSize = vad.config.sileroVad.windowSize;
+for (let i = 0; i < wave.samples.length; i += windowSize) {
+  const thisWindow = wave.samples.subarray(i, i + windowSize);
+  vad.acceptWaveform(thisWindow);
+
+  while (!vad.isEmpty()) {
+    const segment = vad.front();
+    vad.pop();
+
+    let start_time = segment.start / wave.sampleRate;
+    let end_time = start_time + segment.samples.length / wave.sampleRate;
+
+    start_time = start_time.toFixed(2);
+    end_time = end_time.toFixed(2);
+
+    const stream = recognizer.createStream();
+    stream.acceptWaveform(
+        {samples: segment.samples, sampleRate: wave.sampleRate});
+
+    recognizer.decode(stream);
+    const r = recognizer.getResult(stream);
+    if (r.text.length > 0) {
+      const text = r.text.toLowerCase().trim();
+      console.log(`${start_time} -- ${end_time}: ${text}`);
+    }
+  }
+}
+
+vad.flush();
+
+while (!vad.isEmpty()) {
+  const segment = vad.front();
+  vad.pop();
+
+  let start_time = segment.start / wave.sampleRate;
+  let end_time = start_time + segment.samples.length / wave.sampleRate;
+
+  start_time = start_time.toFixed(2);
+  end_time = end_time.toFixed(2);
+
+  const stream = recognizer.createStream();
+  stream.acceptWaveform(
+      {samples: segment.samples, sampleRate: wave.sampleRate});
+
+  recognizer.decode(stream);
+  const r = recognizer.getResult(stream);
+  if (r.text.length > 0) {
+    const text = r.text.toLowerCase().trim();
+    console.log(`${start_time} -- ${end_time}: ${text}`);
+  }
+}
+
+let stop = Date.now();
+console.log('Done')
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3))
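The per-segment decoding above appears twice, once inside the streaming loop and once after `vad.flush()` drains the trailing speech. A sketch of the same flow with the duplication factored into a helper, using only calls that appear in the file (and assuming the same `recognizer`, `vad`, and `wave` objects are in scope):

```js
// Sketch only; behavior is identical to the file above.
function decodeSegment(segment) {
  const start = (segment.start / wave.sampleRate).toFixed(2);
  const end =
      ((segment.start + segment.samples.length) / wave.sampleRate).toFixed(2);

  const stream = recognizer.createStream();
  stream.acceptWaveform({samples: segment.samples, sampleRate: wave.sampleRate});
  recognizer.decode(stream);

  const text = recognizer.getResult(stream).text.toLowerCase().trim();
  if (text.length > 0) {
    console.log(`${start} -- ${end}: ${text}`);
  }
}

// Drain whatever segments the VAD has emitted so far.
function drain() {
  while (!vad.isEmpty()) {
    decodeSegment(vad.front());
    vad.pop();
  }
}
```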
@@ -133,7 +133,25 @@ tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
 node ./test-offline-transducer.js
 ```

+## ./test-vad-with-non-streaming-asr-whisper.js
+
+[./test-vad-with-non-streaming-asr-whisper.js](./test-vad-with-non-streaming-asr-whisper.js)
+shows how to use VAD + Whisper to decode a very long file.
+
+You can use the following command to run it:
+
+```bash
+wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+node ./test-vad-with-non-streaming-asr-whisper.js
+```
+
 ## ./test-offline-whisper.js
+
 [./test-offline-whisper.js](./test-offline-whisper.js) demonstrates
 how to decode a file with a Whisper model. In the code we use
 [sherpa-onnx-whisper-tiny.en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html).
@@ -146,7 +164,40 @@ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
 node ./test-offline-whisper.js
 ```

+## ./test-offline-moonshine.js
+
+[./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates
+how to decode a file with a Moonshine model. In the code we use
+[sherpa-onnx-moonshine-tiny-en-int8](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2).
+
+You can use the following command to run it:
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+node ./test-offline-moonshine.js
+```
+
+## ./test-vad-with-non-streaming-asr-moonshine.js
+
+[./test-vad-with-non-streaming-asr-moonshine.js](./test-vad-with-non-streaming-asr-moonshine.js)
+shows how to use VAD + Moonshine to decode a very long file.
+
+You can use the following command to run it:
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+node ./test-vad-with-non-streaming-asr-moonshine.js
+```
+
 ## ./test-online-paraformer-microphone.js
+
 [./test-online-paraformer-microphone.js](./test-online-paraformer-microphone.js)
 demonstrates how to do real-time speech recognition from microphone
 with a streaming Paraformer model. In the code we use
nodejs-examples/test-offline-moonshine.js (new file, mode 100644)

+// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
+//
+const sherpa_onnx = require('sherpa-onnx');
+
+function createOfflineRecognizer() {
+  let modelConfig = {
+    moonshine: {
+      preprocessor: './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
+      encoder: './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
+      uncachedDecoder:
+          './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
+      cachedDecoder:
+          './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
+    },
+    tokens: './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
+  };
+
+  let config = {
+    modelConfig: modelConfig,
+  };
+
+  return sherpa_onnx.createOfflineRecognizer(config);
+}
+
+const recognizer = createOfflineRecognizer();
+const stream = recognizer.createStream();
+
+const waveFilename = './sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform(wave.sampleRate, wave.samples);
+
+recognizer.decode(stream);
+const text = recognizer.getResult(stream).text;
+console.log(text);
+
+stream.free();
+recognizer.free();
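Note the API split between the two JavaScript packages touched by this PR: the node-addon examples above use `require('sherpa-onnx-node')` and pass `{samples, sampleRate}` objects to `acceptWaveform`, while the nodejs-examples files use the wasm-backed `require('sherpa-onnx')` package, pass `acceptWaveform(sampleRate, samples)` positionally, and must release native resources explicitly:

```js
// Explicit cleanup required by the wasm-backed 'sherpa-onnx' package,
// as in the file above; the 'sherpa-onnx-node' addon examples have no
// equivalent free() calls.
stream.free();
recognizer.free();
```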
(new file: test-vad-with-non-streaming-asr-moonshine.js)

+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+const sherpa_onnx = require('sherpa-onnx');
+
+function createRecognizer() {
+  // Please download test files from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+  const config = {
+    'modelConfig': {
+      'moonshine': {
+        'preprocessor': './sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx',
+        'encoder': './sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx',
+        'uncachedDecoder':
+            './sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx',
+        'cachedDecoder':
+            './sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx',
+      },
+      'tokens': './sherpa-onnx-moonshine-tiny-en-int8/tokens.txt',
+      'debug': 0,
+    }
+  };
+
+  return sherpa_onnx.createOfflineRecognizer(config);
+}
+
+function createVad() {
+  // please download silero_vad.onnx from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+  const config = {
+    sileroVad: {
+      model: './silero_vad.onnx',
+      threshold: 0.5,
+      minSpeechDuration: 0.25,
+      minSilenceDuration: 0.5,
+      maxSpeechDuration: 5,
+      windowSize: 512,
+    },
+    sampleRate: 16000,
+    debug: true,
+    numThreads: 1,
+    bufferSizeInSeconds: 60,
+  };
+
+  return sherpa_onnx.createVad(config);
+}
+
+const recognizer = createRecognizer();
+const vad = createVad();
+
+// please download ./Obama.wav from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const waveFilename = './Obama.wav';
+const wave = sherpa_onnx.readWave(waveFilename);
+
+if (wave.sampleRate != recognizer.config.featConfig.sampleRate) {
+  throw new Error(
+      `Expected sample rate: ${recognizer.config.featConfig.sampleRate}. Given: ${wave.sampleRate}`);
+}
+
+console.log('Started')
+let start = Date.now();
+
+const windowSize = vad.config.sileroVad.windowSize;
+for (let i = 0; i < wave.samples.length; i += windowSize) {
+  const thisWindow = wave.samples.subarray(i, i + windowSize);
+  vad.acceptWaveform(thisWindow);
+
+  while (!vad.isEmpty()) {
+    const segment = vad.front();
+    vad.pop();
+
+    let start_time = segment.start / wave.sampleRate;
+    let end_time = start_time + segment.samples.length / wave.sampleRate;
+
+    start_time = start_time.toFixed(2);
+    end_time = end_time.toFixed(2);
+
+    const stream = recognizer.createStream();
+    stream.acceptWaveform(wave.sampleRate, segment.samples);
+
+    recognizer.decode(stream);
+    const r = recognizer.getResult(stream);
+    if (r.text.length > 0) {
+      const text = r.text.toLowerCase().trim();
+      console.log(`${start_time} -- ${end_time}: ${text}`);
+    }
+
+    stream.free();
+  }
+}
+
+vad.flush();
+
+while (!vad.isEmpty()) {
+  const segment = vad.front();
+  vad.pop();
+
+  let start_time = segment.start / wave.sampleRate;
+  let end_time = start_time + segment.samples.length / wave.sampleRate;
+
+  start_time = start_time.toFixed(2);
+  end_time = end_time.toFixed(2);
+
+  const stream = recognizer.createStream();
+  stream.acceptWaveform(wave.sampleRate, segment.samples);
+
+  recognizer.decode(stream);
+  const r = recognizer.getResult(stream);
+  if (r.text.length > 0) {
+    const text = r.text.toLowerCase().trim();
+    console.log(`${start_time} -- ${end_time}: ${text}`);
+  }
+
+  stream.free();
+}
+
+let stop = Date.now();
+console.log('Done')
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3))
+
+vad.free();
+recognizer.free();
@@ -41,4 +41,11 @@
   }                                 \
   } while (0)

+#define SHERPA_ONNX_DELETE_C_STR(p) \
+  do {                              \
+    if (p) {                        \
+      delete[] p;                   \
+    }                               \
+  } while (0)
+
 #endif  // SCRIPTS_NODE_ADDON_API_SRC_MACROS_H_
@@ -80,6 +80,25 @@ static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig(
   return c;
 }

+static SherpaOnnxOfflineMoonshineModelConfig GetOfflineMoonshineModelConfig(
+    Napi::Object obj) {
+  SherpaOnnxOfflineMoonshineModelConfig c;
+  memset(&c, 0, sizeof(c));
+
+  if (!obj.Has("moonshine") || !obj.Get("moonshine").IsObject()) {
+    return c;
+  }
+
+  Napi::Object o = obj.Get("moonshine").As<Napi::Object>();
+
+  SHERPA_ONNX_ASSIGN_ATTR_STR(preprocessor, preprocessor);
+  SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder);
+  SHERPA_ONNX_ASSIGN_ATTR_STR(uncached_decoder, uncachedDecoder);
+  SHERPA_ONNX_ASSIGN_ATTR_STR(cached_decoder, cachedDecoder);
+
+  return c;
+}
+
 static SherpaOnnxOfflineTdnnModelConfig GetOfflineTdnnModelConfig(
     Napi::Object obj) {
   SherpaOnnxOfflineTdnnModelConfig c;
@@ -130,6 +149,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
   c.whisper = GetOfflineWhisperModelConfig(o);
   c.tdnn = GetOfflineTdnnModelConfig(o);
   c.sense_voice = GetOfflineSenseVoiceModelConfig(o);
+  c.moonshine = GetOfflineMoonshineModelConfig(o);

   SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
   SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -206,97 +226,42 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
   const SherpaOnnxOfflineRecognizer *recognizer =
       SherpaOnnxCreateOfflineRecognizer(&c);

-  if (c.model_config.transducer.encoder) {
-    delete[] c.model_config.transducer.encoder;
-  }
-
-  if (c.model_config.transducer.decoder) {
-    delete[] c.model_config.transducer.decoder;
-  }
-
-  if (c.model_config.transducer.joiner) {
-    delete[] c.model_config.transducer.joiner;
-  }
-
-  if (c.model_config.paraformer.model) {
-    delete[] c.model_config.paraformer.model;
-  }
-
-  if (c.model_config.nemo_ctc.model) {
-    delete[] c.model_config.nemo_ctc.model;
-  }
-
-  if (c.model_config.whisper.encoder) {
-    delete[] c.model_config.whisper.encoder;
-  }
-
-  if (c.model_config.whisper.decoder) {
-    delete[] c.model_config.whisper.decoder;
-  }
-
-  if (c.model_config.whisper.language) {
-    delete[] c.model_config.whisper.language;
-  }
-
-  if (c.model_config.whisper.task) {
-    delete[] c.model_config.whisper.task;
-  }
-
-  if (c.model_config.tdnn.model) {
-    delete[] c.model_config.tdnn.model;
-  }
-
-  if (c.model_config.sense_voice.model) {
-    delete[] c.model_config.sense_voice.model;
-  }
-
-  if (c.model_config.sense_voice.language) {
-    delete[] c.model_config.sense_voice.language;
-  }
-
-  if (c.model_config.tokens) {
-    delete[] c.model_config.tokens;
-  }
-
-  if (c.model_config.provider) {
-    delete[] c.model_config.provider;
-  }
-
-  if (c.model_config.model_type) {
-    delete[] c.model_config.model_type;
-  }
-
-  if (c.model_config.modeling_unit) {
-    delete[] c.model_config.modeling_unit;
-  }
-
-  if (c.model_config.bpe_vocab) {
-    delete[] c.model_config.bpe_vocab;
-  }
-
-  if (c.model_config.telespeech_ctc) {
-    delete[] c.model_config.telespeech_ctc;
-  }
-
-  if (c.lm_config.model) {
-    delete[] c.lm_config.model;
-  }
-
-  if (c.decoding_method) {
-    delete[] c.decoding_method;
-  }
-
-  if (c.hotwords_file) {
-    delete[] c.hotwords_file;
-  }
-
-  if (c.rule_fsts) {
-    delete[] c.rule_fsts;
-  }
-
-  if (c.rule_fars) {
-    delete[] c.rule_fars;
-  }
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.encoder);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.decoder);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.transducer.joiner);
+
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.paraformer.model);
+
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.nemo_ctc.model);
+
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.encoder);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.decoder);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.language);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.whisper.task);
+
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.tdnn.model);
+
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.model);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.sense_voice.language);
+
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.preprocessor);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.encoder);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.uncached_decoder);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.moonshine.cached_decoder);
+
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.modeling_unit);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.bpe_vocab);
+  SHERPA_ONNX_DELETE_C_STR(c.model_config.telespeech_ctc);
+
+  SHERPA_ONNX_DELETE_C_STR(c.lm_config.model);
+
+  SHERPA_ONNX_DELETE_C_STR(c.decoding_method);
+  SHERPA_ONNX_DELETE_C_STR(c.hotwords_file);
+  SHERPA_ONNX_DELETE_C_STR(c.rule_fsts);
+  SHERPA_ONNX_DELETE_C_STR(c.rule_fars);

   if (!recognizer) {
     Napi::TypeError::New(env, "Please check your config!")
@@ -35,6 +35,10 @@ function freeConfig(config, Module) {
     freeConfig(config.whisper, Module)
   }

+  if ('moonshine' in config) {
+    freeConfig(config.moonshine, Module)
+  }
+
   if ('tdnn' in config) {
     freeConfig(config.tdnn, Module)
   }
@@ -563,7 +567,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
   const n = encoderLen + decoderLen + languageLen + taskLen;
   const buffer = Module._malloc(n);

-  const len = 5 * 4;  // 4 pointers
+  const len = 5 * 4;  // 4 pointers + 1 int32
   const ptr = Module._malloc(len);

   let offset = 0;
@@ -598,6 +602,55 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
   }
 }

+function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
+  const preprocessorLen = Module.lengthBytesUTF8(config.preprocessor || '') + 1;
+  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
+  const uncachedDecoderLen =
+      Module.lengthBytesUTF8(config.uncachedDecoder || '') + 1;
+  const cachedDecoderLen =
+      Module.lengthBytesUTF8(config.cachedDecoder || '') + 1;
+
+  const n =
+      preprocessorLen + encoderLen + uncachedDecoderLen + cachedDecoderLen;
+  const buffer = Module._malloc(n);
+
+  const len = 4 * 4;  // 4 pointers
+  const ptr = Module._malloc(len);
+
+  let offset = 0;
+  Module.stringToUTF8(
+      config.preprocessor || '', buffer + offset, preprocessorLen);
+  offset += preprocessorLen;
+
+  Module.stringToUTF8(config.encoder || '', buffer + offset, encoderLen);
+  offset += encoderLen;
+
+  Module.stringToUTF8(
+      config.uncachedDecoder || '', buffer + offset, uncachedDecoderLen);
+  offset += uncachedDecoderLen;
+
+  Module.stringToUTF8(
+      config.cachedDecoder || '', buffer + offset, cachedDecoderLen);
+  offset += cachedDecoderLen;
+
+  offset = 0;
+  Module.setValue(ptr, buffer + offset, 'i8*');
+  offset += preprocessorLen;
+
+  Module.setValue(ptr + 4, buffer + offset, 'i8*');
+  offset += encoderLen;
+
+  Module.setValue(ptr + 8, buffer + offset, 'i8*');
+  offset += uncachedDecoderLen;
+
+  Module.setValue(ptr + 12, buffer + offset, 'i8*');
+  offset += cachedDecoderLen;
+
+  return {
+    buffer: buffer, ptr: ptr, len: len,
+  }
+}
+
 function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
   const n = Module.lengthBytesUTF8(config.model || '') + 1;
   const buffer = Module._malloc(n);
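`initSherpaOnnxOfflineMoonshineModelConfig` above packs the four NUL-terminated path strings into one heap buffer and stores four i8* pointers at ptr, ptr + 4, ptr + 8, and ptr + 12, mirroring the four-field SherpaOnnxOfflineMoonshineModelConfig struct on wasm32 (hence `len = 4 * 4`). A hypothetical usage sketch; the call sequence is inferred from how the sibling init helpers in this file are used:

```js
// Hypothetical sketch, assuming an initialized Emscripten Module object.
const moonshine = initSherpaOnnxOfflineMoonshineModelConfig({
  preprocessor: './preprocess.onnx',
  encoder: './encode.int8.onnx',
  uncachedDecoder: './uncached_decode.int8.onnx',
  cachedDecoder: './cached_decode.int8.onnx',
}, Module);

// moonshine.ptr points at the 16-byte config struct; moonshine.buffer holds
// the string data. The struct is copied into the parent offline model config
// with Module._CopyHeap(moonshine.ptr, moonshine.len, ...) and released
// later through freeConfig(moonshine, Module).
```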
@@ -693,6 +746,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
     };
   }

+  if (!('moonshine' in config)) {
+    config.moonshine = {
+      preprocessor: '',
+      encoder: '',
+      uncachedDecoder: '',
+      cachedDecoder: '',
+    };
+  }
+
   if (!('tdnn' in config)) {
     config.tdnn = {
       model: '',
@@ -724,8 +786,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
   const senseVoice =
       initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);

+  const moonshine =
+      initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);
+
   const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
-      tdnn.len + 8 * 4 + senseVoice.len;
+      tdnn.len + 8 * 4 + senseVoice.len + moonshine.len;

   const ptr = Module._malloc(len);

@@ -745,7 +810,6 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
   Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
   offset += tdnn.len;

-
   const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
   const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
   const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
@@ -817,11 +881,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
   offset += 4;

   Module._CopyHeap(senseVoice.ptr, senseVoice.len, ptr + offset);
+  offset += senseVoice.len;
+
+  Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);

   return {
     buffer: buffer, ptr: ptr, len: len, transducer: transducer,
     paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
-    senseVoice: senseVoice,
+    senseVoice: senseVoice, moonshine: moonshine,
   }
 }
@@ -15,6 +15,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");

 static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
 static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
+static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
 static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
 static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
 static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
@@ -25,7 +26,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
                   sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
                   sizeof(SherpaOnnxOfflineWhisperModelConfig) +
                   sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
-                  sizeof(SherpaOnnxOfflineSenseVoiceModelConfig),
+                  sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
+                  sizeof(SherpaOnnxOfflineMoonshineModelConfig),
               "");
 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
 static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
@@ -66,6 +68,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
   auto whisper = &model_config->whisper;
   auto tdnn = &model_config->tdnn;
   auto sense_voice = &model_config->sense_voice;
+  auto moonshine = &model_config->moonshine;

   fprintf(stdout, "----------offline transducer model config----------\n");
   fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -93,6 +96,12 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
   fprintf(stdout, "language: %s\n", sense_voice->language);
   fprintf(stdout, "use_itn: %d\n", sense_voice->use_itn);

+  fprintf(stdout, "----------offline moonshine model config----------\n");
+  fprintf(stdout, "preprocessor: %s\n", moonshine->preprocessor);
+  fprintf(stdout, "encoder: %s\n", moonshine->encoder);
+  fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
+  fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);
+
 fprintf(stdout, "tokens: %s\n", model_config->tokens);
 fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
 fprintf(stdout, "provider: %s\n", model_config->provider);