Committed by
GitHub
Add nodejs example for parakeet-tdt-0.6b-v2. (#2219)
正在显示
3 个修改的文件
包含
70 行增加
和
0 行删除
| @@ -10,6 +10,14 @@ arch=$(node -p "require('os').arch()") | @@ -10,6 +10,14 @@ arch=$(node -p "require('os').arch()") | ||
| 10 | platform=$(node -p "require('os').platform()") | 10 | platform=$(node -p "require('os').platform()") |
| 11 | node_version=$(node -p "process.versions.node.split('.')[0]") | 11 | node_version=$(node -p "process.versions.node.split('.')[0]") |
| 12 | 12 | ||
| 13 | +echo "----------non-streaming ASR NeMo parakeet tdt----------" | ||
| 14 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 | ||
| 15 | +tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 | ||
| 16 | +rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 | ||
| 17 | + | ||
| 18 | +node ./test_asr_non_streaming_nemo_parakeet_tdt_v2.js | ||
| 19 | +rm -rf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8 | ||
| 20 | + | ||
| 13 | echo "----------non-streaming ASR dolphin CTC----------" | 21 | echo "----------non-streaming ASR dolphin CTC----------" |
| 14 | 22 | ||
| 15 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | 23 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 |
| @@ -123,6 +123,7 @@ The following tables list the examples in this folder. | @@ -123,6 +123,7 @@ The following tables list the examples in this folder. | ||
| 123 | |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| | 123 | |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| |
| 124 | |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| | 124 | |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| |
| 125 | |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| | 125 | |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| |
| 126 | +|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| | ||
| 126 | |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search| | 127 | |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model with greedy search| |
| 127 | |[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)| | 128 | |[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)| |
| 128 | |[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)| | 129 | |[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)| |
| @@ -361,6 +362,16 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | @@ -361,6 +362,16 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 361 | node ./test_asr_non_streaming_dolphin_ctc.js | 362 | node ./test_asr_non_streaming_dolphin_ctc.js |
| 362 | ``` | 363 | ``` |
| 363 | 364 | ||
| 365 | +### Non-streaming speech recognition with NeMo parakeet-tdt-0.6b-v2 models | ||
| 366 | + | ||
| 367 | +```bash | ||
| 368 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 | ||
| 369 | +tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 | ||
| 370 | +rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 | ||
| 371 | + | ||
| 372 | +node ./test_asr_non_streaming_nemo_parakeet_tdt_v2.js | ||
| 373 | +``` | ||
| 374 | + | ||
| 364 | ### Non-streaming speech recognition with NeMo CTC models | 375 | ### Non-streaming speech recognition with NeMo CTC models |
| 365 | 376 | ||
| 366 | ```bash | 377 | ```bash |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +const sherpa_onnx = require('sherpa-onnx-node'); | ||
| 3 | + | ||
| 4 | +// Please download test files from | ||
| 5 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 6 | +const config = { | ||
| 7 | + 'featConfig': { | ||
| 8 | + 'sampleRate': 16000, | ||
| 9 | + 'featureDim': 80, | ||
| 10 | + }, | ||
| 11 | + 'modelConfig': { | ||
| 12 | + 'transducer': { | ||
| 13 | + 'encoder': | ||
| 14 | + './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx', | ||
| 15 | + 'decoder': | ||
| 16 | + './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx', | ||
| 17 | + 'joiner': './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx', | ||
| 18 | + }, | ||
| 19 | + 'tokens': './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt', | ||
| 20 | + 'numThreads': 2, | ||
| 21 | + 'provider': 'cpu', | ||
| 22 | + 'debug': 1, | ||
| 23 | + 'modelType': 'nemo_transducer', | ||
| 24 | + } | ||
| 25 | +}; | ||
| 26 | + | ||
| 27 | +const waveFilename = | ||
| 28 | + './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/test_wavs/0.wav'; | ||
| 29 | + | ||
| 30 | +const recognizer = new sherpa_onnx.OfflineRecognizer(config); | ||
| 31 | +console.log('Started') | ||
| 32 | +let start = Date.now(); | ||
| 33 | +const stream = recognizer.createStream(); | ||
| 34 | +const wave = sherpa_onnx.readWave(waveFilename); | ||
| 35 | +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples}); | ||
| 36 | + | ||
| 37 | +recognizer.decode(stream); | ||
| 38 | +result = recognizer.getResult(stream) | ||
| 39 | +let stop = Date.now(); | ||
| 40 | +console.log('Done') | ||
| 41 | + | ||
| 42 | +const elapsed_seconds = (stop - start) / 1000; | ||
| 43 | +const duration = wave.samples.length / wave.sampleRate; | ||
| 44 | +const real_time_factor = elapsed_seconds / duration; | ||
| 45 | +console.log('Wave duration', duration.toFixed(3), 'seconds') | ||
| 46 | +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') | ||
| 47 | +console.log( | ||
| 48 | + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, | ||
| 49 | + real_time_factor.toFixed(3)) | ||
| 50 | +console.log(waveFilename) | ||
| 51 | +console.log('result\n', result) |
-
请 注册 或 登录 后发表评论