Add audio tagging APIs for node-addon-api (#875)

Showing 12 changed files with 520 additions and 16 deletions
```diff
@@ -18,7 +18,7 @@ fi
 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
 
-# SHERPA_ONNX_VERSION=1.0.21
+# SHERPA_ONNX_VERSION=1.0.22
 
 if [ -z $owner ]; then
   owner=k2-fsa
```
```diff
@@ -6,6 +6,22 @@ d=nodejs-addon-examples
 echo "dir: $d"
 cd $d
 
+echo "----------audio tagging----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
+tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
+rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
+
+node ./test_audio_tagging_zipformer.js
+rm -rf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
+tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
+rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
+
+node ./test_audio_tagging_ced.js
+rm -rf sherpa-onnx-ced-mini-audio-tagging-2024-04-19
+
 echo "----------speaker identification----------"
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
 
```
```diff
@@ -33,6 +33,11 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
 
+      - name: Update pip
+        shell: bash
+        run: |
+          pip install -U pip
+
       - uses: actions/setup-node@v4
         with:
           registry-url: 'https://registry.npmjs.org'
```
```diff
@@ -55,7 +55,7 @@ jobs:
 
           SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
           echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
-          # SHERPA_ONNX_VERSION=1.0.21
+          # SHERPA_ONNX_VERSION=1.0.22
 
           src_dir=.github/scripts/node-addon
           sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
```
nodejs-addon-examples/README.md

````diff
@@ -27,7 +27,82 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
 ```
 
-# Voice Activity detection (VAD)
+# Examples
+
+The following tables list the examples in this folder.
+
+## Voice activity detection (VAD)
+
+|File| Description|
+|---|---|
+|[./test_vad_microphone.js](./test_vad_microphone.js)| VAD with a microphone. It uses [silero-vad](https://github.com/snakers4/silero-vad)|
+
+## Speaker identification
+
+|File| Description|
+|---|---|
+|[./test_speaker_identification.js](./test_speaker_identification.js)| Speaker identification from a file|
+
+## Spoken language identification
+
+|File| Description|
+|---|---|
+|[./test_vad_spoken_language_identification_microphone.js](./test_vad_spoken_language_identification_microphone.js)|Spoken language identification from a microphone using a multi-lingual [Whisper](https://github.com/openai/whisper) model|
+
+## Audio tagging
+
+|File| Description|
+|---|---|
+|[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model|
+|[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model|
+
+## Streaming speech-to-text from files
+
+|File| Description|
+|---|---|
+|[./test_asr_streaming_transducer.js](./test_asr_streaming_transducer.js)| Streaming speech recognition from a file using a Zipformer transducer model|
+|[./test_asr_streaming_ctc.js](./test_asr_streaming_ctc.js)| Streaming speech recognition from a file using a Zipformer CTC model with greedy search|
+|[./test_asr_streaming_ctc_hlg.js](./test_asr_streaming_ctc_hlg.js)| Streaming speech recognition from a file using a Zipformer CTC model with HLG decoding|
+|[./test_asr_streaming_paraformer.js](./test_asr_streaming_paraformer.js)|Streaming speech recognition from a file using a [Paraformer](https://github.com/alibaba-damo-academy/FunASR) model|
+
+## Streaming speech-to-text from a microphone
+
+|File| Description|
+|---|---|
+|[./test_asr_streaming_transducer_microphone.js](./test_asr_streaming_transducer_microphone.js)| Streaming speech recognition from a microphone using a Zipformer transducer model|
+|[./test_asr_streaming_ctc_microphone.js](./test_asr_streaming_ctc_microphone.js)| Streaming speech recognition from a microphone using a Zipformer CTC model with greedy search|
+|[./test_asr_streaming_ctc_hlg_microphone.js](./test_asr_streaming_ctc_hlg_microphone.js)|Streaming speech recognition from a microphone using a Zipformer CTC model with HLG decoding|
+|[./test_asr_streaming_paraformer_microphone.js](./test_asr_streaming_paraformer_microphone.js)| Streaming speech recognition from a microphone using a [Paraformer](https://github.com/alibaba-damo-academy/FunASR) model|
+
+## Non-streaming speech-to-text from files
+
+|File| Description|
+|---|---|
+|[./test_asr_non_streaming_transducer.js](./test_asr_non_streaming_transducer.js)|Non-streaming speech recognition from a file with a Zipformer transducer model|
+|[./test_asr_non_streaming_whisper.js](./test_asr_non_streaming_whisper.js)| Non-streaming speech recognition from a file using [Whisper](https://github.com/openai/whisper)|
+|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
+|[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
+
+## Non-streaming speech-to-text from a microphone with VAD
+
+|File| Description|
+|---|---|
+|[./test_vad_asr_non_streaming_transducer_microphone.js](./test_vad_asr_non_streaming_transducer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a Zipformer transducer model|
+|[./test_vad_asr_non_streaming_whisper_microphone.js](./test_vad_asr_non_streaming_whisper_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Whisper](https://github.com/openai/whisper)|
+|[./test_vad_asr_non_streaming_nemo_ctc_microphone.js](./test_vad_asr_non_streaming_nemo_ctc_microphone.js)|VAD + Non-streaming speech recognition from a microphone using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
+|[./test_vad_asr_non_streaming_paraformer_microphone.js](./test_vad_asr_non_streaming_paraformer_microphone.js)|VAD + Non-streaming speech recognition from a microphone using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)|
+
+## Text-to-speech
+
+|File| Description|
+|---|---|
+|[./test_tts_non_streaming_vits_piper_en.js](./test_tts_non_streaming_vits_piper_en.js)| Text-to-speech with a [piper](https://github.com/rhasspy/piper) English model|
+|[./test_tts_non_streaming_vits_coqui_de.js](./test_tts_non_streaming_vits_coqui_de.js)| Text-to-speech with a [coqui](https://github.com/coqui-ai/TTS) German model|
+|[./test_tts_non_streaming_vits_zh_ll.js](./test_tts_non_streaming_vits_zh_ll.js)| Text-to-speech with a Chinese model using [cppjieba](https://github.com/yanyiwu/cppjieba)|
+|[./test_tts_non_streaming_vits_zh_aishell3.js](./test_tts_non_streaming_vits_zh_aishell3.js)| Text-to-speech with a Chinese TTS model|
+
+
+### Voice activity detection (VAD)
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
@@ -39,7 +114,27 @@ npm install naudiodon2
 node ./test_vad_microphone.js
 ```
 
-## Streaming speech recognition with Zipformer transducer
+### Audio tagging with Zipformer
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
+tar xvf sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
+rm sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2
+
+node ./test_audio_tagging_zipformer.js
+```
+
+### Audio tagging with CED
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
+tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
+rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2
+
+node ./test_audio_tagging_ced.js
+```
+
+### Streaming speech recognition with Zipformer transducer
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
@@ -54,7 +149,7 @@ npm install naudiodon2
 node ./test_asr_streaming_transducer_microphone.js
 ```
 
-## Streaming speech recognition with Zipformer CTC
+### Streaming speech recognition with Zipformer CTC
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
@@ -73,7 +168,7 @@ node ./test_asr_streaming_ctc_microphone.js
 node ./test_asr_streaming_ctc_hlg_microphone.js
 ```
 
-## Streaming speech recognition with Paraformer
+### Streaming speech recognition with Paraformer
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
@@ -88,7 +183,7 @@ npm install naudiodon2
 node ./test_asr_streaming_paraformer_microphone.js
 ```
 
-## Non-streaming speech recognition with Zipformer transducer
+### Non-streaming speech recognition with Zipformer transducer
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
@@ -102,7 +197,7 @@ npm install naudiodon2
 node ./test_vad_asr_non_streaming_transducer_microphone.js
 ```
 
-## Non-streaming speech recognition with Whisper
+### Non-streaming speech recognition with Whisper
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
@@ -116,7 +211,7 @@ npm install naudiodon2
 node ./test_vad_asr_non_streaming_whisper_microphone.js
 ```
 
-## Non-streaming speech recognition with NeMo CTC models
+### Non-streaming speech recognition with NeMo CTC models
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
@@ -130,7 +225,7 @@ npm install naudiodon2
 node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js
 ```
 
-## Non-streaming speech recognition with Paraformer
+### Non-streaming speech recognition with Paraformer
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
@@ -144,7 +239,7 @@ npm install naudiodon2
 node ./test_vad_asr_non_streaming_paraformer_microphone.js
 ```
 
-## Text-to-speech with piper VITS models (TTS)
+### Text-to-speech with piper VITS models (TTS)
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
@@ -154,7 +249,7 @@ rm vits-piper-en_GB-cori-medium.tar.bz2
 node ./test_tts_non_streaming_vits_piper_en.js
 ```
 
-## Text-to-speech with piper Coqui-ai/TTS models (TTS)
+### Text-to-speech with Coqui-ai/TTS models (TTS)
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
@@ -164,7 +259,7 @@ rm vits-coqui-de-css10.tar.bz2
 node ./test_tts_non_streaming_vits_coqui_de.js
 ```
 
-## Text-to-speech with vits Chinese models (1/2)
+### Text-to-speech with vits Chinese models (1/2)
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
@@ -174,7 +269,7 @@ rm sherpa-onnx-vits-zh-ll.tar.bz2
 node ./test_tts_non_streaming_vits_zh_ll.js
 ```
 
-## Text-to-speech with vits Chinese models (2/2)
+### Text-to-speech with vits Chinese models (2/2)
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
@@ -184,7 +279,7 @@ rm vits-icefall-zh-aishell3.tar.bz2
 node ./test_tts_non_streaming_vits_zh_aishell3.js
 ```
 
-## Spoken language identification with Whisper multi-lingual models
+### Spoken language identification with Whisper multi-lingual models
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
@@ -202,7 +297,7 @@ npm install naudiodon2
 node ./test_vad_spoken_language_identification_microphone.js
 ```
 
-## Speaker identification
+### Speaker identification
 
 You can find more models at
 <https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models>
````
nodejs-addon-examples/test_audio_tagging_ced.js
0 → 100644

```js
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
function createAudioTagging() {
  const config = {
    model: {
      ced: './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx',
      numThreads: 1,
      debug: true,
    },
    labels:
        './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv',
    topK: 5,
  };
  return new sherpa_onnx.AudioTagging(config);
}

const at = createAudioTagging();

const testWaves = [
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/1.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/2.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/3.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/4.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/5.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/6.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/7.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/8.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/9.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/10.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/11.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/12.wav',
  './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/13.wav',
];

console.log('------');

for (let filename of testWaves) {
  const start = performance.now();
  const stream = at.createStream();
  const wave = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  const events = at.compute(stream);
  const stop = performance.now();

  const elapsed_seconds = (stop - start) / 1000;
  const duration = wave.samples.length / wave.sampleRate;
  const real_time_factor = elapsed_seconds / duration;

  console.log('input file:', filename);
  console.log('Probability\t\tName');
  for (let e of events) {
    console.log(`${e.prob.toFixed(3)}\t\t\t${e.name}`);
  }
  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));
  console.log('------');
}
```
nodejs-addon-examples/test_audio_tagging_zipformer.js
0 → 100644

```js
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
function createAudioTagging() {
  const config = {
    model: {
      zipformer: {
        model:
            './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/model.int8.onnx'
      },
      numThreads: 1,
      debug: true,
    },
    labels:
        './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/class_labels_indices.csv',
    topK: 5,
  };
  return new sherpa_onnx.AudioTagging(config);
}

const at = createAudioTagging();

const testWaves = [
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/1.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/2.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/3.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/4.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/5.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/6.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/7.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/8.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/9.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/10.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/11.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/12.wav',
  './sherpa-onnx-zipformer-small-audio-tagging-2024-04-15/test_wavs/13.wav',
];

console.log('------');

for (let filename of testWaves) {
  const start = performance.now();
  const stream = at.createStream();
  const wave = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  const events = at.compute(stream);
  const stop = performance.now();

  const elapsed_seconds = (stop - start) / 1000;
  const duration = wave.samples.length / wave.sampleRate;
  const real_time_factor = elapsed_seconds / duration;

  console.log('input file:', filename);
  console.log('Probability\t\tName');
  for (let e of events) {
    console.log(`${e.prob.toFixed(3)}\t\t\t${e.name}`);
  }
  console.log('Wave duration', duration.toFixed(3), 'seconds');
  console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
  console.log(
      `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
      real_time_factor.toFixed(3));
  console.log('------');
}
```
scripts/node-addon-api/CMakeLists.txt

```diff
@@ -18,6 +18,7 @@ add_definitions(-DNAPI_VERSION=3)
 include_directories(${CMAKE_JS_INC})
 
 set(srcs
+  src/audio-tagging.cc
   src/non-streaming-asr.cc
   src/non-streaming-tts.cc
   src/sherpa-onnx-node-addon-api.cc
```
scripts/node-addon-api/lib/audio-tagg.js
0 → 100644
```js
const addon = require('./addon.js');
const non_streaming_asr = require('./non-streaming-asr.js');

class AudioTagging {
  constructor(config) {
    this.handle = addon.createAudioTagging(config);
    this.config = config;
  }

  createStream() {
    return new non_streaming_asr.OfflineStream(
        addon.audioTaggingCreateOfflineStream(this.handle));
  }

  /* Return an array. Each element is
   * an object {name: "xxx", prob: xxx, index: xxx}.
   */
  compute(stream, topK = -1) {
    return addon.audioTaggingCompute(this.handle, stream.handle, topK);
  }
}

module.exports = {
  AudioTagging,
}
```
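For quick reference, here is a minimal usage sketch of this wrapper. The model and wave paths are assumptions borrowed from the CED example above; any model from the audio-tagging-models release page should work the same way:

```js
const sherpa_onnx = require('sherpa-onnx-node');

const at = new sherpa_onnx.AudioTagging({
  model: {
    ced: './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx',
    numThreads: 1,
  },
  labels:
      './sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv',
  topK: 5,
});

// One stream per utterance: fill it with audio, then classify.
const stream = at.createStream();
const wave = sherpa_onnx.readWave('./some.wav');  // hypothetical input file
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// topK = -1 (the default) defers to the topK given in the config;
// pass a positive value to override it for this call only.
const events = at.compute(stream, 3);
// e.g. [{name: 'Speech', index: 0, prob: 0.93}, ...]
console.log(events);
```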
scripts/node-addon-api/lib/sherpa-onnx.js

```diff
@@ -5,6 +5,7 @@ const non_streaming_tts = require('./non-streaming-tts.js');
 const vad = require('./vad.js');
 const slid = require('./spoken-language-identification.js');
 const sid = require('./speaker-identification.js');
+const at = require('./audio-tagg.js');
 
 module.exports = {
   OnlineRecognizer: streaming_asr.OnlineRecognizer,
@@ -18,4 +19,5 @@ module.exports = {
   SpokenLanguageIdentification: slid.SpokenLanguageIdentification,
   SpeakerEmbeddingExtractor: sid.SpeakerEmbeddingExtractor,
   SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
+  AudioTagging: at.AudioTagging,
 }
```
scripts/node-addon-api/src/audio-tagging.cc
0 → 100644
```cpp
// scripts/node-addon-api/src/audio-tagging.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <cstring>  // for memset
#include <sstream>

#include "macros.h"  // NOLINT
#include "napi.h"    // NOLINT
#include "sherpa-onnx/c-api/c-api.h"

static SherpaOnnxOfflineZipformerAudioTaggingModelConfig
GetAudioTaggingZipformerModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineZipformerAudioTaggingModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("zipformer") || !obj.Get("zipformer").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("zipformer").As<Napi::Object>();

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);

  return c;
}

static SherpaOnnxAudioTaggingModelConfig GetAudioTaggingModelConfig(
    Napi::Object obj) {
  SherpaOnnxAudioTaggingModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!obj.Has("model") || !obj.Get("model").IsObject()) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();
  c.zipformer = GetAudioTaggingZipformerModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(ced, ced);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  // debug may be given either as a boolean or as a number.
  if (o.Has("debug") &&
      (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
    if (o.Get("debug").IsBoolean()) {
      c.debug = o.Get("debug").As<Napi::Boolean>().Value();
    } else {
      c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}

static Napi::External<SherpaOnnxAudioTagging> CreateAudioTaggingWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the only argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxAudioTaggingConfig c;
  memset(&c, 0, sizeof(c));
  c.model = GetAudioTaggingModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_STR(labels, labels);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(top_k, topK);

  const SherpaOnnxAudioTagging *at = SherpaOnnxCreateAudioTagging(&c);

  // SHERPA_ONNX_ASSIGN_ATTR_STR allocates string copies with new[];
  // the C API has copied the config above, so release them here.
  if (c.model.zipformer.model) {
    delete[] c.model.zipformer.model;
  }

  if (c.model.ced) {
    delete[] c.model.ced;
  }

  if (c.model.provider) {
    delete[] c.model.provider;
  }

  if (c.labels) {
    delete[] c.labels;
  }

  if (!at) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  // The finalizer destroys the native object when the external handle is
  // garbage collected.
  return Napi::External<SherpaOnnxAudioTagging>::New(
      env, const_cast<SherpaOnnxAudioTagging *>(at),
      [](Napi::Env env, SherpaOnnxAudioTagging *at) {
        SherpaOnnxDestroyAudioTagging(at);
      });
}

static Napi::External<SherpaOnnxOfflineStream>
AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "You should pass an audio tagging pointer as the only argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxAudioTagging *at =
      info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxAudioTaggingCreateOfflineStream(at);

  return Napi::External<SherpaOnnxOfflineStream>::New(
      env, const_cast<SherpaOnnxOfflineStream *>(stream),
      [](Napi::Env env, SherpaOnnxOfflineStream *stream) {
        DestroyOfflineStream(stream);
      });
}

static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() != 3) {
    std::ostringstream os;
    os << "Expect only 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "You should pass an audio tagging pointer as the first argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(
        env, "You should pass an offline stream pointer as the second argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[2].IsNumber()) {
    Napi::TypeError::New(env,
                         "You should pass an integer as the third argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxAudioTagging *at =
      info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();

  SherpaOnnxOfflineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();

  int32_t top_k = info[2].As<Napi::Number>().Int32Value();

  const SherpaOnnxAudioEvent *const *events =
      SherpaOnnxAudioTaggingCompute(at, stream, top_k);

  // events is a NULL-terminated array; count its entries first.
  auto p = events;
  int32_t k = 0;
  while (p && *p) {
    ++k;
    ++p;
  }

  Napi::Array ans = Napi::Array::New(env, k);
  for (int32_t i = 0; i != k; ++i) {
    Napi::Object obj = Napi::Object::New(env);
    obj.Set(Napi::String::New(env, "name"),
            Napi::String::New(env, events[i]->name));
    obj.Set(Napi::String::New(env, "index"),
            Napi::Number::New(env, events[i]->index));
    obj.Set(Napi::String::New(env, "prob"),
            Napi::Number::New(env, events[i]->prob));
    ans[i] = obj;
  }

  SherpaOnnxAudioTaggingFreeResults(events);

  return ans;
}

void InitAudioTagging(Napi::Env env, Napi::Object exports) {
  exports.Set(Napi::String::New(env, "createAudioTagging"),
              Napi::Function::New(env, CreateAudioTaggingWrapper));

  exports.Set(Napi::String::New(env, "audioTaggingCreateOfflineStream"),
              Napi::Function::New(env, AudioTaggingCreateOfflineStreamWrapper));

  exports.Set(Napi::String::New(env, "audioTaggingCompute"),
              Napi::Function::New(env, AudioTaggingComputeWrapper));
}
```
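The three functions registered by `InitAudioTagging` are the entire surface that `lib/audio-tagg.js` builds on. A sketch of the equivalent raw addon-level calls follows, mainly to show how the wrappers above map onto the class methods; the `require` path is an assumption, and in practice you would use the `AudioTagging` class instead:

```js
// Raw addon-level sketch (assumed loader path; normally use lib/sherpa-onnx.js).
const addon = require('./lib/addon.js');

// CreateAudioTaggingWrapper -> external handle; its finalizer destroys the
// native object when the handle is garbage collected.
const handle = addon.createAudioTagging({
  model: {ced: './model.int8.onnx', numThreads: 1},  // hypothetical paths
  labels: './class_labels_indices.csv',
  topK: 5,
});

// AudioTaggingCreateOfflineStreamWrapper -> external stream handle;
// lib/audio-tagg.js wraps it in an OfflineStream so acceptWaveform works.
const streamHandle = addon.audioTaggingCreateOfflineStream(handle);

// ...feed audio into the stream via the OfflineStream wrapper...

// AudioTaggingComputeWrapper(handle, stream, topK); -1 defers to config.topK.
const events = addon.audioTaggingCompute(handle, streamHandle, -1);
// [{name: ..., index: ..., prob: ...}, ...]
```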
scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc

```diff
@@ -19,6 +19,8 @@ void InitSpokenLanguageID(Napi::Env env, Napi::Object exports);
 
 void InitSpeakerID(Napi::Env env, Napi::Object exports);
 
+void InitAudioTagging(Napi::Env env, Napi::Object exports);
+
 Napi::Object Init(Napi::Env env, Napi::Object exports) {
   InitStreamingAsr(env, exports);
   InitNonStreamingAsr(env, exports);
@@ -28,6 +30,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
   InitWaveWriter(env, exports);
   InitSpokenLanguageID(env, exports);
   InitSpeakerID(env, exports);
+  InitAudioTagging(env, exports);
 
   return exports;
 }
```