Committed by
GitHub
Add JavaScript API (node-addon) for Kokoro TTS models (#1725)
正在显示
4 个修改的文件
包含
101 行增加
和
0 行删除
| @@ -85,6 +85,13 @@ fi | @@ -85,6 +85,13 @@ fi | ||
| 85 | 85 | ||
| 86 | echo "----------tts----------" | 86 | echo "----------tts----------" |
| 87 | 87 | ||
| 88 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | ||
| 89 | +tar xf kokoro-en-v0_19.tar.bz2 | ||
| 90 | +rm kokoro-en-v0_19.tar.bz2 | ||
| 91 | + | ||
| 92 | +node ./test_tts_non_streaming_kokoro_en.js | ||
| 93 | +ls -lh *.wav | ||
| 94 | + | ||
| 88 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | 95 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 |
| 89 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 96 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 90 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 97 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| @@ -53,6 +53,25 @@ static SherpaOnnxOfflineTtsMatchaModelConfig GetOfflineTtsMatchaModelConfig( | @@ -53,6 +53,25 @@ static SherpaOnnxOfflineTtsMatchaModelConfig GetOfflineTtsMatchaModelConfig( | ||
| 53 | return c; | 53 | return c; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | +static SherpaOnnxOfflineTtsKokoroModelConfig GetOfflineTtsKokoroModelConfig( | ||
| 57 | + Napi::Object obj) { | ||
| 58 | + SherpaOnnxOfflineTtsKokoroModelConfig c; | ||
| 59 | + memset(&c, 0, sizeof(c)); | ||
| 60 | + | ||
| 61 | + if (!obj.Has("kokoro") || !obj.Get("kokoro").IsObject()) { | ||
| 62 | + return c; | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + Napi::Object o = obj.Get("kokoro").As<Napi::Object>(); | ||
| 66 | + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model); | ||
| 67 | + SHERPA_ONNX_ASSIGN_ATTR_STR(voices, voices); | ||
| 68 | + SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); | ||
| 69 | + SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir); | ||
| 70 | + SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale); | ||
| 71 | + | ||
| 72 | + return c; | ||
| 73 | +} | ||
| 74 | + | ||
| 56 | static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( | 75 | static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( |
| 57 | Napi::Object obj) { | 76 | Napi::Object obj) { |
| 58 | SherpaOnnxOfflineTtsModelConfig c; | 77 | SherpaOnnxOfflineTtsModelConfig c; |
| @@ -66,6 +85,7 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( | @@ -66,6 +85,7 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( | ||
| 66 | 85 | ||
| 67 | c.vits = GetOfflineTtsVitsModelConfig(o); | 86 | c.vits = GetOfflineTtsVitsModelConfig(o); |
| 68 | c.matcha = GetOfflineTtsMatchaModelConfig(o); | 87 | c.matcha = GetOfflineTtsMatchaModelConfig(o); |
| 88 | + c.kokoro = GetOfflineTtsKokoroModelConfig(o); | ||
| 69 | 89 | ||
| 70 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); | 90 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); |
| 71 | 91 | ||
| @@ -180,6 +200,22 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper( | @@ -180,6 +200,22 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper( | ||
| 180 | delete[] c.model.matcha.dict_dir; | 200 | delete[] c.model.matcha.dict_dir; |
| 181 | } | 201 | } |
| 182 | 202 | ||
| 203 | + if (c.model.kokoro.model) { | ||
| 204 | + delete[] c.model.kokoro.model; | ||
| 205 | + } | ||
| 206 | + | ||
| 207 | + if (c.model.kokoro.voices) { | ||
| 208 | + delete[] c.model.kokoro.voices; | ||
| 209 | + } | ||
| 210 | + | ||
| 211 | + if (c.model.kokoro.tokens) { | ||
| 212 | + delete[] c.model.kokoro.tokens; | ||
| 213 | + } | ||
| 214 | + | ||
| 215 | + if (c.model.kokoro.data_dir) { | ||
| 216 | + delete[] c.model.kokoro.data_dir; | ||
| 217 | + } | ||
| 218 | + | ||
| 183 | if (c.model.provider) { | 219 | if (c.model.provider) { |
| 184 | delete[] c.model.provider; | 220 | delete[] c.model.provider; |
| 185 | } | 221 | } |
| @@ -133,6 +133,7 @@ The following tables list the examples in this folder. | @@ -133,6 +133,7 @@ The following tables list the examples in this folder. | ||
| 133 | 133 | ||
| 134 | |File| Description| | 134 | |File| Description| |
| 135 | |---|---| | 135 | |---|---| |
| 136 | +|[./test_tts_non_streaming_kokoro_en.js](./test_tts_non_streaming_kokoro_en.js)| Text-to-speech with a Kokoro English Model| | ||
| 136 | |[./test_tts_non_streaming_matcha_icefall_en.js](./test_tts_non_streaming_matcha_icefall_en.js)| Text-to-speech with a [MatchaTTS English Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)| | 137 | |[./test_tts_non_streaming_matcha_icefall_en.js](./test_tts_non_streaming_matcha_icefall_en.js)| Text-to-speech with a [MatchaTTS English Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)| |
| 137 | |[./test_tts_non_streaming_matcha_icefall_zh.js](./test_tts_non_streaming_matcha_icefall_zh.js)| Text-to-speech with a [MatchaTTS Chinese Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)| | 138 | |[./test_tts_non_streaming_matcha_icefall_zh.js](./test_tts_non_streaming_matcha_icefall_zh.js)| Text-to-speech with a [MatchaTTS Chinese Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)| |
| 138 | |[./test_tts_non_streaming_vits_piper_en.js](./test_tts_non_streaming_vits_piper_en.js)| Text-to-speech with a [piper](https://github.com/rhasspy/piper) English model| | 139 | |[./test_tts_non_streaming_vits_piper_en.js](./test_tts_non_streaming_vits_piper_en.js)| Text-to-speech with a [piper](https://github.com/rhasspy/piper) English model| |
| @@ -347,6 +348,16 @@ npm install naudiodon2 | @@ -347,6 +348,16 @@ npm install naudiodon2 | ||
| 347 | node ./test_vad_asr_non_streaming_sense_voice_microphone.js | 348 | node ./test_vad_asr_non_streaming_sense_voice_microphone.js |
| 348 | ``` | 349 | ``` |
| 349 | 350 | ||
| 351 | +### Text-to-speech with Kokoro TTS models (English TTS) | ||
| 352 | + | ||
| 353 | +```bash | ||
| 354 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | ||
| 355 | +tar xf kokoro-en-v0_19.tar.bz2 | ||
| 356 | +rm kokoro-en-v0_19.tar.bz2 | ||
| 357 | + | ||
| 358 | +node ./test_tts_non_streaming_kokoro_en.js | ||
| 359 | +``` | ||
| 360 | + | ||
| 350 | ### Text-to-speech with MatchaTTS models (English TTS) | 361 | ### Text-to-speech with MatchaTTS models (English TTS) |
| 351 | ```bash | 362 | ```bash |
| 352 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | 363 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +const sherpa_onnx = require('sherpa-onnx-node'); | ||
| 3 | + | ||
| 4 | +// please refer to | ||
| 5 | +// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html | ||
| 6 | +// to download model files | ||
| 7 | +function createOfflineTts() { | ||
| 8 | + const config = { | ||
| 9 | + model: { | ||
| 10 | + kokoro: { | ||
| 11 | + model: './kokoro-en-v0_19/model.onnx', | ||
| 12 | + voices: './kokoro-en-v0_19/voices.bin', | ||
| 13 | + tokens: './kokoro-en-v0_19/tokens.txt', | ||
| 14 | + dataDir: './kokoro-en-v0_19/espeak-ng-data', | ||
| 15 | + }, | ||
| 16 | + debug: true, | ||
| 17 | + numThreads: 1, | ||
| 18 | + provider: 'cpu', | ||
| 19 | + }, | ||
| 20 | + maxNumSentences: 1, | ||
| 21 | + }; | ||
| 22 | + return new sherpa_onnx.OfflineTts(config); | ||
| 23 | +} | ||
| 24 | + | ||
| 25 | +const tts = createOfflineTts(); | ||
| 26 | + | ||
| 27 | +const text = | ||
| 28 | + 'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.' | ||
| 29 | + | ||
| 30 | + | ||
| 31 | +let start = Date.now(); | ||
| 32 | +const audio = tts.generate({text: text, sid: 6, speed: 1.0}); | ||
| 33 | +let stop = Date.now(); | ||
| 34 | +const elapsed_seconds = (stop - start) / 1000; | ||
| 35 | +const duration = audio.samples.length / audio.sampleRate; | ||
| 36 | +const real_time_factor = elapsed_seconds / duration; | ||
| 37 | +console.log('Wave duration', duration.toFixed(3), 'seconds') | ||
| 38 | +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') | ||
| 39 | +console.log( | ||
| 40 | + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, | ||
| 41 | + real_time_factor.toFixed(3)) | ||
| 42 | + | ||
| 43 | +const filename = 'test-kokoro-en-6.wav'; | ||
| 44 | +sherpa_onnx.writeWave( | ||
| 45 | + filename, {samples: audio.samples, sampleRate: audio.sampleRate}); | ||
| 46 | + | ||
| 47 | +console.log(`Saved to ${filename}`); |
-
请 注册 或 登录 后发表评论