Committed by
GitHub
Add JavaScript API (node-addon) for Kokoro TTS 1.0 (#1808)
正在显示
4 个修改的文件
包含
73 行增加
和
0 行删除
| @@ -85,12 +85,21 @@ fi | @@ -85,12 +85,21 @@ fi | ||
| 85 | 85 | ||
| 86 | echo "----------tts----------" | 86 | echo "----------tts----------" |
| 87 | 87 | ||
| 88 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | ||
| 89 | +tar xf kokoro-multi-lang-v1_0.tar.bz2 | ||
| 90 | +rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 91 | + | ||
| 92 | +node ./test_tts_non_streaming_kokoro_zh_en.js | ||
| 93 | +ls -lh *.wav | ||
| 94 | +rm -rf kokoro-multi-lang-v1_0 | ||
| 95 | + | ||
| 88 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | 96 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 |
| 89 | tar xf kokoro-en-v0_19.tar.bz2 | 97 | tar xf kokoro-en-v0_19.tar.bz2 |
| 90 | rm kokoro-en-v0_19.tar.bz2 | 98 | rm kokoro-en-v0_19.tar.bz2 |
| 91 | 99 | ||
| 92 | node ./test_tts_non_streaming_kokoro_en.js | 100 | node ./test_tts_non_streaming_kokoro_en.js |
| 93 | ls -lh *.wav | 101 | ls -lh *.wav |
| 102 | +rm -rf kokoro-en-v0_19 | ||
| 94 | 103 | ||
| 95 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | 104 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 |
| 96 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 105 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| @@ -68,6 +68,8 @@ static SherpaOnnxOfflineTtsKokoroModelConfig GetOfflineTtsKokoroModelConfig( | @@ -68,6 +68,8 @@ static SherpaOnnxOfflineTtsKokoroModelConfig GetOfflineTtsKokoroModelConfig( | ||
| 68 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); | 68 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); |
| 69 | SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir); | 69 | SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir); |
| 70 | SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale); | 70 | SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale); |
| 71 | + SHERPA_ONNX_ASSIGN_ATTR_STR(dict_dir, dictDir); | ||
| 72 | + SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon); | ||
| 71 | 73 | ||
| 72 | return c; | 74 | return c; |
| 73 | } | 75 | } |
| @@ -172,6 +174,8 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper( | @@ -172,6 +174,8 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper( | ||
| 172 | SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.voices); | 174 | SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.voices); |
| 173 | SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.tokens); | 175 | SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.tokens); |
| 174 | SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.data_dir); | 176 | SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.data_dir); |
| 177 | + SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.dict_dir); | ||
| 178 | + SHERPA_ONNX_DELETE_C_STR(c.model.kokoro.lexicon); | ||
| 175 | 179 | ||
| 176 | SHERPA_ONNX_DELETE_C_STR(c.model.provider); | 180 | SHERPA_ONNX_DELETE_C_STR(c.model.provider); |
| 177 | 181 |
| @@ -134,6 +134,7 @@ The following tables list the examples in this folder. | @@ -134,6 +134,7 @@ The following tables list the examples in this folder. | ||
| 134 | |File| Description| | 134 | |File| Description| |
| 135 | |---|---| | 135 | |---|---| |
| 136 | |[./test_tts_non_streaming_kokoro_en.js](./test_tts_non_streaming_kokoro_en.js)| Text-to-speech with a Kokoro English Model| | 136 | |[./test_tts_non_streaming_kokoro_en.js](./test_tts_non_streaming_kokoro_en.js)| Text-to-speech with a Kokoro English Model| |
| 137 | +|[./test_tts_non_streaming_kokoro_zh_en.js](./test_tts_non_streaming_kokoro_zh_en.js)| Text-to-speech with a Kokoro Model supporting Chinese and English| | ||
| 137 | |[./test_tts_non_streaming_matcha_icefall_en.js](./test_tts_non_streaming_matcha_icefall_en.js)| Text-to-speech with a [MatchaTTS English Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)| | 138 | |[./test_tts_non_streaming_matcha_icefall_en.js](./test_tts_non_streaming_matcha_icefall_en.js)| Text-to-speech with a [MatchaTTS English Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)| |
| 138 | |[./test_tts_non_streaming_matcha_icefall_zh.js](./test_tts_non_streaming_matcha_icefall_zh.js)| Text-to-speech with a [MatchaTTS Chinese Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)| | 139 | |[./test_tts_non_streaming_matcha_icefall_zh.js](./test_tts_non_streaming_matcha_icefall_zh.js)| Text-to-speech with a [MatchaTTS Chinese Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)| |
| 139 | |[./test_tts_non_streaming_vits_piper_en.js](./test_tts_non_streaming_vits_piper_en.js)| Text-to-speech with a [piper](https://github.com/rhasspy/piper) English model| | 140 | |[./test_tts_non_streaming_vits_piper_en.js](./test_tts_non_streaming_vits_piper_en.js)| Text-to-speech with a [piper](https://github.com/rhasspy/piper) English model| |
| @@ -348,6 +349,16 @@ npm install naudiodon2 | @@ -348,6 +349,16 @@ npm install naudiodon2 | ||
| 348 | node ./test_vad_asr_non_streaming_sense_voice_microphone.js | 349 | node ./test_vad_asr_non_streaming_sense_voice_microphone.js |
| 349 | ``` | 350 | ``` |
| 350 | 351 | ||
| 352 | +### Text-to-speech with Kokoro TTS models (Chinese + English TTS) | ||
| 353 | + | ||
| 354 | +```bash | ||
| 355 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | ||
| 356 | +tar xf kokoro-multi-lang-v1_0.tar.bz2 | ||
| 357 | +rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 358 | + | ||
| 359 | +node ./test_tts_non_streaming_kokoro_zh_en.js | ||
| 360 | +``` | ||
| 361 | + | ||
| 351 | ### Text-to-speech with Kokoro TTS models (English TTS) | 362 | ### Text-to-speech with Kokoro TTS models (English TTS) |
| 352 | 363 | ||
| 353 | ```bash | 364 | ```bash |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +const sherpa_onnx = require('sherpa-onnx-node'); | ||
| 3 | + | ||
| 4 | +// please refer to | ||
| 5 | +// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html | ||
| 6 | +// to download model files | ||
| 7 | +function createOfflineTts() { | ||
| 8 | + const config = { | ||
| 9 | + model: { | ||
| 10 | + kokoro: { | ||
| 11 | + model: './kokoro-multi-lang-v1_0/model.onnx', | ||
| 12 | + voices: './kokoro-multi-lang-v1_0/voices.bin', | ||
| 13 | + tokens: './kokoro-multi-lang-v1_0/tokens.txt', | ||
| 14 | + dataDir: './kokoro-multi-lang-v1_0/espeak-ng-data', | ||
| 15 | + dictDir: './kokoro-multi-lang-v1_0/dict', | ||
| 16 | + lexicon: | ||
| 17 | + './kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt', | ||
| 18 | + }, | ||
| 19 | + debug: true, | ||
| 20 | + numThreads: 1, | ||
| 21 | + provider: 'cpu', | ||
| 22 | + }, | ||
| 23 | + maxNumSentences: 1, | ||
| 24 | + }; | ||
| 25 | + return new sherpa_onnx.OfflineTts(config); | ||
| 26 | +} | ||
| 27 | + | ||
| 28 | +const tts = createOfflineTts(); | ||
| 29 | + | ||
| 30 | +const text = | ||
| 31 | + '中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?' | ||
| 32 | + | ||
| 33 | +let start = Date.now(); | ||
| 34 | +const audio = tts.generate({text: text, sid: 48, speed: 1.0}); | ||
| 35 | +let stop = Date.now(); | ||
| 36 | +const elapsed_seconds = (stop - start) / 1000; | ||
| 37 | +const duration = audio.samples.length / audio.sampleRate; | ||
| 38 | +const real_time_factor = elapsed_seconds / duration; | ||
| 39 | +console.log('Wave duration', duration.toFixed(3), 'seconds') | ||
| 40 | +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') | ||
| 41 | +console.log( | ||
| 42 | + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, | ||
| 43 | + real_time_factor.toFixed(3)) | ||
| 44 | + | ||
| 45 | +const filename = 'test-kokoro-zh-en-48.wav'; | ||
| 46 | +sherpa_onnx.writeWave( | ||
| 47 | + filename, {samples: audio.samples, sampleRate: audio.sampleRate}); | ||
| 48 | + | ||
| 49 | +console.log(`Saved to ${filename}`); |
-
请 注册 或 登录 后发表评论