Committed by GitHub
Add JavaScript API (WebAssembly) for Kokoro TTS 1.0 (#1809)
正在显示 4 个修改的文件，包含 73 行增加和 3 行删除
| @@ -10,12 +10,21 @@ ls -lh | @@ -10,12 +10,21 @@ ls -lh | ||
| 10 | ls -lh node_modules | 10 | ls -lh node_modules |
| 11 | 11 | ||
| 12 | # offline tts | 12 | # offline tts |
| 13 | +# | ||
| 14 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | ||
| 15 | +tar xf kokoro-multi-lang-v1_0.tar.bz2 | ||
| 16 | +rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 17 | + | ||
| 18 | +node ./test-offline-tts-kokoro-zh-en.js | ||
| 19 | +ls -lh *.wav | ||
| 20 | +rm -rf kokoro-multi-lang-v1_0 | ||
| 13 | 21 | ||
| 14 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | 22 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 |
| 15 | tar xf kokoro-en-v0_19.tar.bz2 | 23 | tar xf kokoro-en-v0_19.tar.bz2 |
| 16 | rm kokoro-en-v0_19.tar.bz2 | 24 | rm kokoro-en-v0_19.tar.bz2 |
| 17 | 25 | ||
| 18 | node ./test-offline-tts-kokoro-en.js | 26 | node ./test-offline-tts-kokoro-en.js |
| 27 | +rm -rf kokoro-en-v0_19 | ||
| 19 | 28 | ||
| 20 | ls -lh | 29 | ls -lh |
| 21 | 30 |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | + | ||
| 3 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | + | ||
| 5 | +function createOfflineTts() { | ||
| 6 | + let offlineTtsKokoroModelConfig = { | ||
| 7 | + model: './kokoro-multi-lang-v1_0/model.onnx', | ||
| 8 | + voices: './kokoro-multi-lang-v1_0/voices.bin', | ||
| 9 | + tokens: './kokoro-multi-lang-v1_0/tokens.txt', | ||
| 10 | + dataDir: './kokoro-multi-lang-v1_0/espeak-ng-data', | ||
| 11 | + dictDir: './kokoro-multi-lang-v1_0/dict', | ||
| 12 | + lexicon: | ||
| 13 | + './kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt', | ||
| 14 | + lengthScale: 1.0, | ||
| 15 | + }; | ||
| 16 | + let offlineTtsModelConfig = { | ||
| 17 | + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, | ||
| 18 | + numThreads: 1, | ||
| 19 | + debug: 1, | ||
| 20 | + provider: 'cpu', | ||
| 21 | + }; | ||
| 22 | + | ||
| 23 | + let offlineTtsConfig = { | ||
| 24 | + offlineTtsModelConfig: offlineTtsModelConfig, | ||
| 25 | + maxNumSentences: 1, | ||
| 26 | + }; | ||
| 27 | + | ||
| 28 | + return sherpa_onnx.createOfflineTts(offlineTtsConfig); | ||
| 29 | +} | ||
| 30 | + | ||
| 31 | +const tts = createOfflineTts(); | ||
| 32 | +const speakerId = 49; | ||
| 33 | +const speed = 1.0; | ||
| 34 | +const text = | ||
| 35 | + '中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?' | ||
| 36 | + | ||
| 37 | +const audio = tts.generate({text: text, sid: speakerId, speed: speed}); | ||
| 38 | +tts.save('./test-kokoro-zh-en-49.wav', audio); | ||
| 39 | +console.log('Saved to test-kokoro-zh-en-49.wav successfully.'); | ||
| 40 | +tts.free(); |
| @@ -141,12 +141,15 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { | @@ -141,12 +141,15 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { | ||
| 141 | const voicesLen = Module.lengthBytesUTF8(config.voices) + 1; | 141 | const voicesLen = Module.lengthBytesUTF8(config.voices) + 1; |
| 142 | const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; | 142 | const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; |
| 143 | const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; | 143 | const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; |
| 144 | + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; | ||
| 145 | + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; | ||
| 144 | 146 | ||
| 145 | - const n = modelLen + voicesLen + tokensLen + dataDirLen; | 147 | + const n = |
| 148 | + modelLen + voicesLen + tokensLen + dataDirLen + dictDirLen + lexiconLen; | ||
| 146 | 149 | ||
| 147 | const buffer = Module._malloc(n); | 150 | const buffer = Module._malloc(n); |
| 148 | 151 | ||
| 149 | - const len = 5 * 4; | 152 | + const len = 7 * 4; |
| 150 | const ptr = Module._malloc(len); | 153 | const ptr = Module._malloc(len); |
| 151 | 154 | ||
| 152 | let offset = 0; | 155 | let offset = 0; |
| @@ -162,6 +165,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { | @@ -162,6 +165,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { | ||
| 162 | Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); | 165 | Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); |
| 163 | offset += dataDirLen; | 166 | offset += dataDirLen; |
| 164 | 167 | ||
| 168 | + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); | ||
| 169 | + offset += dictDirLen; | ||
| 170 | + | ||
| 171 | + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); | ||
| 172 | + offset += lexiconLen; | ||
| 173 | + | ||
| 165 | offset = 0; | 174 | offset = 0; |
| 166 | Module.setValue(ptr, buffer + offset, 'i8*'); | 175 | Module.setValue(ptr, buffer + offset, 'i8*'); |
| 167 | offset += modelLen; | 176 | offset += modelLen; |
| @@ -177,6 +186,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { | @@ -177,6 +186,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { | ||
| 177 | 186 | ||
| 178 | Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float'); | 187 | Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float'); |
| 179 | 188 | ||
| 189 | + Module.setValue(ptr + 20, buffer + offset, 'i8*'); | ||
| 190 | + offset += dictDirLen; | ||
| 191 | + | ||
| 192 | + Module.setValue(ptr + 24, buffer + offset, 'i8*'); | ||
| 193 | + offset += lexiconLen; | ||
| 194 | + | ||
| 180 | return { | 195 | return { |
| 181 | buffer: buffer, ptr: ptr, len: len, | 196 | buffer: buffer, ptr: ptr, len: len, |
| 182 | } | 197 | } |
| @@ -216,6 +231,8 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { | @@ -216,6 +231,8 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { | ||
| 216 | tokens: '', | 231 | tokens: '', |
| 217 | lengthScale: 1.0, | 232 | lengthScale: 1.0, |
| 218 | dataDir: '', | 233 | dataDir: '', |
| 234 | + dictDir: '', | ||
| 235 | + lexicon: '', | ||
| 219 | }; | 236 | }; |
| 220 | } | 237 | } |
| 221 | 238 | ||
| @@ -382,6 +399,8 @@ function createOfflineTts(Module, myConfig) { | @@ -382,6 +399,8 @@ function createOfflineTts(Module, myConfig) { | ||
| 382 | tokens: '', | 399 | tokens: '', |
| 383 | dataDir: '', | 400 | dataDir: '', |
| 384 | lengthScale: 1.0, | 401 | lengthScale: 1.0, |
| 402 | + dictDir: '', | ||
| 403 | + lexicon: '', | ||
| 385 | }; | 404 | }; |
| 386 | 405 | ||
| 387 | const offlineTtsModelConfig = { | 406 | const offlineTtsModelConfig = { |
| @@ -15,7 +15,7 @@ extern "C" { | @@ -15,7 +15,7 @@ extern "C" { | ||
| 15 | 15 | ||
| 16 | static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, ""); | 16 | static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, ""); |
| 17 | static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, ""); | 17 | static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, ""); |
| 18 | -static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 5 * 4, ""); | 18 | +static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 7 * 4, ""); |
| 19 | static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == | 19 | static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == |
| 20 | sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + | 20 | sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + |
| 21 | sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) + | 21 | sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) + |
| @@ -56,6 +56,8 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { | @@ -56,6 +56,8 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { | ||
| 56 | fprintf(stdout, "tokens: %s\n", kokoro->tokens); | 56 | fprintf(stdout, "tokens: %s\n", kokoro->tokens); |
| 57 | fprintf(stdout, "data_dir: %s\n", kokoro->data_dir); | 57 | fprintf(stdout, "data_dir: %s\n", kokoro->data_dir); |
| 58 | fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale); | 58 | fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale); |
| 59 | + fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir); | ||
| 60 | + fprintf(stdout, "lexicon: %s\n", kokoro->lexicon); | ||
| 59 | 61 | ||
| 60 | fprintf(stdout, "----------tts model config----------\n"); | 62 | fprintf(stdout, "----------tts model config----------\n"); |
| 61 | fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); | 63 | fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); |
-
请注册或登录后发表评论