正在显示
7 个修改的文件
包含
111 行增加
和
6 行删除
| @@ -103,6 +103,28 @@ jobs: | @@ -103,6 +103,28 @@ jobs: | ||
| 103 | rm kws-cxx-api | 103 | rm kws-cxx-api |
| 104 | rm -rf sherpa-onnx-kws-* | 104 | rm -rf sherpa-onnx-kws-* |
| 105 | 105 | ||
| 106 | + - name: Test Kokoro TTS (zh+en) | ||
| 107 | + shell: bash | ||
| 108 | + run: | | ||
| 109 | + g++ -std=c++17 -o kokoro-tts-zh-en-cxx-api ./cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc \ | ||
| 110 | + -I ./build/install/include \ | ||
| 111 | + -L ./build/install/lib/ \ | ||
| 112 | + -l sherpa-onnx-cxx-api \ | ||
| 113 | + -l sherpa-onnx-c-api \ | ||
| 114 | + -l onnxruntime | ||
| 115 | + | ||
| 116 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | ||
| 117 | + tar xf kokoro-multi-lang-v1_0.tar.bz2 | ||
| 118 | + rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 119 | + | ||
| 120 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 121 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 122 | + | ||
| 123 | + ./kokoro-tts-zh-en-cxx-api | ||
| 124 | + | ||
| 125 | + rm kokoro-tts-zh-en-cxx-api | ||
| 126 | + rm -rf kokoro-* | ||
| 127 | + | ||
| 106 | - name: Test Kokoro TTS (en) | 128 | - name: Test Kokoro TTS (en) |
| 107 | shell: bash | 129 | shell: bash |
| 108 | run: | | 130 | run: | |
| @@ -26,7 +26,7 @@ int32_t main() { | @@ -26,7 +26,7 @@ int32_t main() { | ||
| 26 | memset(&config, 0, sizeof(config)); | 26 | memset(&config, 0, sizeof(config)); |
| 27 | config.model_config.transducer.encoder = | 27 | config.model_config.transducer.encoder = |
| 28 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 28 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| 29 | - "encoder-epoch-12-avg-2-chunk-16-left-64.onnx"; | 29 | + "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"; |
| 30 | 30 | ||
| 31 | config.model_config.transducer.decoder = | 31 | config.model_config.transducer.decoder = |
| 32 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 32 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| @@ -34,7 +34,7 @@ int32_t main() { | @@ -34,7 +34,7 @@ int32_t main() { | ||
| 34 | 34 | ||
| 35 | config.model_config.transducer.joiner = | 35 | config.model_config.transducer.joiner = |
| 36 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 36 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| 37 | - "joiner-epoch-12-avg-2-chunk-16-left-64.onnx"; | 37 | + "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx"; |
| 38 | 38 | ||
| 39 | config.model_config.tokens = | 39 | config.model_config.tokens = |
| 40 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 40 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| @@ -58,7 +58,8 @@ int32_t main() { | @@ -58,7 +58,8 @@ int32_t main() { | ||
| 58 | "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n"); | 58 | "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n"); |
| 59 | 59 | ||
| 60 | const char *wav_filename = | 60 | const char *wav_filename = |
| 61 | - "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; | 61 | + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| 62 | + "test_wavs/3.wav"; | ||
| 62 | 63 | ||
| 63 | float tail_paddings[8000] = {0}; // 0.5 seconds | 64 | float tail_paddings[8000] = {0}; // 0.5 seconds |
| 64 | 65 |
| @@ -27,4 +27,7 @@ if(SHERPA_ONNX_ENABLE_TTS) | @@ -27,4 +27,7 @@ if(SHERPA_ONNX_ENABLE_TTS) | ||
| 27 | 27 | ||
| 28 | add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc) | 28 | add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc) |
| 29 | target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api) | 29 | target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api) |
| 30 | + | ||
| 31 | + add_executable(kokoro-tts-zh-en-cxx-api ./kokoro-tts-zh-en-cxx-api.cc) | ||
| 32 | + target_link_libraries(kokoro-tts-zh-en-cxx-api sherpa-onnx-cxx-api) | ||
| 30 | endif() | 33 | endif() |
cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
0 → 100644
| 1 | +// cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// This file shows how to use sherpa-onnx CXX API | ||
| 6 | +// for mixed Chinese and English TTS with Kokoro. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +/* | ||
| 10 | +Usage | ||
| 11 | + | ||
| 12 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | ||
| 13 | +tar xf kokoro-multi-lang-v1_0.tar.bz2 | ||
| 14 | +rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 15 | + | ||
| 16 | +./kokoro-tts-zh-en-cxx-api | ||
| 17 | + | ||
| 18 | + */ | ||
| 19 | +// clang-format on | ||
| 20 | + | ||
| 21 | +#include <string> | ||
| 22 | + | ||
| 23 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 24 | + | ||
| 25 | +static int32_t ProgressCallback(const float *samples, int32_t num_samples, | ||
| 26 | + float progress, void *arg) { | ||
| 27 | + fprintf(stderr, "Progress: %.3f%%\n", progress * 100); | ||
| 28 | + // return 1 to continue generating | ||
| 29 | + // return 0 to stop generating | ||
| 30 | + return 1; | ||
| 31 | +} | ||
| 32 | + | ||
| 33 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 34 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 35 | + OfflineTtsConfig config; | ||
| 36 | + | ||
| 37 | + config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx"; | ||
| 38 | + config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin"; | ||
| 39 | + config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt"; | ||
| 40 | + config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data"; | ||
| 41 | + config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict"; | ||
| 42 | + config.model.kokoro.lexicon = | ||
| 43 | + "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/" | ||
| 44 | + "lexicon-zh.txt"; | ||
| 45 | + | ||
| 46 | + config.model.num_threads = 2; | ||
| 47 | + | ||
| 48 | + // If you don't want to see debug messages, please set it to 0 | ||
| 49 | + config.model.debug = 1; | ||
| 50 | + | ||
| 51 | + std::string filename = "./generated-kokoro-zh-en-cxx.wav"; | ||
| 52 | + std::string text = | ||
| 53 | + "中英文语音合成测试。This is generated by next generation Kaldi using " | ||
| 54 | + "Kokoro without Misaki. 你觉得中英文说的如何呢?"; | ||
| 55 | + | ||
| 56 | + auto tts = OfflineTts::Create(config); | ||
| 57 | + int32_t sid = 50; | ||
| 58 | + float speed = 1.0; // larger -> faster in speech speed | ||
| 59 | + | ||
| 60 | +#if 0 | ||
| 61 | + // If you don't want to use a callback, then please enable this branch | ||
| 62 | + GeneratedAudio audio = tts.Generate(text, sid, speed); | ||
| 63 | +#else | ||
| 64 | + GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback); | ||
| 65 | +#endif | ||
| 66 | + | ||
| 67 | + WriteWave(filename, {audio.samples, audio.sample_rate}); | ||
| 68 | + | ||
| 69 | + fprintf(stderr, "Input text is: %s\n", text.c_str()); | ||
| 70 | + fprintf(stderr, "Speaker ID is is: %d\n", sid); | ||
| 71 | + fprintf(stderr, "Saved to: %s\n", filename.c_str()); | ||
| 72 | + | ||
| 73 | + return 0; | ||
| 74 | +} |
| @@ -25,7 +25,7 @@ int32_t main() { | @@ -25,7 +25,7 @@ int32_t main() { | ||
| 25 | KeywordSpotterConfig config; | 25 | KeywordSpotterConfig config; |
| 26 | config.model_config.transducer.encoder = | 26 | config.model_config.transducer.encoder = |
| 27 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 27 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| 28 | - "encoder-epoch-12-avg-2-chunk-16-left-64.onnx"; | 28 | + "encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"; |
| 29 | 29 | ||
| 30 | config.model_config.transducer.decoder = | 30 | config.model_config.transducer.decoder = |
| 31 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 31 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| @@ -33,7 +33,7 @@ int32_t main() { | @@ -33,7 +33,7 @@ int32_t main() { | ||
| 33 | 33 | ||
| 34 | config.model_config.transducer.joiner = | 34 | config.model_config.transducer.joiner = |
| 35 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 35 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| 36 | - "joiner-epoch-12-avg-2-chunk-16-left-64.onnx"; | 36 | + "joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx"; |
| 37 | 37 | ||
| 38 | config.model_config.tokens = | 38 | config.model_config.tokens = |
| 39 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" | 39 | "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| @@ -57,7 +57,8 @@ int32_t main() { | @@ -57,7 +57,8 @@ int32_t main() { | ||
| 57 | << "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n"; | 57 | << "--Test pre-defined keywords from test_wavs/test_keywords.txt--\n"; |
| 58 | 58 | ||
| 59 | std::string wave_filename = | 59 | std::string wave_filename = |
| 60 | - "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; | 60 | + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile/" |
| 61 | + "test_wavs/3.wav"; | ||
| 61 | 62 | ||
| 62 | std::array<float, 8000> tail_paddings = {0}; // 0.5 seconds | 63 | std::array<float, 8000> tail_paddings = {0}; // 0.5 seconds |
| 63 | 64 |
| @@ -343,6 +343,8 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) { | @@ -343,6 +343,8 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) { | ||
| 343 | c.model.kokoro.tokens = config.model.kokoro.tokens.c_str(); | 343 | c.model.kokoro.tokens = config.model.kokoro.tokens.c_str(); |
| 344 | c.model.kokoro.data_dir = config.model.kokoro.data_dir.c_str(); | 344 | c.model.kokoro.data_dir = config.model.kokoro.data_dir.c_str(); |
| 345 | c.model.kokoro.length_scale = config.model.kokoro.length_scale; | 345 | c.model.kokoro.length_scale = config.model.kokoro.length_scale; |
| 346 | + c.model.kokoro.dict_dir = config.model.kokoro.dict_dir.c_str(); | ||
| 347 | + c.model.kokoro.lexicon = config.model.kokoro.lexicon.c_str(); | ||
| 346 | 348 | ||
| 347 | c.model.num_threads = config.model.num_threads; | 349 | c.model.num_threads = config.model.num_threads; |
| 348 | c.model.debug = config.model.debug; | 350 | c.model.debug = config.model.debug; |
| @@ -343,6 +343,8 @@ struct OfflineTtsKokoroModelConfig { | @@ -343,6 +343,8 @@ struct OfflineTtsKokoroModelConfig { | ||
| 343 | std::string voices; | 343 | std::string voices; |
| 344 | std::string tokens; | 344 | std::string tokens; |
| 345 | std::string data_dir; | 345 | std::string data_dir; |
| 346 | + std::string dict_dir; | ||
| 347 | + std::string lexicon; | ||
| 346 | 348 | ||
| 347 | float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed | 349 | float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed |
| 348 | }; | 350 | }; |
-
请 注册 或 登录 后发表评论