正在显示
11 个修改的文件
包含
255 行增加
和
0 行删除
| @@ -79,6 +79,32 @@ jobs: | @@ -79,6 +79,32 @@ jobs: | ||
| 79 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib | 79 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib |
| 80 | fi | 80 | fi |
| 81 | 81 | ||
| 82 | + - name: Test Kokoro TTS (en) | ||
| 83 | + shell: bash | ||
| 84 | + run: | | ||
| 85 | + gcc -o kokoro-tts-en-c-api ./c-api-examples/kokoro-tts-en-c-api.c \ | ||
| 86 | + -I ./build/install/include \ | ||
| 87 | + -L ./build/install/lib/ \ | ||
| 88 | + -l sherpa-onnx-c-api \ | ||
| 89 | + -l onnxruntime | ||
| 90 | + | ||
| 91 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | ||
| 92 | + tar xf kokoro-en-v0_19.tar.bz2 | ||
| 93 | + rm kokoro-en-v0_19.tar.bz2 | ||
| 94 | + | ||
| 95 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 96 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 97 | + | ||
| 98 | + ./kokoro-tts-en-c-api | ||
| 99 | + | ||
| 100 | + rm ./kokoro-tts-en-c-api | ||
| 101 | + rm -rf kokoro-en-* | ||
| 102 | + | ||
| 103 | + - uses: actions/upload-artifact@v4 | ||
| 104 | + with: | ||
| 105 | + name: kokoro-tts-${{ matrix.os }} | ||
| 106 | + path: ./generated-kokoro-*.wav | ||
| 107 | + | ||
| 82 | - name: Test Matcha TTS (zh) | 108 | - name: Test Matcha TTS (zh) |
| 83 | shell: bash | 109 | shell: bash |
| 84 | run: | | 110 | run: | |
| @@ -81,6 +81,33 @@ jobs: | @@ -81,6 +81,33 @@ jobs: | ||
| 81 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib | 81 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib |
| 82 | fi | 82 | fi |
| 83 | 83 | ||
| 84 | + - name: Test Kokoro TTS (en) | ||
| 85 | + shell: bash | ||
| 86 | + run: | | ||
| 87 | + g++ -std=c++17 -o kokoro-tts-en-cxx-api ./cxx-api-examples/kokoro-tts-en-cxx-api.cc \ | ||
| 88 | + -I ./build/install/include \ | ||
| 89 | + -L ./build/install/lib/ \ | ||
| 90 | + -l sherpa-onnx-cxx-api \ | ||
| 91 | + -l sherpa-onnx-c-api \ | ||
| 92 | + -l onnxruntime | ||
| 93 | + | ||
| 94 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | ||
| 95 | + tar xf kokoro-en-v0_19.tar.bz2 | ||
| 96 | + rm kokoro-en-v0_19.tar.bz2 | ||
| 97 | + | ||
| 98 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 99 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 100 | + | ||
| 101 | + ./kokoro-tts-en-cxx-api | ||
| 102 | + | ||
| 103 | + rm kokoro-tts-en-cxx-api | ||
| 104 | + rm -rf kokoro-en-* | ||
| 105 | + | ||
| 106 | + - uses: actions/upload-artifact@v4 | ||
| 107 | + with: | ||
| 108 | + name: kokoro-tts-${{ matrix.os }} | ||
| 109 | + path: ./generated-kokoro-*.wav | ||
| 110 | + | ||
| 84 | - name: Test Matcha TTS (zh) | 111 | - name: Test Matcha TTS (zh) |
| 85 | shell: bash | 112 | shell: bash |
| 86 | run: | | 113 | run: | |
| @@ -127,3 +127,4 @@ harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE | @@ -127,3 +127,4 @@ harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE | ||
| 127 | harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md | 127 | harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md |
| 128 | matcha-icefall-zh-baker | 128 | matcha-icefall-zh-baker |
| 129 | matcha-icefall-en_US-ljspeech | 129 | matcha-icefall-en_US-ljspeech |
| 130 | +kokoro-en-v0_19 |
| @@ -13,6 +13,9 @@ if(SHERPA_ONNX_ENABLE_TTS) | @@ -13,6 +13,9 @@ if(SHERPA_ONNX_ENABLE_TTS) | ||
| 13 | 13 | ||
| 14 | add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c) | 14 | add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c) |
| 15 | target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api) | 15 | target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api) |
| 16 | + | ||
| 17 | + add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c) | ||
| 18 | + target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api) | ||
| 16 | endif() | 19 | endif() |
| 17 | 20 | ||
| 18 | if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION) | 21 | if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION) |
c-api-examples/kokoro-tts-en-c-api.c
0 → 100644
| 1 | +// c-api-examples/kokoro-tts-en-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// This file shows how to use sherpa-onnx C API | ||
| 6 | +// for English TTS with Kokoro. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +/* | ||
| 10 | +Usage | ||
| 11 | + | ||
| 12 | + | ||
| 13 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | ||
| 14 | +tar xf kokoro-en-v0_19.tar.bz2 | ||
| 15 | +rm kokoro-en-v0_19.tar.bz2 | ||
| 16 | + | ||
| 17 | +./kokoro-tts-en-c-api | ||
| 18 | + | ||
| 19 | + */ | ||
| 20 | +// clang-format on | ||
| 21 | + | ||
| 22 | +#include <stdio.h> | ||
| 23 | +#include <stdlib.h> | ||
| 24 | +#include <string.h> | ||
| 25 | + | ||
| 26 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 27 | + | ||
| 28 | +static int32_t ProgressCallback(const float *samples, int32_t num_samples, | ||
| 29 | + float progress) { | ||
| 30 | + fprintf(stderr, "Progress: %.3f%%\n", progress * 100); | ||
| 31 | + // return 1 to continue generating | ||
| 32 | + // return 0 to stop generating | ||
| 33 | + return 1; | ||
| 34 | +} | ||
| 35 | + | ||
| 36 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 37 | + SherpaOnnxOfflineTtsConfig config; | ||
| 38 | + memset(&config, 0, sizeof(config)); | ||
| 39 | + config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx"; | ||
| 40 | + config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin"; | ||
| 41 | + config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt"; | ||
| 42 | + config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data"; | ||
| 43 | + | ||
| 44 | + config.model.num_threads = 2; | ||
| 45 | + | ||
| 46 | + // If you don't want to see debug messages, please set it to 0 | ||
| 47 | + config.model.debug = 1; | ||
| 48 | + | ||
| 49 | + const char *filename = "./generated-kokoro-en.wav"; | ||
| 50 | + const char *text = | ||
| 51 | + "Today as always, men fall into two groups: slaves and free men. Whoever " | ||
| 52 | + "does not have two-thirds of his day for himself, is a slave, whatever " | ||
| 53 | + "he may be: a statesman, a businessman, an official, or a scholar. " | ||
| 54 | + "Friends fell out often because life was changing so fast. The easiest " | ||
| 55 | + "thing in the world was to lose touch with someone."; | ||
| 56 | + | ||
| 57 | + const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config); | ||
| 58 | + // mapping of sid to voice name | ||
| 59 | + // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam | ||
| 60 | + // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis | ||
| 61 | + int32_t sid = 0; | ||
| 62 | + float speed = 1.0; // larger -> faster in speech speed | ||
| 63 | + | ||
| 64 | +#if 0 | ||
| 65 | + // If you don't want to use a callback, then please enable this branch | ||
| 66 | + const SherpaOnnxGeneratedAudio *audio = | ||
| 67 | + SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed); | ||
| 68 | +#else | ||
| 69 | + const SherpaOnnxGeneratedAudio *audio = | ||
| 70 | + SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed, | ||
| 71 | + ProgressCallback); | ||
| 72 | +#endif | ||
| 73 | + | ||
| 74 | + SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename); | ||
| 75 | + | ||
| 76 | + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); | ||
| 77 | + SherpaOnnxDestroyOfflineTts(tts); | ||
| 78 | + | ||
| 79 | + fprintf(stderr, "Input text is: %s\n", text); | ||
| 80 | + fprintf(stderr, "Speaker ID is: %d\n", sid); | ||
| 81 | + fprintf(stderr, "Saved to: %s\n", filename); | ||
| 82 | + | ||
| 83 | + return 0; | ||
| 84 | +} |
| @@ -21,4 +21,7 @@ if(SHERPA_ONNX_ENABLE_TTS) | @@ -21,4 +21,7 @@ if(SHERPA_ONNX_ENABLE_TTS) | ||
| 21 | 21 | ||
| 22 | add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc) | 22 | add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc) |
| 23 | target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api) | 23 | target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api) |
| 24 | + | ||
| 25 | + add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc) | ||
| 26 | + target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api) | ||
| 24 | endif() | 27 | endif() |
cxx-api-examples/kokoro-tts-en-cxx-api.cc
0 → 100644
| 1 | +// cxx-api-examples/kokoro-tts-en-cxx-api.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// This file shows how to use sherpa-onnx CXX API | ||
| 6 | +// for English TTS with Kokoro. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +/* | ||
| 10 | +Usage | ||
| 11 | + | ||
| 12 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 | ||
| 13 | +tar xf kokoro-en-v0_19.tar.bz2 | ||
| 14 | +rm kokoro-en-v0_19.tar.bz2 | ||
| 15 | + | ||
| 16 | +./kokoro-tts-en-cxx-api | ||
| 17 | + | ||
| 18 | + */ | ||
| 19 | +// clang-format on | ||
| 20 | + | ||
| 21 | +#include <string> | ||
| 22 | + | ||
| 23 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 24 | + | ||
| 25 | +static int32_t ProgressCallback(const float *samples, int32_t num_samples, | ||
| 26 | + float progress, void *arg) { | ||
| 27 | + fprintf(stderr, "Progress: %.3f%%\n", progress * 100); | ||
| 28 | + // return 1 to continue generating | ||
| 29 | + // return 0 to stop generating | ||
| 30 | + return 1; | ||
| 31 | +} | ||
| 32 | + | ||
| 33 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 34 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 35 | + OfflineTtsConfig config; | ||
| 36 | + | ||
| 37 | + config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx"; | ||
| 38 | + config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin"; | ||
| 39 | + config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt"; | ||
| 40 | + config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data"; | ||
| 41 | + | ||
| 42 | + config.model.num_threads = 2; | ||
| 43 | + | ||
| 44 | + // If you don't want to see debug messages, please set it to 0 | ||
| 45 | + config.model.debug = 1; | ||
| 46 | + | ||
| 47 | + std::string filename = "./generated-kokoro-en-cxx.wav"; | ||
| 48 | + std::string text = | ||
| 49 | + "Today as always, men fall into two groups: slaves and free men. Whoever " | ||
| 50 | + "does not have two-thirds of his day for himself, is a slave, whatever " | ||
| 51 | + "he may be: a statesman, a businessman, an official, or a scholar. " | ||
| 52 | + "Friends fell out often because life was changing so fast. The easiest " | ||
| 53 | + "thing in the world was to lose touch with someone."; | ||
| 54 | + | ||
| 55 | + auto tts = OfflineTts::Create(config); | ||
| 56 | + int32_t sid = 0; | ||
| 57 | + float speed = 1.0; // larger -> faster in speech speed | ||
| 58 | + | ||
| 59 | +#if 0 | ||
| 60 | + // If you don't want to use a callback, then please enable this branch | ||
| 61 | + GeneratedAudio audio = tts.Generate(text, sid, speed); | ||
| 62 | +#else | ||
| 63 | + GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback); | ||
| 64 | +#endif | ||
| 65 | + | ||
| 66 | + WriteWave(filename, {audio.samples, audio.sample_rate}); | ||
| 67 | + | ||
| 68 | + fprintf(stderr, "Input text is: %s\n", text.c_str()); | ||
| 69 | + fprintf(stderr, "Speaker ID is: %d\n", sid); | ||
| 70 | + fprintf(stderr, "Saved to: %s\n", filename.c_str()); | ||
| 71 | + | ||
| 72 | + return 0; | ||
| 73 | +} |
| @@ -1092,6 +1092,18 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig( | @@ -1092,6 +1092,18 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig( | ||
| 1092 | tts_config.model.matcha.dict_dir = | 1092 | tts_config.model.matcha.dict_dir = |
| 1093 | SHERPA_ONNX_OR(config->model.matcha.dict_dir, ""); | 1093 | SHERPA_ONNX_OR(config->model.matcha.dict_dir, ""); |
| 1094 | 1094 | ||
| 1095 | + // kokoro | ||
| 1096 | + tts_config.model.kokoro.model = | ||
| 1097 | + SHERPA_ONNX_OR(config->model.kokoro.model, ""); | ||
| 1098 | + tts_config.model.kokoro.voices = | ||
| 1099 | + SHERPA_ONNX_OR(config->model.kokoro.voices, ""); | ||
| 1100 | + tts_config.model.kokoro.tokens = | ||
| 1101 | + SHERPA_ONNX_OR(config->model.kokoro.tokens, ""); | ||
| 1102 | + tts_config.model.kokoro.data_dir = | ||
| 1103 | + SHERPA_ONNX_OR(config->model.kokoro.data_dir, ""); | ||
| 1104 | + tts_config.model.kokoro.length_scale = | ||
| 1105 | + SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0); | ||
| 1106 | + | ||
| 1095 | tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | 1107 | tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); |
| 1096 | tts_config.model.debug = config->model.debug; | 1108 | tts_config.model.debug = config->model.debug; |
| 1097 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | 1109 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); |
| @@ -910,12 +910,22 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsMatchaModelConfig { | @@ -910,12 +910,22 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsMatchaModelConfig { | ||
| 910 | const char *dict_dir; | 910 | const char *dict_dir; |
| 911 | } SherpaOnnxOfflineTtsMatchaModelConfig; | 911 | } SherpaOnnxOfflineTtsMatchaModelConfig; |
| 912 | 912 | ||
| 913 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig { | ||
| 914 | + const char *model; | ||
| 915 | + const char *voices; | ||
| 916 | + const char *tokens; | ||
| 917 | + const char *data_dir; | ||
| 918 | + | ||
| 919 | + float length_scale; // < 1, faster in speech speed; > 1, slower in speed | ||
| 920 | +} SherpaOnnxOfflineTtsKokoroModelConfig; | ||
| 921 | + | ||
| 913 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { | 922 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { |
| 914 | SherpaOnnxOfflineTtsVitsModelConfig vits; | 923 | SherpaOnnxOfflineTtsVitsModelConfig vits; |
| 915 | int32_t num_threads; | 924 | int32_t num_threads; |
| 916 | int32_t debug; | 925 | int32_t debug; |
| 917 | const char *provider; | 926 | const char *provider; |
| 918 | SherpaOnnxOfflineTtsMatchaModelConfig matcha; | 927 | SherpaOnnxOfflineTtsMatchaModelConfig matcha; |
| 928 | + SherpaOnnxOfflineTtsKokoroModelConfig kokoro; | ||
| 919 | } SherpaOnnxOfflineTtsModelConfig; | 929 | } SherpaOnnxOfflineTtsModelConfig; |
| 920 | 930 | ||
| 921 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { | 931 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { |
| @@ -338,6 +338,12 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) { | @@ -338,6 +338,12 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) { | ||
| 338 | c.model.matcha.length_scale = config.model.matcha.length_scale; | 338 | c.model.matcha.length_scale = config.model.matcha.length_scale; |
| 339 | c.model.matcha.dict_dir = config.model.matcha.dict_dir.c_str(); | 339 | c.model.matcha.dict_dir = config.model.matcha.dict_dir.c_str(); |
| 340 | 340 | ||
| 341 | + c.model.kokoro.model = config.model.kokoro.model.c_str(); | ||
| 342 | + c.model.kokoro.voices = config.model.kokoro.voices.c_str(); | ||
| 343 | + c.model.kokoro.tokens = config.model.kokoro.tokens.c_str(); | ||
| 344 | + c.model.kokoro.data_dir = config.model.kokoro.data_dir.c_str(); | ||
| 345 | + c.model.kokoro.length_scale = config.model.kokoro.length_scale; | ||
| 346 | + | ||
| 341 | c.model.num_threads = config.model.num_threads; | 347 | c.model.num_threads = config.model.num_threads; |
| 342 | c.model.debug = config.model.debug; | 348 | c.model.debug = config.model.debug; |
| 343 | c.model.provider = config.model.provider.c_str(); | 349 | c.model.provider = config.model.provider.c_str(); |
| @@ -338,9 +338,19 @@ struct OfflineTtsMatchaModelConfig { | @@ -338,9 +338,19 @@ struct OfflineTtsMatchaModelConfig { | ||
| 338 | float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed | 338 | float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed |
| 339 | }; | 339 | }; |
| 340 | 340 | ||
| 341 | +struct OfflineTtsKokoroModelConfig { | ||
| 342 | + std::string model; | ||
| 343 | + std::string voices; | ||
| 344 | + std::string tokens; | ||
| 345 | + std::string data_dir; | ||
| 346 | + | ||
| 347 | + float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed | ||
| 348 | +}; | ||
| 349 | + | ||
| 341 | struct OfflineTtsModelConfig { | 350 | struct OfflineTtsModelConfig { |
| 342 | OfflineTtsVitsModelConfig vits; | 351 | OfflineTtsVitsModelConfig vits; |
| 343 | OfflineTtsMatchaModelConfig matcha; | 352 | OfflineTtsMatchaModelConfig matcha; |
| 353 | + OfflineTtsKokoroModelConfig kokoro; | ||
| 344 | int32_t num_threads = 1; | 354 | int32_t num_threads = 1; |
| 345 | bool debug = false; | 355 | bool debug = false; |
| 346 | std::string provider = "cpu"; | 356 | std::string provider = "cpu"; |
-
请 注册 或 登录 后发表评论