Fangjun Kuang
Committed by GitHub

Add C API for Kokoro TTS models (#1717)

@@ -79,6 +79,32 @@ jobs: @@ -79,6 +79,32 @@ jobs:
79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib 79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib
80 fi 80 fi
81 81
  82 + - name: Test Kokoro TTS (en)
  83 + shell: bash
  84 + run: |
  85 + gcc -o kokoro-tts-en-c-api ./c-api-examples/kokoro-tts-en-c-api.c \
  86 + -I ./build/install/include \
  87 + -L ./build/install/lib/ \
  88 + -l sherpa-onnx-c-api \
  89 + -l onnxruntime
  90 +
  91 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  92 + tar xf kokoro-en-v0_19.tar.bz2
  93 + rm kokoro-en-v0_19.tar.bz2
  94 +
  95 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  96 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  97 +
  98 + ./kokoro-tts-en-c-api
  99 +
  100 + rm ./kokoro-tts-en-c-api
  101 + rm -rf kokoro-en-*
  102 +
  103 + - uses: actions/upload-artifact@v4
  104 + with:
  105 + name: kokoro-tts-${{ matrix.os }}
  106 + path: ./generated-kokoro-*.wav
  107 +
82 - name: Test Matcha TTS (zh) 108 - name: Test Matcha TTS (zh)
83 shell: bash 109 shell: bash
84 run: | 110 run: |
@@ -81,6 +81,33 @@ jobs: @@ -81,6 +81,33 @@ jobs:
81 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib 81 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
82 fi 82 fi
83 83
  84 + - name: Test Kokoro TTS (en)
  85 + shell: bash
  86 + run: |
  87 + g++ -std=c++17 -o kokoro-tts-en-cxx-api ./cxx-api-examples/kokoro-tts-en-cxx-api.cc \
  88 + -I ./build/install/include \
  89 + -L ./build/install/lib/ \
  90 + -l sherpa-onnx-cxx-api \
  91 + -l sherpa-onnx-c-api \
  92 + -l onnxruntime
  93 +
  94 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  95 + tar xf kokoro-en-v0_19.tar.bz2
  96 + rm kokoro-en-v0_19.tar.bz2
  97 +
  98 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  99 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  100 +
  101 + ./kokoro-tts-en-cxx-api
  102 +
  103 + rm kokoro-tts-en-cxx-api
  104 + rm -rf kokoro-en-*
  105 +
  106 + - uses: actions/upload-artifact@v4
  107 + with:
  108 + name: kokoro-tts-${{ matrix.os }}
  109 + path: ./generated-kokoro-*.wav
  110 +
84 - name: Test Matcha TTS (zh) 111 - name: Test Matcha TTS (zh)
85 shell: bash 112 shell: bash
86 run: | 113 run: |
@@ -127,3 +127,4 @@ harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE @@ -127,3 +127,4 @@ harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
127 harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md 127 harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md
128 matcha-icefall-zh-baker 128 matcha-icefall-zh-baker
129 matcha-icefall-en_US-ljspeech 129 matcha-icefall-en_US-ljspeech
  130 +kokoro-en-v0_19
@@ -13,6 +13,9 @@ if(SHERPA_ONNX_ENABLE_TTS) @@ -13,6 +13,9 @@ if(SHERPA_ONNX_ENABLE_TTS)
13 13
14 add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c) 14 add_executable(matcha-tts-en-c-api matcha-tts-en-c-api.c)
15 target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api) 15 target_link_libraries(matcha-tts-en-c-api sherpa-onnx-c-api)
  16 +
  17 + add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
  18 + target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)
16 endif() 19 endif()
17 20
18 if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION) 21 if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  1 +// c-api-examples/kokoro-tts-en-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +// This file shows how to use sherpa-onnx C API
  6 +// for English TTS with Kokoro.
  7 +//
  8 +// clang-format off
  9 +/*
  10 +Usage
  11 +
  12 +
  13 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  14 +tar xf kokoro-en-v0_19.tar.bz2
  15 +rm kokoro-en-v0_19.tar.bz2
  16 +
  17 +./kokoro-tts-en-c-api
  18 +
  19 + */
  20 +// clang-format on
  21 +
  22 +#include <stdio.h>
  23 +#include <stdlib.h>
  24 +#include <string.h>
  25 +
  26 +#include "sherpa-onnx/c-api/c-api.h"
  27 +
  28 +static int32_t ProgressCallback(const float *samples, int32_t num_samples,
  29 + float progress) {
  30 + fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  31 + // return 1 to continue generating
  32 + // return 0 to stop generating
  33 + return 1;
  34 +}
  35 +
  36 +int32_t main(int32_t argc, char *argv[]) {
  37 + SherpaOnnxOfflineTtsConfig config;
  38 + memset(&config, 0, sizeof(config));
  39 + config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
  40 + config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
  41 + config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
  42 + config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
  43 +
  44 + config.model.num_threads = 2;
  45 +
  46 + // If you don't want to see debug messages, please set it to 0
  47 + config.model.debug = 1;
  48 +
  49 + const char *filename = "./generated-kokoro-en.wav";
  50 + const char *text =
  51 + "Today as always, men fall into two groups: slaves and free men. Whoever "
  52 + "does not have two-thirds of his day for himself, is a slave, whatever "
  53 + "he may be: a statesman, a businessman, an official, or a scholar. "
  54 + "Friends fell out often because life was changing so fast. The easiest "
  55 + "thing in the world was to lose touch with someone.";
  56 +
  57 + const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  58 + // mapping of sid to voice name
  59 + // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam
  60 + // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis
  61 + int32_t sid = 0;
  62 + float speed = 1.0; // larger -> faster in speech speed
  63 +
  64 +#if 0
  65 + // If you don't want to use a callback, then please enable this branch
  66 + const SherpaOnnxGeneratedAudio *audio =
  67 + SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
  68 +#else
  69 + const SherpaOnnxGeneratedAudio *audio =
  70 + SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
  71 + ProgressCallback);
  72 +#endif
  73 +
  74 + SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
  75 +
  76 + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  77 + SherpaOnnxDestroyOfflineTts(tts);
  78 +
  79 + fprintf(stderr, "Input text is: %s\n", text);
  80 + fprintf(stderr, "Speaker ID is is: %d\n", sid);
  81 + fprintf(stderr, "Saved to: %s\n", filename);
  82 +
  83 + return 0;
  84 +}
@@ -21,4 +21,7 @@ if(SHERPA_ONNX_ENABLE_TTS) @@ -21,4 +21,7 @@ if(SHERPA_ONNX_ENABLE_TTS)
21 21
22 add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc) 22 add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc)
23 target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api) 23 target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api)
  24 +
  25 + add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc)
  26 + target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api)
24 endif() 27 endif()
  1 +// cxx-api-examples/kokoro-tts-en-cxx-api.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +// This file shows how to use sherpa-onnx CXX API
  6 +// for English TTS with Kokoro.
  7 +//
  8 +// clang-format off
  9 +/*
  10 +Usage
  11 +
  12 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  13 +tar xf kokoro-en-v0_19.tar.bz2
  14 +rm kokoro-en-v0_19.tar.bz2
  15 +
  16 +./kokoro-tts-en-cxx-api
  17 +
  18 + */
  19 +// clang-format on
  20 +
  21 +#include <string>
  22 +
  23 +#include "sherpa-onnx/c-api/cxx-api.h"
  24 +
  25 +static int32_t ProgressCallback(const float *samples, int32_t num_samples,
  26 + float progress, void *arg) {
  27 + fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  28 + // return 1 to continue generating
  29 + // return 0 to stop generating
  30 + return 1;
  31 +}
  32 +
  33 +int32_t main(int32_t argc, char *argv[]) {
  34 + using namespace sherpa_onnx::cxx; // NOLINT
  35 + OfflineTtsConfig config;
  36 +
  37 + config.model.kokoro.model = "./kokoro-en-v0_19/model.onnx";
  38 + config.model.kokoro.voices = "./kokoro-en-v0_19/voices.bin";
  39 + config.model.kokoro.tokens = "./kokoro-en-v0_19/tokens.txt";
  40 + config.model.kokoro.data_dir = "./kokoro-en-v0_19/espeak-ng-data";
  41 +
  42 + config.model.num_threads = 2;
  43 +
  44 + // If you don't want to see debug messages, please set it to 0
  45 + config.model.debug = 1;
  46 +
  47 + std::string filename = "./generated-kokoro-en-cxx.wav";
  48 + std::string text =
  49 + "Today as always, men fall into two groups: slaves and free men. Whoever "
  50 + "does not have two-thirds of his day for himself, is a slave, whatever "
  51 + "he may be: a statesman, a businessman, an official, or a scholar. "
  52 + "Friends fell out often because life was changing so fast. The easiest "
  53 + "thing in the world was to lose touch with someone.";
  54 +
  55 + auto tts = OfflineTts::Create(config);
  56 + int32_t sid = 0;
  57 + float speed = 1.0; // larger -> faster in speech speed
  58 +
  59 +#if 0
  60 + // If you don't want to use a callback, then please enable this branch
  61 + GeneratedAudio audio = tts.Generate(text, sid, speed);
  62 +#else
  63 + GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
  64 +#endif
  65 +
  66 + WriteWave(filename, {audio.samples, audio.sample_rate});
  67 +
  68 + fprintf(stderr, "Input text is: %s\n", text.c_str());
  69 + fprintf(stderr, "Speaker ID is is: %d\n", sid);
  70 + fprintf(stderr, "Saved to: %s\n", filename.c_str());
  71 +
  72 + return 0;
  73 +}
@@ -1092,6 +1092,18 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig( @@ -1092,6 +1092,18 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
1092 tts_config.model.matcha.dict_dir = 1092 tts_config.model.matcha.dict_dir =
1093 SHERPA_ONNX_OR(config->model.matcha.dict_dir, ""); 1093 SHERPA_ONNX_OR(config->model.matcha.dict_dir, "");
1094 1094
  1095 + // kokoro
  1096 + tts_config.model.kokoro.model =
  1097 + SHERPA_ONNX_OR(config->model.kokoro.model, "");
  1098 + tts_config.model.kokoro.voices =
  1099 + SHERPA_ONNX_OR(config->model.kokoro.voices, "");
  1100 + tts_config.model.kokoro.tokens =
  1101 + SHERPA_ONNX_OR(config->model.kokoro.tokens, "");
  1102 + tts_config.model.kokoro.data_dir =
  1103 + SHERPA_ONNX_OR(config->model.kokoro.data_dir, "");
  1104 + tts_config.model.kokoro.length_scale =
  1105 + SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0);
  1106 +
1095 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); 1107 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
1096 tts_config.model.debug = config->model.debug; 1108 tts_config.model.debug = config->model.debug;
1097 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); 1109 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
@@ -910,12 +910,22 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsMatchaModelConfig { @@ -910,12 +910,22 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsMatchaModelConfig {
910 const char *dict_dir; 910 const char *dict_dir;
911 } SherpaOnnxOfflineTtsMatchaModelConfig; 911 } SherpaOnnxOfflineTtsMatchaModelConfig;
912 912
  913 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig {  // Kokoro TTS model settings for the C API
  914 + const char *model;  // model file; the bundled example uses ./kokoro-en-v0_19/model.onnx
  915 + const char *voices;  // voices file; the bundled example uses ./kokoro-en-v0_19/voices.bin
  916 + const char *tokens;  // tokens file; the bundled example uses ./kokoro-en-v0_19/tokens.txt
  917 + const char *data_dir;  // data directory; the bundled example uses ./kokoro-en-v0_19/espeak-ng-data
  918 +
  919 + float length_scale; // < 1: faster speech; > 1: slower speech. NOTE(review): the C API applies SHERPA_ONNX_OR(..., 1.0) to this field - presumably 0 falls back to 1.0; confirm
  920 +} SherpaOnnxOfflineTtsKokoroModelConfig;
  921 +
913 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { 922 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
914 SherpaOnnxOfflineTtsVitsModelConfig vits; 923 SherpaOnnxOfflineTtsVitsModelConfig vits;
915 int32_t num_threads; 924 int32_t num_threads;
916 int32_t debug; 925 int32_t debug;
917 const char *provider; 926 const char *provider;
918 SherpaOnnxOfflineTtsMatchaModelConfig matcha; 927 SherpaOnnxOfflineTtsMatchaModelConfig matcha;
  928 + SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
919 } SherpaOnnxOfflineTtsModelConfig; 929 } SherpaOnnxOfflineTtsModelConfig;
920 930
921 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { 931 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
@@ -338,6 +338,12 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) { @@ -338,6 +338,12 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) {
338 c.model.matcha.length_scale = config.model.matcha.length_scale; 338 c.model.matcha.length_scale = config.model.matcha.length_scale;
339 c.model.matcha.dict_dir = config.model.matcha.dict_dir.c_str(); 339 c.model.matcha.dict_dir = config.model.matcha.dict_dir.c_str();
340 340
  341 + c.model.kokoro.model = config.model.kokoro.model.c_str();
  342 + c.model.kokoro.voices = config.model.kokoro.voices.c_str();
  343 + c.model.kokoro.tokens = config.model.kokoro.tokens.c_str();
  344 + c.model.kokoro.data_dir = config.model.kokoro.data_dir.c_str();
  345 + c.model.kokoro.length_scale = config.model.kokoro.length_scale;
  346 +
341 c.model.num_threads = config.model.num_threads; 347 c.model.num_threads = config.model.num_threads;
342 c.model.debug = config.model.debug; 348 c.model.debug = config.model.debug;
343 c.model.provider = config.model.provider.c_str(); 349 c.model.provider = config.model.provider.c_str();
@@ -338,9 +338,19 @@ struct OfflineTtsMatchaModelConfig { @@ -338,9 +338,19 @@ struct OfflineTtsMatchaModelConfig {
338 float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed 338 float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed
339 }; 339 };
340 340
  341 +struct OfflineTtsKokoroModelConfig {  // Kokoro TTS model settings for the C++ API
  342 + std::string model;  // model file; the bundled example uses ./kokoro-en-v0_19/model.onnx
  343 + std::string voices;  // voices file; the bundled example uses ./kokoro-en-v0_19/voices.bin
  344 + std::string tokens;  // tokens file; the bundled example uses ./kokoro-en-v0_19/tokens.txt
  345 + std::string data_dir;  // data directory; the bundled example uses ./kokoro-en-v0_19/espeak-ng-data
  346 +
  347 + float length_scale = 1.0; // < 1: faster speech; > 1: slower speech; defaults to 1.0
  348 +};
  349 +
341 struct OfflineTtsModelConfig { 350 struct OfflineTtsModelConfig {
342 OfflineTtsVitsModelConfig vits; 351 OfflineTtsVitsModelConfig vits;
343 OfflineTtsMatchaModelConfig matcha; 352 OfflineTtsMatchaModelConfig matcha;
  353 + OfflineTtsKokoroModelConfig kokoro;
344 int32_t num_threads = 1; 354 int32_t num_threads = 1;
345 bool debug = false; 355 bool debug = false;
346 std::string provider = "cpu"; 356 std::string provider = "cpu";