Fangjun Kuang
Committed by GitHub

Add C API for Kokoro TTS 1.0 (#1801)

@@ -100,6 +100,27 @@ jobs: @@ -100,6 +100,27 @@ jobs:
100 rm ./kws-c-api 100 rm ./kws-c-api
101 rm -rf sherpa-onnx-kws-* 101 rm -rf sherpa-onnx-kws-*
102 102
  103 + - name: Test Kokoro TTS (zh+en)
  104 + shell: bash
  105 + run: |
  106 + gcc -o kokoro-tts-zh-en-c-api ./c-api-examples/kokoro-tts-zh-en-c-api.c \
  107 + -I ./build/install/include \
  108 + -L ./build/install/lib/ \
  109 + -l sherpa-onnx-c-api \
  110 + -l onnxruntime
  111 +
  112 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  113 + tar xf kokoro-multi-lang-v1_0.tar.bz2
  114 + rm kokoro-multi-lang-v1_0.tar.bz2
  115 +
  116 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  117 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  118 +
  119 + ./kokoro-tts-zh-en-c-api
  120 +
  121 + rm ./kokoro-tts-zh-en-c-api
  122 + rm -rf kokoro-zh-en-*
  123 +
103 - name: Test Kokoro TTS (en) 124 - name: Test Kokoro TTS (en)
104 shell: bash 125 shell: bash
105 run: | 126 run: |
@@ -19,6 +19,9 @@ if(SHERPA_ONNX_ENABLE_TTS) @@ -19,6 +19,9 @@ if(SHERPA_ONNX_ENABLE_TTS)
19 19
20 add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c) 20 add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
21 target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api) 21 target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)
  22 +
  23 + add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c)
  24 + target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api)
22 endif() 25 endif()
23 26
24 if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION) 27 if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
  1 +// c-api-examples/kokoro-tts-zh-en-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +// This file shows how to use sherpa-onnx C API
  6 +// for English + Chinese TTS with Kokoro.
  7 +//
  8 +// clang-format off
  9 +/*
  10 +Usage
  11 +
  12 +
  13 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  14 +tar xf kokoro-multi-lang-v1_0.tar.bz2
  15 +rm kokoro-multi-lang-v1_0.tar.bz2
  16 +
  17 +./kokoro-tts-zh-en-c-api
  18 +
  19 + */
  20 +// clang-format on
  21 +
  22 +#include <stdio.h>
  23 +#include <stdlib.h>
  24 +#include <string.h>
  25 +
  26 +#include "sherpa-onnx/c-api/c-api.h"
  27 +
  28 +static int32_t ProgressCallback(const float *samples, int32_t num_samples,
  29 + float progress) {
  30 + fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
  31 + // return 1 to continue generating
  32 + // return 0 to stop generating
  33 + return 1;
  34 +}
  35 +
  36 +int32_t main(int32_t argc, char *argv[]) {
  37 + SherpaOnnxOfflineTtsConfig config;
  38 + memset(&config, 0, sizeof(config));
  39 + config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
  40 + config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
  41 + config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
  42 + config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
  43 + config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
  44 + config.model.kokoro.lexicon =
  45 + "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
  46 + "lexicon-zh.txt";
  47 +
  48 + config.model.num_threads = 2;
  49 +
  50 + // If you don't want to see debug messages, please set it to 0
  51 + config.model.debug = 1;
  52 +
  53 + const char *filename = "./generated-kokoro-zh-en.wav";
  54 + const char *text =
  55 + "中英文语音合成测试。This is generated by next generation Kaldi using "
  56 + "Kokoro without Misaki. 你觉得中英文说的如何呢?";
  57 +
  58 + const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  59 + int32_t sid = 0; // there are 53 speakers
  60 + float speed = 1.0; // larger -> faster in speech speed
  61 +
  62 +#if 0
  63 + // If you don't want to use a callback, then please enable this branch
  64 + const SherpaOnnxGeneratedAudio *audio =
  65 + SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
  66 +#else
  67 + const SherpaOnnxGeneratedAudio *audio =
  68 + SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
  69 + ProgressCallback);
  70 +#endif
  71 +
  72 + SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
  73 +
  74 + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  75 + SherpaOnnxDestroyOfflineTts(tts);
  76 +
  77 + fprintf(stderr, "Input text is: %s\n", text);
  78 + fprintf(stderr, "Speaker ID is is: %d\n", sid);
  79 + fprintf(stderr, "Saved to: %s\n", filename);
  80 +
  81 + return 0;
  82 +}
@@ -1120,6 +1120,10 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig( @@ -1120,6 +1120,10 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
1120 SHERPA_ONNX_OR(config->model.kokoro.data_dir, ""); 1120 SHERPA_ONNX_OR(config->model.kokoro.data_dir, "");
1121 tts_config.model.kokoro.length_scale = 1121 tts_config.model.kokoro.length_scale =
1122 SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0); 1122 SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0);
  1123 + tts_config.model.kokoro.dict_dir =
  1124 + SHERPA_ONNX_OR(config->model.kokoro.dict_dir, "");
  1125 + tts_config.model.kokoro.lexicon =
  1126 + SHERPA_ONNX_OR(config->model.kokoro.lexicon, "");
1123 1127
1124 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); 1128 tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
1125 tts_config.model.debug = config->model.debug; 1129 tts_config.model.debug = config->model.debug;
@@ -926,6 +926,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig { @@ -926,6 +926,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig {
926 const char *data_dir; 926 const char *data_dir;
927 927
928 float length_scale; // < 1, faster in speech speed; > 1, slower in speed 928 float length_scale; // < 1, faster in speech speed; > 1, slower in speed
  929 + const char *dict_dir;
  930 + const char *lexicon;
929 } SherpaOnnxOfflineTtsKokoroModelConfig; 931 } SherpaOnnxOfflineTtsKokoroModelConfig;
930 932
931 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { 933 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
@@ -4,6 +4,8 @@ @@ -4,6 +4,8 @@
4 #ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_ 4 #ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_
5 #define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_ 5 #define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_
6 6
  7 +#include <iomanip>
  8 +#include <ios>
7 #include <memory> 9 #include <memory>
8 #include <string> 10 #include <string>
9 #include <strstream> 11 #include <strstream>
@@ -189,6 +191,20 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl { @@ -189,6 +191,20 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl {
189 #else 191 #else
190 SHERPA_ONNX_LOGE("Raw text: %s", text.c_str()); 192 SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
191 #endif 193 #endif
  194 + std::ostringstream os;
  195 + os << "In bytes (hex):\n";
  196 + const auto p = reinterpret_cast<const uint8_t *>(text.c_str());
  197 + for (int32_t i = 0; i != text.size(); ++i) {
  198 + os << std::setw(2) << std::setfill('0') << std::hex
  199 + << static_cast<uint32_t>(p[i]) << " ";
  200 + }
  201 + os << "\n";
  202 +
  203 +#if __OHOS__
  204 + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
  205 +#else
  206 + SHERPA_ONNX_LOGE("%s", os.str().c_str());
  207 +#endif
192 } 208 }
193 209
194 if (!tn_list_.empty()) { 210 if (!tn_list_.empty()) {