正在显示
14 个修改的文件
包含
153 行增加
和
11 行删除
| @@ -87,6 +87,45 @@ jobs: | @@ -87,6 +87,45 @@ jobs: | ||
| 87 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib | 87 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib |
| 88 | fi | 88 | fi |
| 89 | 89 | ||
| 90 | + - name: Test KittenTTS | ||
| 91 | + shell: bash | ||
| 92 | + run: | | ||
| 93 | + name=kitten-tts-en-cxx-api | ||
| 94 | + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \ | ||
| 95 | + -I ./build/install/include \ | ||
| 96 | + -L ./build/install/lib/ \ | ||
| 97 | + -l sherpa-onnx-cxx-api \ | ||
| 98 | + -l sherpa-onnx-c-api \ | ||
| 99 | + -l onnxruntime | ||
| 100 | + | ||
| 101 | + ls -lh $name | ||
| 102 | + | ||
| 103 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 104 | + ls -lh ./$name | ||
| 105 | + ldd ./$name | ||
| 106 | + echo "----" | ||
| 107 | + readelf -d ./$name | ||
| 108 | + fi | ||
| 109 | + | ||
| 110 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 111 | + tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 112 | + rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 113 | + | ||
| 114 | + echo "---" | ||
| 115 | + | ||
| 116 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 117 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 118 | + | ||
| 119 | + ./$name | ||
| 120 | + | ||
| 121 | + rm -rf kitten-nano-en-v0_1-fp16 | ||
| 122 | + rm -v ./$name | ||
| 123 | + | ||
| 124 | + - uses: actions/upload-artifact@v4 | ||
| 125 | + with: | ||
| 126 | + name: kitten-tts-wavs-${{ matrix.os }} | ||
| 127 | + path: ./generated-kitten-en-cxx.wav | ||
| 128 | + | ||
| 90 | - name: Test NeMo Canary | 129 | - name: Test NeMo Canary |
| 91 | shell: bash | 130 | shell: bash |
| 92 | run: | | 131 | run: | |
| @@ -185,7 +185,7 @@ libsherpa-onnx-cxx-api.so. | @@ -185,7 +185,7 @@ libsherpa-onnx-cxx-api.so. | ||
| 185 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users | 185 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users |
| 186 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. | 186 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. |
| 187 | 187 | ||
| 188 | -In any case, libonnxruntime.is is always needed. | 188 | +In any case, libonnxruntime.so is always needed. |
| 189 | EOF | 189 | EOF |
| 190 | ls -lh install/lib/README.md | 190 | ls -lh install/lib/README.md |
| 191 | fi | 191 | fi |
| @@ -185,7 +185,7 @@ libsherpa-onnx-cxx-api.so. | @@ -185,7 +185,7 @@ libsherpa-onnx-cxx-api.so. | ||
| 185 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users | 185 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users |
| 186 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. | 186 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. |
| 187 | 187 | ||
| 188 | -In any case, libonnxruntime.is is always needed. | 188 | +In any case, libonnxruntime.so is always needed. |
| 189 | EOF | 189 | EOF |
| 190 | ls -lh install/lib/README.md | 190 | ls -lh install/lib/README.md |
| 191 | fi | 191 | fi |
| @@ -164,7 +164,7 @@ libsherpa-onnx-cxx-api.so. | @@ -164,7 +164,7 @@ libsherpa-onnx-cxx-api.so. | ||
| 164 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users | 164 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users |
| 165 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. | 165 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. |
| 166 | 166 | ||
| 167 | -In any case, libonnxruntime.is is always needed. | 167 | +In any case, libonnxruntime.so is always needed. |
| 168 | EOF | 168 | EOF |
| 169 | ls -lh install/lib/README.md | 169 | ls -lh install/lib/README.md |
| 170 | fi | 170 | fi |
| @@ -125,7 +125,7 @@ libsherpa-onnx-cxx-api.so. | @@ -125,7 +125,7 @@ libsherpa-onnx-cxx-api.so. | ||
| 125 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users | 125 | libsherpa-onnx-c-api.so and libsherpa-onnx-cxx-api.so are for users |
| 126 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. | 126 | who don't use JNI. In that case, libsherpa-onnx-jni.so is not needed. |
| 127 | 127 | ||
| 128 | -In any case, libonnxruntime.is is always needed. | 128 | +In any case, libonnxruntime.so is always needed. |
| 129 | EOF | 129 | EOF |
| 130 | ls -lh install/lib/README.md | 130 | ls -lh install/lib/README.md |
| 131 | fi | 131 | fi |
| @@ -55,8 +55,9 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -55,8 +55,9 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 55 | 55 | ||
| 56 | const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config); | 56 | const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config); |
| 57 | // mapping of sid to voice name | 57 | // mapping of sid to voice name |
| 58 | - // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam | ||
| 59 | - // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis | 58 | + // 0->expr-voice-2-m, 1->expr-voice-2-f, 2->expr-voice-3-m |
| 59 | + // 3->expr-voice-3-f, 4->expr-voice-4-m, 5->expr-voice-4-f | ||
| 60 | + // 6->expr-voice-5-m, 7->expr-voice-5-f | ||
| 60 | int32_t sid = 0; | 61 | int32_t sid = 0; |
| 61 | float speed = 1.0; // larger -> faster in speech speed | 62 | float speed = 1.0; // larger -> faster in speech speed |
| 62 | 63 |
| @@ -124,6 +124,9 @@ if(SHERPA_ONNX_ENABLE_TTS) | @@ -124,6 +124,9 @@ if(SHERPA_ONNX_ENABLE_TTS) | ||
| 124 | add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc) | 124 | add_executable(kokoro-tts-en-cxx-api ./kokoro-tts-en-cxx-api.cc) |
| 125 | target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api) | 125 | target_link_libraries(kokoro-tts-en-cxx-api sherpa-onnx-cxx-api) |
| 126 | 126 | ||
| 127 | + add_executable(kitten-tts-en-cxx-api ./kitten-tts-en-cxx-api.cc) | ||
| 128 | + target_link_libraries(kitten-tts-en-cxx-api sherpa-onnx-cxx-api) | ||
| 129 | + | ||
| 127 | add_executable(kokoro-tts-zh-en-cxx-api ./kokoro-tts-zh-en-cxx-api.cc) | 130 | add_executable(kokoro-tts-zh-en-cxx-api ./kokoro-tts-zh-en-cxx-api.cc) |
| 128 | target_link_libraries(kokoro-tts-zh-en-cxx-api sherpa-onnx-cxx-api) | 131 | target_link_libraries(kokoro-tts-zh-en-cxx-api sherpa-onnx-cxx-api) |
| 129 | endif() | 132 | endif() |
cxx-api-examples/kitten-tts-en-cxx-api.cc
0 → 100644
| 1 | +// cxx-api-examples/kitten-tts-en-cxx-api.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// This file shows how to use sherpa-onnx CXX API | ||
| 6 | +// for English TTS with Kitten. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +/* | ||
| 10 | +Usage | ||
| 11 | + | ||
| 12 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 13 | +tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 14 | +rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 15 | + | ||
| 16 | +./kitten-tts-en-cxx-api | ||
| 17 | + | ||
| 18 | + */ | ||
| 19 | +// clang-format on | ||
| 20 | + | ||
| 21 | +#include <cstdint> | ||
| 22 | +#include <cstdio> | ||
| 23 | +#include <string> | ||
| 24 | + | ||
| 25 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 26 | + | ||
| 27 | +static int32_t ProgressCallback(const float *samples, int32_t num_samples, | ||
| 28 | + float progress, void *arg) { | ||
| 29 | + fprintf(stderr, "Progress: %.3f%%\n", progress * 100); | ||
| 30 | + // Returning 1 asks the generator to continue; | ||
| 31 | + // returning 0 would stop it. This example always continues. | ||
| 32 | + return 1; | ||
| 33 | +} | ||
| 34 | + | ||
| 35 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 36 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 37 | + OfflineTtsConfig config; | ||
| 38 | + | ||
| 39 | + config.model.kitten.model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx"; | ||
| 40 | + config.model.kitten.voices = "./kitten-nano-en-v0_1-fp16/voices.bin"; | ||
| 41 | + config.model.kitten.tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt"; | ||
| 42 | + config.model.kitten.data_dir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data"; | ||
| 43 | + | ||
| 44 | + config.model.num_threads = 2; | ||
| 45 | + | ||
| 46 | + // Set this to 0 to silence debug messages | ||
| 47 | + config.model.debug = 1; | ||
| 48 | + | ||
| 49 | + std::string filename = "./generated-kitten-en-cxx.wav"; | ||
| 50 | + std::string text = | ||
| 51 | + "Today as always, men fall into two groups: slaves and free men. Whoever " | ||
| 52 | + "does not have two-thirds of his day for himself, is a slave, whatever " | ||
| 53 | + "he may be: a statesman, a businessman, an official, or a scholar. " | ||
| 54 | + "Friends fell out often because life was changing so fast. The easiest " | ||
| 55 | + "thing in the world was to lose touch with someone."; | ||
| 56 | + | ||
| 57 | + auto tts = OfflineTts::Create(config); | ||
| 58 | + int32_t sid = 0; | ||
| 59 | + float speed = 1.0; // larger value -> faster speech | ||
| 60 | + | ||
| 61 | +#if 0 | ||
| 62 | + // Change the "#if 0" above to "#if 1" to generate without a progress callback | ||
| 63 | + GeneratedAudio audio = tts.Generate(text, sid, speed); | ||
| 64 | +#else | ||
| 65 | + GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback); | ||
| 66 | +#endif | ||
| 67 | + | ||
| 68 | + WriteWave(filename, {audio.samples, audio.sample_rate}); | ||
| 69 | + | ||
| 70 | + fprintf(stderr, "Input text is: %s\n", text.c_str()); | ||
| 71 | + fprintf(stderr, "Speaker ID is: %d\n", sid); | ||
| 72 | + fprintf(stderr, "Saved to: %s\n", filename.c_str()); | ||
| 73 | + | ||
| 74 | + return 0; | ||
| 75 | +} |
| 1 | -// cxx-api-examples/kokoro-tts-en-cxx-api.c | 1 | +// cxx-api-examples/kokoro-tts-en-cxx-api.cc |
| 2 | // | 2 | // |
| 3 | // Copyright (c) 2025 Xiaomi Corporation | 3 | // Copyright (c) 2025 Xiaomi Corporation |
| 4 | 4 | ||
| @@ -18,6 +18,8 @@ rm kokoro-en-v0_19.tar.bz2 | @@ -18,6 +18,8 @@ rm kokoro-en-v0_19.tar.bz2 | ||
| 18 | */ | 18 | */ |
| 19 | // clang-format on | 19 | // clang-format on |
| 20 | 20 | ||
| 21 | +#include <cstdint> | ||
| 22 | +#include <cstdio> | ||
| 21 | #include <string> | 23 | #include <string> |
| 22 | 24 | ||
| 23 | #include "sherpa-onnx/c-api/cxx-api.h" | 25 | #include "sherpa-onnx/c-api/cxx-api.h" |
| @@ -66,7 +68,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -66,7 +68,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 66 | WriteWave(filename, {audio.samples, audio.sample_rate}); | 68 | WriteWave(filename, {audio.samples, audio.sample_rate}); |
| 67 | 69 | ||
| 68 | fprintf(stderr, "Input text is: %s\n", text.c_str()); | 70 | fprintf(stderr, "Input text is: %s\n", text.c_str()); |
| 69 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 71 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 70 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); | 72 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); |
| 71 | 73 | ||
| 72 | return 0; | 74 | return 0; |
| @@ -18,6 +18,8 @@ rm kokoro-multi-lang-v1_0.tar.bz2 | @@ -18,6 +18,8 @@ rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 18 | */ | 18 | */ |
| 19 | // clang-format on | 19 | // clang-format on |
| 20 | 20 | ||
| 21 | +#include <cstdint> | ||
| 22 | +#include <cstdio> | ||
| 21 | #include <string> | 23 | #include <string> |
| 22 | 24 | ||
| 23 | #include "sherpa-onnx/c-api/cxx-api.h" | 25 | #include "sherpa-onnx/c-api/cxx-api.h" |
| @@ -67,7 +69,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -67,7 +69,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 67 | WriteWave(filename, {audio.samples, audio.sample_rate}); | 69 | WriteWave(filename, {audio.samples, audio.sample_rate}); |
| 68 | 70 | ||
| 69 | fprintf(stderr, "Input text is: %s\n", text.c_str()); | 71 | fprintf(stderr, "Input text is: %s\n", text.c_str()); |
| 70 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 72 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 71 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); | 73 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); |
| 72 | 74 | ||
| 73 | return 0; | 75 | return 0; |
| @@ -20,6 +20,8 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/voco | @@ -20,6 +20,8 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/voco | ||
| 20 | */ | 20 | */ |
| 21 | // clang-format on | 21 | // clang-format on |
| 22 | 22 | ||
| 23 | +#include <cstdint> | ||
| 24 | +#include <cstdio> | ||
| 23 | #include <string> | 25 | #include <string> |
| 24 | 26 | ||
| 25 | #include "sherpa-onnx/c-api/cxx-api.h" | 27 | #include "sherpa-onnx/c-api/cxx-api.h" |
| @@ -73,7 +75,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -73,7 +75,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 73 | WriteWave(filename, {audio.samples, audio.sample_rate}); | 75 | WriteWave(filename, {audio.samples, audio.sample_rate}); |
| 74 | 76 | ||
| 75 | fprintf(stderr, "Input text is: %s\n", text.c_str()); | 77 | fprintf(stderr, "Input text is: %s\n", text.c_str()); |
| 76 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 78 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 77 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); | 79 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); |
| 78 | 80 | ||
| 79 | return 0; | 81 | return 0; |
| @@ -20,6 +20,8 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/voco | @@ -20,6 +20,8 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/voco | ||
| 20 | */ | 20 | */ |
| 21 | // clang-format on | 21 | // clang-format on |
| 22 | 22 | ||
| 23 | +#include <cstdint> | ||
| 24 | +#include <cstdio> | ||
| 23 | #include <string> | 25 | #include <string> |
| 24 | 26 | ||
| 25 | #include "sherpa-onnx/c-api/cxx-api.h" | 27 | #include "sherpa-onnx/c-api/cxx-api.h" |
| @@ -72,7 +74,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -72,7 +74,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 72 | WriteWave(filename, {audio.samples, audio.sample_rate}); | 74 | WriteWave(filename, {audio.samples, audio.sample_rate}); |
| 73 | 75 | ||
| 74 | fprintf(stderr, "Input text is: %s\n", text.c_str()); | 76 | fprintf(stderr, "Input text is: %s\n", text.c_str()); |
| 75 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 77 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 76 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); | 78 | fprintf(stderr, "Saved to: %s\n", filename.c_str()); |
| 77 | 79 | ||
| 78 | return 0; | 80 | return 0; |
| @@ -392,6 +392,12 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) { | @@ -392,6 +392,12 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) { | ||
| 392 | c.model.kokoro.lexicon = config.model.kokoro.lexicon.c_str(); | 392 | c.model.kokoro.lexicon = config.model.kokoro.lexicon.c_str(); |
| 393 | c.model.kokoro.lang = config.model.kokoro.lang.c_str(); | 393 | c.model.kokoro.lang = config.model.kokoro.lang.c_str(); |
| 394 | 394 | ||
| 395 | + c.model.kitten.model = config.model.kitten.model.c_str(); | ||
| 396 | + c.model.kitten.voices = config.model.kitten.voices.c_str(); | ||
| 397 | + c.model.kitten.tokens = config.model.kitten.tokens.c_str(); | ||
| 398 | + c.model.kitten.data_dir = config.model.kitten.data_dir.c_str(); | ||
| 399 | + c.model.kitten.length_scale = config.model.kitten.length_scale; | ||
| 400 | + | ||
| 395 | c.model.num_threads = config.model.num_threads; | 401 | c.model.num_threads = config.model.num_threads; |
| 396 | c.model.debug = config.model.debug; | 402 | c.model.debug = config.model.debug; |
| 397 | c.model.provider = config.model.provider.c_str(); | 403 | c.model.provider = config.model.provider.c_str(); |
| @@ -394,10 +394,20 @@ struct OfflineTtsKokoroModelConfig { | @@ -394,10 +394,20 @@ struct OfflineTtsKokoroModelConfig { | ||
| 394 | float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed | 394 | float length_scale = 1.0; // < 1, faster in speed; > 1, slower in speed |
| 395 | }; | 395 | }; |
| 396 | 396 | ||
| 397 | +struct OfflineTtsKittenModelConfig { | ||
| 398 | + std::string model; | ||
| 399 | + std::string voices; | ||
| 400 | + std::string tokens; | ||
| 401 | + std::string data_dir; | ||
| 402 | + | ||
| 403 | + float length_scale = 1.0; // < 1 -> faster speech; > 1 -> slower speech | ||
| 404 | +}; | ||
| 405 | + | ||
| 397 | struct OfflineTtsModelConfig { | 406 | struct OfflineTtsModelConfig { |
| 398 | OfflineTtsVitsModelConfig vits; | 407 | OfflineTtsVitsModelConfig vits; |
| 399 | OfflineTtsMatchaModelConfig matcha; | 408 | OfflineTtsMatchaModelConfig matcha; |
| 400 | OfflineTtsKokoroModelConfig kokoro; | 409 | OfflineTtsKokoroModelConfig kokoro; |
| 410 | + OfflineTtsKittenModelConfig kitten; | ||
| 401 | int32_t num_threads = 1; | 411 | int32_t num_threads = 1; |
| 402 | bool debug = false; | 412 | bool debug = false; |
| 403 | std::string provider = "cpu"; | 413 | std::string provider = "cpu"; |
-
请 注册 或 登录 后发表评论