正在显示 10 个修改的文件，包含 148 行增加和 5 行删除
| @@ -85,6 +85,41 @@ jobs: | @@ -85,6 +85,41 @@ jobs: | ||
| 85 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib | 85 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib |
| 86 | fi | 86 | fi |
| 87 | 87 | ||
| 88 | + - name: Test KittenTTS | ||
| 89 | + shell: bash | ||
| 90 | + run: | | ||
| 91 | + name=kitten-tts-en-c-api | ||
| 92 | + gcc -o $name ./c-api-examples/$name.c \ | ||
| 93 | + -I ./build/install/include \ | ||
| 94 | + -L ./build/install/lib/ \ | ||
| 95 | + -l sherpa-onnx-c-api \ | ||
| 96 | + -l onnxruntime | ||
| 97 | + | ||
| 98 | + ls -lh $name | ||
| 99 | + | ||
| 100 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 101 | + ldd ./$name | ||
| 102 | + echo "----" | ||
| 103 | + readelf -d ./$name | ||
| 104 | + fi | ||
| 105 | + | ||
| 106 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 107 | + tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 108 | + rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 109 | + | ||
| 110 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 111 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 112 | + | ||
| 113 | + ./$name | ||
| 114 | + | ||
| 115 | + rm $name | ||
| 116 | + rm -rf kitten-nano-en-v0_1-fp16 | ||
| 117 | + | ||
| 118 | + - uses: actions/upload-artifact@v4 | ||
| 119 | + with: | ||
| 120 | + name: kitten-tts-wavs-${{ matrix.os }} | ||
| 121 | + path: ./generated-kitten-en.wav | ||
| 122 | + | ||
| 88 | - name: Test streaming zipformer with homophone replacer | 123 | - name: Test streaming zipformer with homophone replacer |
| 89 | shell: bash | 124 | shell: bash |
| 90 | run: | | 125 | run: | |
| @@ -23,6 +23,9 @@ if(SHERPA_ONNX_ENABLE_TTS) | @@ -23,6 +23,9 @@ if(SHERPA_ONNX_ENABLE_TTS) | ||
| 23 | add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c) | 23 | add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c) |
| 24 | target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api) | 24 | target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api) |
| 25 | 25 | ||
| 26 | + add_executable(kitten-tts-en-c-api kitten-tts-en-c-api.c) | ||
| 27 | + target_link_libraries(kitten-tts-en-c-api sherpa-onnx-c-api) | ||
| 28 | + | ||
| 26 | add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c) | 29 | add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c) |
| 27 | target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api) | 30 | target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api) |
| 28 | endif() | 31 | endif() |
c-api-examples/kitten-tts-en-c-api.c
0 → 100644
| 1 | +// c-api-examples/kitten-tts-en-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// This file shows how to use sherpa-onnx C API | ||
| 6 | +// for English TTS with Kitten. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +/* | ||
| 10 | +Usage | ||
| 11 | + | ||
| 12 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 13 | +tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 14 | +rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 15 | + | ||
| 16 | +./kitten-tts-en-c-api | ||
| 17 | + | ||
| 18 | + */ | ||
| 19 | +// clang-format on | ||
| 20 | + | ||
| 21 | +#include <stdio.h> | ||
| 22 | +#include <stdlib.h> | ||
| 23 | +#include <string.h> | ||
| 24 | + | ||
| 25 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 26 | + | ||
| 27 | +static int32_t ProgressCallback(const float *samples, int32_t num_samples, | ||
| 28 | + float progress) { | ||
| 29 | + fprintf(stderr, "Progress: %.3f%%\n", progress * 100); | ||
| 30 | + // return 1 to continue generating | ||
| 31 | + // return 0 to stop generating | ||
| 32 | + return 1; | ||
| 33 | +} | ||
| 34 | + | ||
| 35 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 36 | + SherpaOnnxOfflineTtsConfig config; | ||
| 37 | + memset(&config, 0, sizeof(config)); | ||
| 38 | + config.model.kitten.model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx"; | ||
| 39 | + config.model.kitten.voices = "./kitten-nano-en-v0_1-fp16/voices.bin"; | ||
| 40 | + config.model.kitten.tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt"; | ||
| 41 | + config.model.kitten.data_dir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data"; | ||
| 42 | + | ||
| 43 | + config.model.num_threads = 2; | ||
| 44 | + | ||
| 45 | + // If you don't want to see debug messages, please set it to 0 | ||
| 46 | + config.model.debug = 1; | ||
| 47 | + | ||
| 48 | + const char *filename = "./generated-kitten-en.wav"; | ||
| 49 | + const char *text = | ||
| 50 | + "Today as always, men fall into two groups: slaves and free men. Whoever " | ||
| 51 | + "does not have two-thirds of his day for himself, is a slave, whatever " | ||
| 52 | + "he may be: a statesman, a businessman, an official, or a scholar. " | ||
| 53 | + "Friends fell out often because life was changing so fast. The easiest " | ||
| 54 | + "thing in the world was to lose touch with someone."; | ||
| 55 | + | ||
| 56 | + const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config); | ||
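| 57 | + // mapping of sid to voice name (NOTE(review): the af_*/am_*/bf_*/bm_* names below look copied from the Kokoro example; confirm the speaker list for the Kitten model) | ||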
| 58 | + // 0->af, 1->af_bella, 2->af_nicole, 3->af_sarah, 4->af_sky, 5->am_adam | ||
| 59 | + // 6->am_michael, 7->bf_emma, 8->bf_isabella, 9->bm_george, 10->bm_lewis | ||
| 60 | + int32_t sid = 0; | ||
| 61 | + float speed = 1.0; // larger -> faster in speech speed | ||
| 62 | + | ||
| 63 | +#if 0 | ||
| 64 | + // If you don't want to use a callback, then please enable this branch | ||
| 65 | + const SherpaOnnxGeneratedAudio *audio = | ||
| 66 | + SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed); | ||
| 67 | +#else | ||
| 68 | + const SherpaOnnxGeneratedAudio *audio = | ||
| 69 | + SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed, | ||
| 70 | + ProgressCallback); | ||
| 71 | +#endif | ||
| 72 | + | ||
| 73 | + SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename); | ||
| 74 | + | ||
| 75 | + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); | ||
| 76 | + SherpaOnnxDestroyOfflineTts(tts); | ||
| 77 | + | ||
| 78 | + fprintf(stderr, "Input text is: %s\n", text); | ||
| 79 | + fprintf(stderr, "Speaker ID is: %d\n", sid); | ||
| 80 | + fprintf(stderr, "Saved to: %s\n", filename); | ||
| 81 | + | ||
| 82 | + return 0; | ||
| 83 | +} |
| @@ -77,7 +77,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -77,7 +77,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 77 | SherpaOnnxDestroyOfflineTts(tts); | 77 | SherpaOnnxDestroyOfflineTts(tts); |
| 78 | 78 | ||
| 79 | fprintf(stderr, "Input text is: %s\n", text); | 79 | fprintf(stderr, "Input text is: %s\n", text); |
| 80 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 80 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 81 | fprintf(stderr, "Saved to: %s\n", filename); | 81 | fprintf(stderr, "Saved to: %s\n", filename); |
| 82 | 82 | ||
| 83 | return 0; | 83 | return 0; |
| @@ -75,7 +75,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -75,7 +75,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 75 | SherpaOnnxDestroyOfflineTts(tts); | 75 | SherpaOnnxDestroyOfflineTts(tts); |
| 76 | 76 | ||
| 77 | fprintf(stderr, "Input text is: %s\n", text); | 77 | fprintf(stderr, "Input text is: %s\n", text); |
| 78 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 78 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 79 | fprintf(stderr, "Saved to: %s\n", filename); | 79 | fprintf(stderr, "Saved to: %s\n", filename); |
| 80 | 80 | ||
| 81 | return 0; | 81 | return 0; |
| @@ -80,7 +80,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -80,7 +80,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 80 | SherpaOnnxDestroyOfflineTts(tts); | 80 | SherpaOnnxDestroyOfflineTts(tts); |
| 81 | 81 | ||
| 82 | fprintf(stderr, "Input text is: %s\n", text); | 82 | fprintf(stderr, "Input text is: %s\n", text); |
| 83 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 83 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 84 | fprintf(stderr, "Saved to: %s\n", filename); | 84 | fprintf(stderr, "Saved to: %s\n", filename); |
| 85 | 85 | ||
| 86 | return 0; | 86 | return 0; |
| @@ -80,7 +80,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -80,7 +80,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 80 | SherpaOnnxDestroyOfflineTts(tts); | 80 | SherpaOnnxDestroyOfflineTts(tts); |
| 81 | 81 | ||
| 82 | fprintf(stderr, "Input text is: %s\n", text); | 82 | fprintf(stderr, "Input text is: %s\n", text); |
| 83 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 83 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 84 | fprintf(stderr, "Saved to: %s\n", filename); | 84 | fprintf(stderr, "Saved to: %s\n", filename); |
| 85 | 85 | ||
| 86 | return 0; | 86 | return 0; |
| @@ -240,7 +240,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -240,7 +240,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 240 | SherpaOnnxDestroyOfflineTts(tts); | 240 | SherpaOnnxDestroyOfflineTts(tts); |
| 241 | 241 | ||
| 242 | fprintf(stderr, "Input text is: %s\n", text); | 242 | fprintf(stderr, "Input text is: %s\n", text); |
| 243 | - fprintf(stderr, "Speaker ID is is: %d\n", sid); | 243 | + fprintf(stderr, "Speaker ID is: %d\n", sid); |
| 244 | fprintf(stderr, "Saved to: %s\n", filename); | 244 | fprintf(stderr, "Saved to: %s\n", filename); |
| 245 | 245 | ||
| 246 | free((void *)filename); | 246 | free((void *)filename); |
| @@ -1207,6 +1207,18 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig( | @@ -1207,6 +1207,18 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig( | ||
| 1207 | SHERPA_ONNX_OR(config->model.kokoro.lexicon, ""); | 1207 | SHERPA_ONNX_OR(config->model.kokoro.lexicon, ""); |
| 1208 | tts_config.model.kokoro.lang = SHERPA_ONNX_OR(config->model.kokoro.lang, ""); | 1208 | tts_config.model.kokoro.lang = SHERPA_ONNX_OR(config->model.kokoro.lang, ""); |
| 1209 | 1209 | ||
| 1210 | + // kitten | ||
| 1211 | + tts_config.model.kitten.model = | ||
| 1212 | + SHERPA_ONNX_OR(config->model.kitten.model, ""); | ||
| 1213 | + tts_config.model.kitten.voices = | ||
| 1214 | + SHERPA_ONNX_OR(config->model.kitten.voices, ""); | ||
| 1215 | + tts_config.model.kitten.tokens = | ||
| 1216 | + SHERPA_ONNX_OR(config->model.kitten.tokens, ""); | ||
| 1217 | + tts_config.model.kitten.data_dir = | ||
| 1218 | + SHERPA_ONNX_OR(config->model.kitten.data_dir, ""); | ||
| 1219 | + tts_config.model.kitten.length_scale = | ||
| 1220 | + SHERPA_ONNX_OR(config->model.kitten.length_scale, 1.0); | ||
| 1221 | + | ||
| 1210 | tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | 1222 | tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); |
| 1211 | tts_config.model.debug = config->model.debug; | 1223 | tts_config.model.debug = config->model.debug; |
| 1212 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | 1224 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); |
| @@ -1032,6 +1032,15 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig { | @@ -1032,6 +1032,15 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig { | ||
| 1032 | const char *lang; | 1032 | const char *lang; |
| 1033 | } SherpaOnnxOfflineTtsKokoroModelConfig; | 1033 | } SherpaOnnxOfflineTtsKokoroModelConfig; |
| 1034 | 1034 | ||
| 1035 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKittenModelConfig { | ||
| 1036 | + const char *model; | ||
| 1037 | + const char *voices; | ||
| 1038 | + const char *tokens; | ||
| 1039 | + const char *data_dir; | ||
| 1040 | + | ||
| 1041 | + float length_scale; // < 1, faster in speech speed; > 1, slower in speed | ||
| 1042 | +} SherpaOnnxOfflineTtsKittenModelConfig; | ||
| 1043 | + | ||
| 1035 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { | 1044 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { |
| 1036 | SherpaOnnxOfflineTtsVitsModelConfig vits; | 1045 | SherpaOnnxOfflineTtsVitsModelConfig vits; |
| 1037 | int32_t num_threads; | 1046 | int32_t num_threads; |
| @@ -1039,6 +1048,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { | @@ -1039,6 +1048,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { | ||
| 1039 | const char *provider; | 1048 | const char *provider; |
| 1040 | SherpaOnnxOfflineTtsMatchaModelConfig matcha; | 1049 | SherpaOnnxOfflineTtsMatchaModelConfig matcha; |
| 1041 | SherpaOnnxOfflineTtsKokoroModelConfig kokoro; | 1050 | SherpaOnnxOfflineTtsKokoroModelConfig kokoro; |
| 1051 | + SherpaOnnxOfflineTtsKittenModelConfig kitten; | ||
| 1042 | } SherpaOnnxOfflineTtsModelConfig; | 1052 | } SherpaOnnxOfflineTtsModelConfig; |
| 1043 | 1053 | ||
| 1044 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { | 1054 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { |
-
请 注册 或 登录 后发表评论