正在显示
7 个修改的文件
包含
119 行增加
和
4 行删除
| @@ -11,6 +11,10 @@ ls -lh | @@ -11,6 +11,10 @@ ls -lh | ||
| 11 | ls -lh | 11 | ls -lh |
| 12 | rm -rf vits-piper-* | 12 | rm -rf vits-piper-* |
| 13 | 13 | ||
| 14 | +./run-tts-kokoro-zh-en.sh | ||
| 15 | +ls -lh | ||
| 16 | +rm -rf kokoro-multi-* | ||
| 17 | + | ||
| 14 | ./run-tts-kokoro-en.sh | 18 | ./run-tts-kokoro-en.sh |
| 15 | ls -lh | 19 | ls -lh |
| 16 | rm -rf kokoro-en-* | 20 | rm -rf kokoro-en-* |
| @@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
| 3 | // Copyright (c) 2025 Xiaomi Corporation | 3 | // Copyright (c) 2025 Xiaomi Corporation |
| 4 | 4 | ||
| 5 | // This file shows how to use sherpa-onnx CXX API | 5 | // This file shows how to use sherpa-onnx CXX API |
| 6 | -// for Chinese TTS with Kokoro. | 6 | +// for English TTS with Kokoro. |
| 7 | // | 7 | // |
| 8 | // clang-format off | 8 | // clang-format off |
| 9 | /* | 9 | /* |
| @@ -3,7 +3,7 @@ | @@ -3,7 +3,7 @@ | ||
| 3 | // Copyright (c) 2025 Xiaomi Corporation | 3 | // Copyright (c) 2025 Xiaomi Corporation |
| 4 | 4 | ||
| 5 | // This file shows how to use sherpa-onnx CXX API | 5 | // This file shows how to use sherpa-onnx CXX API |
| 6 | -// for Chinese TTS with Kokoro. | 6 | +// for Chinese + English TTS with Kokoro. |
| 7 | // | 7 | // |
| 8 | // clang-format off | 8 | // clang-format off |
| 9 | /* | 9 | /* |
| @@ -767,14 +767,18 @@ func sherpaOnnxOfflineTtsKokoroModelConfig( | @@ -767,14 +767,18 @@ func sherpaOnnxOfflineTtsKokoroModelConfig( | ||
| 767 | voices: String = "", | 767 | voices: String = "", |
| 768 | tokens: String = "", | 768 | tokens: String = "", |
| 769 | dataDir: String = "", | 769 | dataDir: String = "", |
| 770 | - lengthScale: Float = 1.0 | 770 | + lengthScale: Float = 1.0, |
| 771 | + dictDir: String = "", | ||
| 772 | + lexicon: String = "" | ||
| 771 | ) -> SherpaOnnxOfflineTtsKokoroModelConfig { | 773 | ) -> SherpaOnnxOfflineTtsKokoroModelConfig { |
| 772 | return SherpaOnnxOfflineTtsKokoroModelConfig( | 774 | return SherpaOnnxOfflineTtsKokoroModelConfig( |
| 773 | model: toCPointer(model), | 775 | model: toCPointer(model), |
| 774 | voices: toCPointer(voices), | 776 | voices: toCPointer(voices), |
| 775 | tokens: toCPointer(tokens), | 777 | tokens: toCPointer(tokens), |
| 776 | data_dir: toCPointer(dataDir), | 778 | data_dir: toCPointer(dataDir), |
| 777 | - length_scale: lengthScale | 779 | + length_scale: lengthScale, |
| 780 | + dict_dir: toCPointer(dictDir), | ||
| 781 | + lexicon: toCPointer(lexicon) | ||
| 778 | ) | 782 | ) |
| 779 | } | 783 | } |
| 780 | 784 |
swift-api-examples/run-tts-kokoro-zh-en.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d ../build-swift-macos ]; then | ||
| 6 | + echo "Please run ../build-swift-macos.sh first!" | ||
| 7 | + exit 1 | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +# please visit | ||
| 11 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html | ||
| 12 | +# to download more models | ||
| 13 | +if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then | ||
| 14 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | ||
| 15 | + tar xf kokoro-multi-lang-v1_0.tar.bz2 | ||
| 16 | + rm kokoro-multi-lang-v1_0.tar.bz2 | ||
| 17 | +fi | ||
| 18 | + | ||
| 19 | +if [ ! -e ./tts-kokoro-zh-en ]; then | ||
| 20 | + # Note: We use -lc++ to link against libc++ instead of libstdc++ | ||
| 21 | + swiftc \ | ||
| 22 | + -lc++ \ | ||
| 23 | + -I ../build-swift-macos/install/include \ | ||
| 24 | + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ | ||
| 25 | + ./tts-kokoro-zh-en.swift ./SherpaOnnx.swift \ | ||
| 26 | + -L ../build-swift-macos/install/lib/ \ | ||
| 27 | + -l sherpa-onnx \ | ||
| 28 | + -l onnxruntime \ | ||
| 29 | + -o tts-kokoro-zh-en | ||
| 30 | + | ||
| 31 | + strip tts-kokoro-zh-en | ||
| 32 | +else | ||
| 33 | + echo "./tts-kokoro-zh-en exists - skip building" | ||
| 34 | +fi | ||
| 35 | + | ||
| 36 | +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH | ||
| 37 | +./tts-kokoro-zh-en |
swift-api-examples/tts-kokoro-zh-en.swift
0 → 100644
| 1 | +class MyClass { | ||
| 2 | + func playSamples(samples: [Float]) { | ||
| 3 | + print("Play \(samples.count) samples") | ||
| 4 | + } | ||
| 5 | +} | ||
| 6 | + | ||
| 7 | +func run() { | ||
| 8 | + let model = "./kokoro-multi-lang-v1_0/model.onnx" | ||
| 9 | + let voices = "./kokoro-multi-lang-v1_0/voices.bin" | ||
| 10 | + let tokens = "./kokoro-multi-lang-v1_0/tokens.txt" | ||
| 11 | + let dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data" | ||
| 12 | + let dictDir = "./kokoro-multi-lang-v1_0/dict" | ||
| 13 | + let lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt" | ||
| 14 | + let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig( | ||
| 15 | + model: model, | ||
| 16 | + voices: voices, | ||
| 17 | + tokens: tokens, | ||
| 18 | + dataDir: dataDir, | ||
| 19 | + dictDir: dictDir, | ||
| 20 | + lexicon: lexicon | ||
| 21 | + ) | ||
| 22 | + let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0) | ||
| 23 | + var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig) | ||
| 24 | + | ||
| 25 | + let myClass = MyClass() | ||
| 26 | + | ||
| 27 | + // We use passUnretained here, so myClass must be kept alive for as long as the callback may be invoked | ||
| 28 | + // | ||
| 29 | + // See also | ||
| 30 | + // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6 | ||
| 31 | + let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque() | ||
| 32 | + | ||
| 33 | + let callback: TtsCallbackWithArg = { samples, n, arg in | ||
| 34 | + let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue() | ||
| 35 | + var savedSamples: [Float] = [] | ||
| 36 | + for index in 0..<n { | ||
| 37 | + savedSamples.append(samples![Int(index)]) | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + o.playSamples(samples: savedSamples) | ||
| 41 | + | ||
| 42 | + // return 1 so that it continues generating | ||
| 43 | + return 1 | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig) | ||
| 47 | + | ||
| 48 | + let text = | ||
| 49 | + "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" | ||
| 50 | + let sid = 0 | ||
| 51 | + let speed: Float = 1.0 | ||
| 52 | + | ||
| 53 | + let audio = tts.generateWithCallbackWithArg( | ||
| 54 | + text: text, callback: callback, arg: arg, sid: sid, speed: speed) | ||
| 55 | + let filename = "test-kokoro-zh-en.wav" | ||
| 56 | + let ok = audio.save(filename: filename) | ||
| 57 | + if ok == 1 { | ||
| 58 | + print("\nSaved to:\(filename)") | ||
| 59 | + } else { | ||
| 60 | + print("Failed to save to \(filename)") | ||
| 61 | + } | ||
| 62 | +} | ||
| 63 | + | ||
| 64 | +@main | ||
| 65 | +struct App { | ||
| 66 | + static func main() { | ||
| 67 | + run() | ||
| 68 | + } | ||
| 69 | +} |
-
请 注册 或 登录 后发表评论