Committed by
GitHub
Add Swift API for Kokoro TTS models (#1721)
正在显示
8 个修改的文件
包含
134 行增加
和
7 行删除
| @@ -11,6 +11,10 @@ ls -lh | @@ -11,6 +11,10 @@ ls -lh | ||
| 11 | ls -lh | 11 | ls -lh |
| 12 | rm -rf vits-piper-* | 12 | rm -rf vits-piper-* |
| 13 | 13 | ||
| 14 | +./run-tts-kokoro-en.sh | ||
| 15 | +ls -lh | ||
| 16 | +rm -rf kokoro-en-* | ||
| 17 | + | ||
| 14 | ./run-tts-matcha-zh.sh | 18 | ./run-tts-matcha-zh.sh |
| 15 | ls -lh | 19 | ls -lh |
| 16 | rm -rf matcha-icefall-* | 20 | rm -rf matcha-icefall-* |
| @@ -736,7 +736,8 @@ func sherpaOnnxOfflineTtsVitsModelConfig( | @@ -736,7 +736,8 @@ func sherpaOnnxOfflineTtsVitsModelConfig( | ||
| 736 | noise_scale: noiseScale, | 736 | noise_scale: noiseScale, |
| 737 | noise_scale_w: noiseScaleW, | 737 | noise_scale_w: noiseScaleW, |
| 738 | length_scale: lengthScale, | 738 | length_scale: lengthScale, |
| 739 | - dict_dir: toCPointer(dictDir)) | 739 | + dict_dir: toCPointer(dictDir) |
| 740 | + ) | ||
| 740 | } | 741 | } |
| 741 | 742 | ||
| 742 | func sherpaOnnxOfflineTtsMatchaModelConfig( | 743 | func sherpaOnnxOfflineTtsMatchaModelConfig( |
| @@ -757,12 +758,30 @@ func sherpaOnnxOfflineTtsMatchaModelConfig( | @@ -757,12 +758,30 @@ func sherpaOnnxOfflineTtsMatchaModelConfig( | ||
| 757 | data_dir: toCPointer(dataDir), | 758 | data_dir: toCPointer(dataDir), |
| 758 | noise_scale: noiseScale, | 759 | noise_scale: noiseScale, |
| 759 | length_scale: lengthScale, | 760 | length_scale: lengthScale, |
| 760 | - dict_dir: toCPointer(dictDir)) | 761 | + dict_dir: toCPointer(dictDir) |
| 762 | + ) | ||
| 763 | +} | ||
| 764 | + | ||
/// Builds a `SherpaOnnxOfflineTtsKokoroModelConfig` from Swift values.
///
/// Every string argument is passed through `toCPointer` before being stored in
/// the C struct; `lengthScale` is forwarded unchanged.
///
/// - Parameters:
///   - model: Stored in the `model` field (the Kokoro example passes the path of `model.onnx`).
///   - voices: Stored in the `voices` field (the example passes the path of `voices.bin`).
///   - tokens: Stored in the `tokens` field (the example passes the path of `tokens.txt`).
///   - dataDir: Stored in the `data_dir` field (the example passes an `espeak-ng-data` directory).
///   - lengthScale: Stored in the `length_scale` field; defaults to `1.0`.
/// - Returns: The populated C configuration struct.
func sherpaOnnxOfflineTtsKokoroModelConfig(
  model: String = "",
  voices: String = "",
  tokens: String = "",
  dataDir: String = "",
  lengthScale: Float = 1.0
) -> SherpaOnnxOfflineTtsKokoroModelConfig {
  // Convert the strings first, in field order, then assemble the struct.
  let modelPtr = toCPointer(model)
  let voicesPtr = toCPointer(voices)
  let tokensPtr = toCPointer(tokens)
  let dataDirPtr = toCPointer(dataDir)

  return SherpaOnnxOfflineTtsKokoroModelConfig(
    model: modelPtr,
    voices: voicesPtr,
    tokens: tokensPtr,
    data_dir: dataDirPtr,
    length_scale: lengthScale
  )
}
| 762 | 780 | ||
| 763 | func sherpaOnnxOfflineTtsModelConfig( | 781 | func sherpaOnnxOfflineTtsModelConfig( |
| 764 | vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(), | 782 | vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(), |
| 765 | matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(), | 783 | matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(), |
| 784 | + kokoro: SherpaOnnxOfflineTtsKokoroModelConfig = sherpaOnnxOfflineTtsKokoroModelConfig(), | ||
| 766 | numThreads: Int = 1, | 785 | numThreads: Int = 1, |
| 767 | debug: Int = 0, | 786 | debug: Int = 0, |
| 768 | provider: String = "cpu" | 787 | provider: String = "cpu" |
| @@ -772,7 +791,8 @@ func sherpaOnnxOfflineTtsModelConfig( | @@ -772,7 +791,8 @@ func sherpaOnnxOfflineTtsModelConfig( | ||
| 772 | num_threads: Int32(numThreads), | 791 | num_threads: Int32(numThreads), |
| 773 | debug: Int32(debug), | 792 | debug: Int32(debug), |
| 774 | provider: toCPointer(provider), | 793 | provider: toCPointer(provider), |
| 775 | - matcha: matcha | 794 | + matcha: matcha, |
| 795 | + kokoro: kokoro | ||
| 776 | ) | 796 | ) |
| 777 | } | 797 | } |
| 778 | 798 | ||
| @@ -780,7 +800,7 @@ func sherpaOnnxOfflineTtsConfig( | @@ -780,7 +800,7 @@ func sherpaOnnxOfflineTtsConfig( | ||
| 780 | model: SherpaOnnxOfflineTtsModelConfig, | 800 | model: SherpaOnnxOfflineTtsModelConfig, |
| 781 | ruleFsts: String = "", | 801 | ruleFsts: String = "", |
| 782 | ruleFars: String = "", | 802 | ruleFars: String = "", |
| 783 | - maxNumSentences: Int = 2 | 803 | + maxNumSentences: Int = 1 |
| 784 | ) -> SherpaOnnxOfflineTtsConfig { | 804 | ) -> SherpaOnnxOfflineTtsConfig { |
| 785 | return SherpaOnnxOfflineTtsConfig( | 805 | return SherpaOnnxOfflineTtsConfig( |
| 786 | model: model, | 806 | model: model, |
swift-api-examples/run-tts-kokoro-en.sh
0 → 100755
#!/usr/bin/env bash
#
# Downloads the kokoro-en-v0_19 model (if missing), builds the Swift Kokoro TTS
# example (if missing) and runs it.
#
# All paths are relative, so run this script from the swift-api-examples
# directory after ../build-swift-macos.sh has produced ../build-swift-macos.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page as the archive, so the failure is reported here and not later by tar.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

if [ ! -e ./tts-kokoro-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-kokoro-en.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-kokoro-en

  strip tts-kokoro-en
else
  echo "./tts-kokoro-en exists - skip building"
fi

# Append the previous value only when it is non-empty, so the search path never
# ends with a stray ':' (an empty entry).
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
./tts-kokoro-en
| @@ -21,7 +21,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then | @@ -21,7 +21,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 21 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 21 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx |
| 22 | fi | 22 | fi |
| 23 | 23 | ||
| 24 | -if [ ! -e ./tts ]; then | 24 | +if [ ! -e ./tts-matcha-en ]; then |
| 25 | # Note: We use -lc++ to link against libc++ instead of libstdc++ | 25 | # Note: We use -lc++ to link against libc++ instead of libstdc++ |
| 26 | swiftc \ | 26 | swiftc \ |
| 27 | -lc++ \ | 27 | -lc++ \ |
| @@ -20,7 +20,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then | @@ -20,7 +20,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 20 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 20 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx |
| 21 | fi | 21 | fi |
| 22 | 22 | ||
| 23 | -if [ ! -e ./tts ]; then | 23 | +if [ ! -e ./tts-matcha-zh ]; then |
| 24 | # Note: We use -lc++ to link against libc++ instead of libstdc++ | 24 | # Note: We use -lc++ to link against libc++ instead of libstdc++ |
| 25 | swiftc \ | 25 | swiftc \ |
| 26 | -lc++ \ | 26 | -lc++ \ |
| @@ -15,7 +15,7 @@ if [ ! -d ./vits-piper-en_US-amy-low ]; then | @@ -15,7 +15,7 @@ if [ ! -d ./vits-piper-en_US-amy-low ]; then | ||
| 15 | rm vits-piper-en_US-amy-low.tar.bz2 | 15 | rm vits-piper-en_US-amy-low.tar.bz2 |
| 16 | fi | 16 | fi |
| 17 | 17 | ||
| 18 | -if [ ! -e ./tts ]; then | 18 | +if [ ! -e ./tts-vits ]; then |
| 19 | # Note: We use -lc++ to link against libc++ instead of libstdc++ | 19 | # Note: We use -lc++ to link against libc++ instead of libstdc++ |
| 20 | swiftc \ | 20 | swiftc \ |
| 21 | -lc++ \ | 21 | -lc++ \ |
swift-api-examples/tts-kokoro-en.swift
0 → 100644
/// Example receiver for generated audio chunks.
///
/// The TTS callback in `run()` forwards every chunk of samples to an instance
/// of this class.
class MyClass {
  /// Prints how many samples were received; no audio is actually played here.
  ///
  /// - Parameter samples: The samples of one generated chunk.
  func playSamples(samples: [Float]) {
    let message = "Play \(samples.count) samples"
    print(message)
  }
}
| 6 | + | ||
/// Synthesizes a short English passage with the Kokoro model found under
/// `./kokoro-en-v0_19/` and saves the result to `test-kokoro-en.wav`.
///
/// While audio is being generated, each chunk of samples is handed to a
/// `MyClass` instance through a C callback. The outcome of saving the file is
/// printed to standard output.
func run() {
  let model = "./kokoro-en-v0_19/model.onnx"
  let voices = "./kokoro-en-v0_19/voices.bin"
  let tokens = "./kokoro-en-v0_19/tokens.txt"
  let dataDir = "./kokoro-en-v0_19/espeak-ng-data"
  let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // The callback only sees `myClass` as an unretained opaque pointer, so ARC
  // does not know the object is still in use. The generate call below is
  // therefore wrapped in `withExtendedLifetime(myClass)`.
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  let callback: TtsCallbackWithArg = { samples, n, userData in
    // Without the context pointer there is nobody to deliver the chunk to;
    // skip it and keep generating instead of crashing on a forced unwrap.
    guard let userData = userData else {
      return 1
    }
    let receiver = Unmanaged<MyClass>.fromOpaque(userData).takeUnretainedValue()

    // Copy the C buffer in one step. A nil buffer or a non-positive count
    // yields an empty chunk.
    let count = samples == nil ? 0 : max(0, Int(n))
    let chunk = Array(UnsafeBufferPointer(start: samples, count: count))

    receiver.playSamples(samples: chunk)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  let sid = 0
  let speed: Float = 1.0

  // Keep `myClass` alive until generation, and with it every callback
  // invocation, has finished.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }

  let filename = "test-kokoro-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}
| 59 | + | ||
/// Program entry point for the Kokoro TTS example.
@main
struct App {
  /// Hands control straight to `run()`.
  static func main() { run() }
}
-
请 注册 或 登录 后发表评论