Fangjun Kuang
Committed by GitHub

Add Swift API for KittenTTS (#2476)

@@ -9,6 +9,10 @@ ls -lh @@ -9,6 +9,10 @@ ls -lh
9 9
10 ./run-test-version.sh 10 ./run-test-version.sh
11 11
  12 +./run-tts-kitten-en.sh
  13 +ls -lh
  14 +rm -rf kitten-*
  15 +
12 ./run-zipformer-ctc-asr.sh 16 ./run-zipformer-ctc-asr.sh
13 rm -rf sherpa-onnx-zipformer-* 17 rm -rf sherpa-onnx-zipformer-*
14 18
@@ -20,3 +20,4 @@ decode-file-sense-voice-with-hr @@ -20,3 +20,4 @@ decode-file-sense-voice-with-hr
20 test-version 20 test-version
21 zipformer-ctc-asr 21 zipformer-ctc-asr
22 dolphin-ctc-asr 22 dolphin-ctc-asr
  23 +tts-kitten-en
@@ -884,13 +884,30 @@ func sherpaOnnxOfflineTtsKokoroModelConfig( @@ -884,13 +884,30 @@ func sherpaOnnxOfflineTtsKokoroModelConfig(
884 ) 884 )
885 } 885 }
886 886
  887 +func sherpaOnnxOfflineTtsKittenModelConfig(
  888 + model: String = "",
  889 + voices: String = "",
  890 + tokens: String = "",
  891 + dataDir: String = "",
  892 + lengthScale: Float = 1.0
  893 +) -> SherpaOnnxOfflineTtsKittenModelConfig {
  894 + return SherpaOnnxOfflineTtsKittenModelConfig(
  895 + model: toCPointer(model),
  896 + voices: toCPointer(voices),
  897 + tokens: toCPointer(tokens),
  898 + data_dir: toCPointer(dataDir),
  899 + length_scale: lengthScale
  900 + )
  901 +}
  902 +
887 func sherpaOnnxOfflineTtsModelConfig( 903 func sherpaOnnxOfflineTtsModelConfig(
888 vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(), 904 vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
889 matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(), 905 matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
890 kokoro: SherpaOnnxOfflineTtsKokoroModelConfig = sherpaOnnxOfflineTtsKokoroModelConfig(), 906 kokoro: SherpaOnnxOfflineTtsKokoroModelConfig = sherpaOnnxOfflineTtsKokoroModelConfig(),
891 numThreads: Int = 1, 907 numThreads: Int = 1,
892 debug: Int = 0, 908 debug: Int = 0,
893 - provider: String = "cpu" 909 + provider: String = "cpu",
  910 + kitten: SherpaOnnxOfflineTtsKittenModelConfig = sherpaOnnxOfflineTtsKittenModelConfig()
894 ) -> SherpaOnnxOfflineTtsModelConfig { 911 ) -> SherpaOnnxOfflineTtsModelConfig {
895 return SherpaOnnxOfflineTtsModelConfig( 912 return SherpaOnnxOfflineTtsModelConfig(
896 vits: vits, 913 vits: vits,
@@ -898,7 +915,8 @@ func sherpaOnnxOfflineTtsModelConfig( @@ -898,7 +915,8 @@ func sherpaOnnxOfflineTtsModelConfig(
898 debug: Int32(debug), 915 debug: Int32(debug),
899 provider: toCPointer(provider), 916 provider: toCPointer(provider),
900 matcha: matcha, 917 matcha: matcha,
901 - kokoro: kokoro 918 + kokoro: kokoro,
  919 + kitten: kitten
902 ) 920 )
903 } 921 }
904 922
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d ../build-swift-macos ]; then
  6 + echo "Please run ../build-swift-macos.sh first!"
  7 + exit 1
  8 +fi
  9 +
  10 +# please visit
  11 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html
  12 +# to download more models
  13 +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
  14 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  15 + tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  16 + rm kitten-nano-en-v0_1-fp16.tar.bz2
  17 +fi
  18 +
  19 +if [ ! -e ./tts-kitten-en ]; then
  20 + # Note: We use -lc++ to link against libc++ instead of libstdc++
  21 + swiftc \
  22 + -lc++ \
  23 + -I ../build-swift-macos/install/include \
  24 + -import-objc-header ./SherpaOnnx-Bridging-Header.h \
  25 + ./tts-kitten-en.swift ./SherpaOnnx.swift \
  26 + -L ../build-swift-macos/install/lib/ \
  27 + -l sherpa-onnx \
  28 + -l onnxruntime \
  29 + -o tts-kitten-en
  30 +
  31 + strip tts-kitten-en
  32 +else
  33 + echo "./tts-kitten-en exists - skip building"
  34 +fi
  35 +
  36 +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
  37 +./tts-kitten-en
  1 +class MyClass {
  2 + func playSamples(samples: [Float]) {
  3 + print("Play \(samples.count) samples")
  4 + }
  5 +}
  6 +
  7 +func run() {
  8 + let model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx"
  9 + let voices = "./kitten-nano-en-v0_1-fp16/voices.bin"
  10 + let tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt"
  11 + let dataDir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data"
  12 + let kitten = sherpaOnnxOfflineTtsKittenModelConfig(
  13 + model: model,
  14 + voices: voices,
  15 + tokens: tokens,
  16 + dataDir: dataDir
  17 + )
  18 + let modelConfig = sherpaOnnxOfflineTtsModelConfig(debug: 0, kitten: kitten)
  19 + var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
  20 +
  21 + let myClass = MyClass()
  22 +
  23 + // We use Unretained here so myClass must be kept alive as the callback is invoked
  24 + //
  25 + // See also
  26 + // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  27 + let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()
  28 +
  29 + let callback: TtsCallbackWithArg = { samples, n, arg in
  30 + let o = Unmanaged<MyClass>.fromOpaque(arg!).takeUnretainedValue()
  31 + var savedSamples: [Float] = []
  32 + for index in 0..<n {
  33 + savedSamples.append(samples![Int(index)])
  34 + }
  35 +
  36 + o.playSamples(samples: savedSamples)
  37 +
  38 + // return 1 so that it continues generating
  39 + return 1
  40 + }
  41 +
  42 + let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
  43 +
  44 + let text =
  45 + "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  46 + let sid = 0
  47 + let speed: Float = 1.0
  48 +
  49 + let audio = tts.generateWithCallbackWithArg(
  50 + text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  51 + let filename = "test-kitten-en.wav"
  52 + let ok = audio.save(filename: filename)
  53 + if ok == 1 {
  54 + print("\nSaved to:\(filename)")
  55 + } else {
  56 + print("Failed to save to \(filename)")
  57 + }
  58 +}
  59 +
  60 +@main
  61 +struct App {
  62 + static func main() {
  63 + run()
  64 + }
  65 +}