Add Swift API for Kokoro TTS models (#1721)

Fangjun Kuang · GitHub
Commit ad61ad6ff52b8a0ad220d7d2a6959a7d9db341e1 ad61ad6f 1 parent cc812e62
.github/scripts/test-swift.sh
swift-api-examples/.gitignore
swift-api-examples/SherpaOnnx.swift
swift-api-examples/run-tts-kokoro-en.sh
swift-api-examples/run-tts-matcha-en.sh
swift-api-examples/run-tts-matcha-zh.sh
swift-api-examples/run-tts-vits.sh
swift-api-examples/tts-kokoro-en.swift
--- a/.github/scripts/test-swift.sh
查看文件 @ad61ad6
+++ b/.github/scripts/test-swift.sh
查看文件 @ad61ad6
@@ -11,6 +11,10 @@ ls -lh
 ls -lh
 rm -rf vits-piper-*
+./run-tts-kokoro-en.sh
+ls -lh
+rm -rf kokoro-en-*
+
 ./run-tts-matcha-zh.sh
 ls -lh
 rm -rf matcha-icefall-*
--- a/swift-api-examples/.gitignore
查看文件 @ad61ad6
+++ b/swift-api-examples/.gitignore
查看文件 @ad61ad6
@@ -12,3 +12,4 @@ keyword-spotting-from-file
 add-punctuations
 tts-matcha-zh
 tts-matcha-en
+tts-kokoro-en
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @ad61ad6
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @ad61ad6
@@ -736,7 +736,8 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
     noise_scale: noiseScale,
     noise_scale_w: noiseScaleW,
     length_scale: lengthScale,
-    dict_dir: toCPointer(dictDir))
+    dict_dir: toCPointer(dictDir)
+  )
 }
 func sherpaOnnxOfflineTtsMatchaModelConfig(
@@ -757,12 +758,30 @@ func sherpaOnnxOfflineTtsMatchaModelConfig(
     data_dir: toCPointer(dataDir),
     noise_scale: noiseScale,
     length_scale: lengthScale,
-    dict_dir: toCPointer(dictDir))
+    dict_dir: toCPointer(dictDir)
+  )
+}
+
+/// Builds a `SherpaOnnxOfflineTtsKokoroModelConfig` from Swift values.
+///
+/// Every string argument is converted with `toCPointer` before being stored
+/// in the returned struct; `lengthScale` is stored unchanged.
+///
+/// - Parameters:
+///   - model: Stored in the `model` field (e.g. the path of `model.onnx`).
+///   - voices: Stored in the `voices` field (e.g. the path of `voices.bin`).
+///   - tokens: Stored in the `tokens` field (e.g. the path of `tokens.txt`).
+///   - dataDir: Stored in the `data_dir` field (e.g. an `espeak-ng-data`
+///     directory).
+///   - lengthScale: Stored in the `length_scale` field. Defaults to 1.0.
+/// - Returns: The populated configuration struct.
+///
+/// NOTE(review): the lifetime of the pointers produced by `toCPointer` is
+/// decided by that helper, which is defined elsewhere - confirm before
+/// holding on to the result for long.
+func sherpaOnnxOfflineTtsKokoroModelConfig(
+  model: String = "",
+  voices: String = "",
+  tokens: String = "",
+  dataDir: String = "",
+  lengthScale: Float = 1.0
+) -> SherpaOnnxOfflineTtsKokoroModelConfig {
+  // Convert the Swift strings first, in field order, then assemble the struct.
+  let modelPtr = toCPointer(model)
+  let voicesPtr = toCPointer(voices)
+  let tokensPtr = toCPointer(tokens)
+  let dataDirPtr = toCPointer(dataDir)
+
+  let config = SherpaOnnxOfflineTtsKokoroModelConfig(
+    model: modelPtr,
+    voices: voicesPtr,
+    tokens: tokensPtr,
+    data_dir: dataDirPtr,
+    length_scale: lengthScale
+  )
+  return config
 }
 func sherpaOnnxOfflineTtsModelConfig(
   vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
   matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
+  kokoro: SherpaOnnxOfflineTtsKokoroModelConfig = sherpaOnnxOfflineTtsKokoroModelConfig(),
   numThreads: Int = 1,
   debug: Int = 0,
   provider: String = "cpu"
@@ -772,7 +791,8 @@ func sherpaOnnxOfflineTtsModelConfig(
     num_threads: Int32(numThreads),
     debug: Int32(debug),
     provider: toCPointer(provider),
-    matcha: matcha
+    matcha: matcha,
+    kokoro: kokoro
   )
 }
@@ -780,7 +800,7 @@ func sherpaOnnxOfflineTtsConfig(
   model: SherpaOnnxOfflineTtsModelConfig,
   ruleFsts: String = "",
   ruleFars: String = "",
-  maxNumSentences: Int = 2
+  maxNumSentences: Int = 1
 ) -> SherpaOnnxOfflineTtsConfig {
   return SherpaOnnxOfflineTtsConfig(
     model: model,
--- a/swift-api-examples/run-tts-kokoro-en.sh 0 → 100755
查看文件 @ad61ad6
+++ b/swift-api-examples/run-tts-kokoro-en.sh 0 → 100755
查看文件 @ad61ad6
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -d ../build-swift-macos ]; then
+  echo "Please run ../build-swift-macos.sh first!"
+  exit 1
+fi
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+# to download more models
+if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+  tar xf kokoro-en-v0_19.tar.bz2
+  rm kokoro-en-v0_19.tar.bz2
+fi
+
+if [ ! -e ./tts-kokoro-en ]; then
+  # Note: We use -lc++ to link against libc++ instead of libstdc++
+  swiftc \
+    -lc++ \
+    -I ../build-swift-macos/install/include \
+    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+    ./tts-kokoro-en.swift  ./SherpaOnnx.swift \
+    -L ../build-swift-macos/install/lib/ \
+    -l sherpa-onnx \
+    -l onnxruntime \
+    -o tts-kokoro-en
+
+  strip tts-kokoro-en
+else
+  echo "./tts-kokoro-en exists - skip building"
+fi
+
+export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
+./tts-kokoro-en
--- a/swift-api-examples/run-tts-matcha-en.sh
查看文件 @ad61ad6
+++ b/swift-api-examples/run-tts-matcha-en.sh
查看文件 @ad61ad6
@@ -21,7 +21,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then
   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
 fi
-if [ ! -e ./tts ]; then
+if [ ! -e ./tts-matcha-en ]; then
   # Note: We use -lc++ to link against libc++ instead of libstdc++
   swiftc \
     -lc++ \
--- a/swift-api-examples/run-tts-matcha-zh.sh
查看文件 @ad61ad6
+++ b/swift-api-examples/run-tts-matcha-zh.sh
查看文件 @ad61ad6
@@ -20,7 +20,7 @@ if [ ! -f ./hifigan_v2.onnx ]; then
   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
 fi
-if [ ! -e ./tts ]; then
+if [ ! -e ./tts-matcha-zh ]; then
   # Note: We use -lc++ to link against libc++ instead of libstdc++
   swiftc \
     -lc++ \
--- a/swift-api-examples/run-tts-vits.sh
查看文件 @ad61ad6
+++ b/swift-api-examples/run-tts-vits.sh
查看文件 @ad61ad6
@@ -15,7 +15,7 @@ if [ ! -d ./vits-piper-en_US-amy-low ]; then
   rm vits-piper-en_US-amy-low.tar.bz2
 fi
-if [ ! -e ./tts ]; then
+if [ ! -e ./tts-vits ]; then
   # Note: We use -lc++ to link against libc++ instead of libstdc++
   swiftc \
     -lc++ \
--- a/swift-api-examples/tts-kokoro-en.swift 0 → 100644
查看文件 @ad61ad6
+++ b/swift-api-examples/tts-kokoro-en.swift 0 → 100644
查看文件 @ad61ad6
+/// Minimal receiver for generated audio used by this example.
+///
+/// An instance is handed to the TTS callback as an opaque pointer; the
+/// callback forwards each batch of samples to `playSamples(samples:)`.
+class MyClass {
+  /// Prints how many samples were received. No audio is actually played.
+  ///
+  /// - Parameter samples: The samples delivered by the callback.
+  func playSamples(samples: [Float]) {
+    let count = samples.count
+    print("Play \(count) samples")
+  }
+}
+
+/// Synthesizes a fixed English sentence with a Kokoro model and writes the
+/// result to `test-kokoro-en.wav`.
+///
+/// The model files are read from `./kokoro-en-v0_19/`. While audio is being
+/// generated, each batch of samples is forwarded to a `MyClass` instance via a
+/// callback. A message is printed telling whether saving the file succeeded.
+func run() {
+  let model = "./kokoro-en-v0_19/model.onnx"
+  let voices = "./kokoro-en-v0_19/voices.bin"
+  let tokens = "./kokoro-en-v0_19/tokens.txt"
+  let dataDir = "./kokoro-en-v0_19/espeak-ng-data"
+  let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
+    model: model,
+    voices: voices,
+    tokens: tokens,
+    dataDir: dataDir
+  )
+  let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0)
+  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
+
+  let myClass = MyClass()
+
+  // We use Unretained here so myClass must be kept alive as the callback is
+  // invoked; the generate call below is wrapped in withExtendedLifetime to
+  // guarantee that, since Swift may otherwise release an object right after
+  // its last use.
+  //
+  // See also
+  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
+  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()
+
+  let callback: TtsCallbackWithArg = { samples, n, arg in
+    // Without a receiver there is nothing to play; keep generating anyway.
+    guard let arg = arg else {
+      return 1
+    }
+    let receiver = Unmanaged<MyClass>.fromOpaque(arg).takeUnretainedValue()
+
+    // Copy the samples into a Swift array in one step. A nil pointer or a
+    // non-positive count yields an empty array instead of trapping.
+    let count = samples == nil ? 0 : Int(max(n, 0))
+    let savedSamples = Array(UnsafeBufferPointer(start: samples, count: count))
+
+    receiver.playSamples(samples: savedSamples)
+
+    // return 1 so that it continues generating
+    return 1
+  }
+
+  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
+
+  let text =
+    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+  let sid = 0
+  let speed: Float = 1.0
+
+  // Keep myClass alive for the whole synthesis call, during which the
+  // callback dereferences the unretained pointer.
+  let audio = withExtendedLifetime(myClass) {
+    tts.generateWithCallbackWithArg(
+      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
+  }
+  let filename = "test-kokoro-en.wav"
+  let ok = audio.save(filename: filename)
+  if ok == 1 {
+    print("\nSaved to:\(filename)")
+  } else {
+    print("Failed to save to \(filename)")
+  }
+}
+
+/// Program entry point: runs the Kokoro TTS example once.
+@main
+struct App {
+  /// Called at program start; delegates to `run()`.
+  static func main() { run() }
+}