Add Swift API for Kokoro TTS 1.0 (#1803)

Fangjun Kuang · GitHub
Commit e2e0f25100d73cd6dacdf607061d3735294089c5 e2e0f251 1 parent d8152047
.github/scripts/test-swift.sh
cxx-api-examples/kokoro-tts-en-cxx-api.cc
cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
swift-api-examples/.gitignore
swift-api-examples/SherpaOnnx.swift
swift-api-examples/run-tts-kokoro-zh-en.sh
swift-api-examples/tts-kokoro-zh-en.swift
--- a/.github/scripts/test-swift.sh
查看文件 @e2e0f25
+++ b/.github/scripts/test-swift.sh
查看文件 @e2e0f25
@@ -11,6 +11,10 @@ ls -lh
 ls -lh
 rm -rf vits-piper-*
 
+ ./run-tts-kokoro-zh-en.sh
+ ls -lh
+ rm -rf kokoro-multi-*
+ 
 ./run-tts-kokoro-en.sh
 ls -lh
 rm -rf kokoro-en-*
--- a/cxx-api-examples/kokoro-tts-en-cxx-api.cc
查看文件 @e2e0f25
+++ b/cxx-api-examples/kokoro-tts-en-cxx-api.cc
查看文件 @e2e0f25
@@ -3,7 +3,7 @@
 // Copyright (c)  2025  Xiaomi Corporation
 
 // This file shows how to use sherpa-onnx CXX API
- // for Chinese TTS with Kokoro.
+ // for English TTS with Kokoro.
 //
 // clang-format off
 /*
--- a/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
查看文件 @e2e0f25
+++ b/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
查看文件 @e2e0f25
@@ -3,7 +3,7 @@
 // Copyright (c)  2025  Xiaomi Corporation
 
 // This file shows how to use sherpa-onnx CXX API
- // for Chinese TTS with Kokoro.
+ // for Chinese + English TTS with Kokoro.
 //
 // clang-format off
 /*
--- a/swift-api-examples/.gitignore
查看文件 @e2e0f25
+++ b/swift-api-examples/.gitignore
查看文件 @e2e0f25
@@ -13,3 +13,4 @@ add-punctuations
 tts-matcha-zh
 tts-matcha-en
 tts-kokoro-en
+ tts-kokoro-zh-en
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @e2e0f25
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @e2e0f25
@@ -767,14 +767,18 @@ func sherpaOnnxOfflineTtsKokoroModelConfig(
   voices: String = "",
   tokens: String = "",
   dataDir: String = "",
-   lengthScale: Float = 1.0
+   lengthScale: Float = 1.0,
+   dictDir: String = "",
+   lexicon: String = ""
 ) -> SherpaOnnxOfflineTtsKokoroModelConfig {
   return SherpaOnnxOfflineTtsKokoroModelConfig(
     model: toCPointer(model),
     voices: toCPointer(voices),
     tokens: toCPointer(tokens),
     data_dir: toCPointer(dataDir),
-     length_scale: lengthScale
+     length_scale: lengthScale,
+     dict_dir: toCPointer(dictDir),
+     lexicon: toCPointer(lexicon)
   )
 }
 
--- a/swift-api-examples/run-tts-kokoro-zh-en.sh 0 → 100755
查看文件 @e2e0f25
+++ b/swift-api-examples/run-tts-kokoro-zh-en.sh 0 → 100755
查看文件 @e2e0f25
+ #!/usr/bin/env bash
+ #
+ # Builds (if needed) and runs the Swift Kokoro Chinese + English TTS example.
+ # Paths below are relative, so run this from the directory containing this
+ # script, after ../build-swift-macos.sh has produced ../build-swift-macos.
+ 
+ # -e: abort on the first failing command; -x: echo each command as it runs.
+ set -ex
+ 
+ if [ ! -d ../build-swift-macos ]; then
+   echo "Please run ../build-swift-macos.sh first!"
+   exit 1
+ fi
+ 
+ # please visit
+ # https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+ # to download more models
+ if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+   # -f makes curl exit non-zero on an HTTP error instead of saving the error
+   # page as the tarball, so a failed download stops here rather than in tar.
+   curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+   tar xf kokoro-multi-lang-v1_0.tar.bz2
+   rm kokoro-multi-lang-v1_0.tar.bz2
+ fi
+ 
+ if [ ! -e ./tts-kokoro-zh-en ]; then
+   # Note: We use -lc++ to link against libc++ instead of libstdc++
+   swiftc \
+     -lc++ \
+     -I ../build-swift-macos/install/include \
+     -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+     ./tts-kokoro-zh-en.swift  ./SherpaOnnx.swift \
+     -L ../build-swift-macos/install/lib/ \
+     -l sherpa-onnx \
+     -l onnxruntime \
+     -o tts-kokoro-zh-en
+ 
+   strip tts-kokoro-zh-en
+ else
+   echo "./tts-kokoro-zh-en exists - skip building"
+ fi
+ 
+ # Prepend the freshly built libraries to the dynamic loader search path.
+ # The ${VAR:+...} form appends the previous value only when it is non-empty,
+ # which avoids a trailing ':' (an empty search entry) when it was unset.
+ export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
+ ./tts-kokoro-zh-en
--- a/swift-api-examples/tts-kokoro-zh-en.swift 0 → 100644
查看文件 @e2e0f25
+++ b/swift-api-examples/tts-kokoro-zh-en.swift 0 → 100644
查看文件 @e2e0f25
+ /// Receiver for generated audio chunks; stands in for a real audio player.
+ ///
+ /// It is a class rather than a struct because `run()` passes a reference to it
+ /// through `Unmanaged`, which only works with class instances. It is `final`
+ /// because nothing in this example subclasses it.
+ final class MyClass {
+   /// Reports how many samples were handed over.
+   ///
+   /// - Parameter samples: The audio samples to "play"; only their count is used.
+   func playSamples(samples: [Float]) {
+     print("Play \(samples.count) samples")
+   }
+ }
+ 
+ /// Synthesizes a mixed Chinese/English sentence with the Kokoro multi-lingual
+ /// model and saves the audio to `test-kokoro-zh-en.wav`.
+ ///
+ /// All model files are read from `./kokoro-multi-lang-v1_0`, relative to the
+ /// current working directory. While audio is generated, each chunk of samples is
+ /// forwarded to `MyClass.playSamples(samples:)` through a C callback.
+ func run() {
+   let model = "./kokoro-multi-lang-v1_0/model.onnx"
+   let voices = "./kokoro-multi-lang-v1_0/voices.bin"
+   let tokens = "./kokoro-multi-lang-v1_0/tokens.txt"
+   let dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data"
+   let dictDir = "./kokoro-multi-lang-v1_0/dict"
+   // Several lexicon files are passed as one comma-separated string.
+   let lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt"
+   let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig(
+     model: model,
+     voices: voices,
+     tokens: tokens,
+     dataDir: dataDir,
+     dictDir: dictDir,
+     lexicon: lexicon
+   )
+   let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0)
+   var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
+ 
+   let myClass = MyClass()
+ 
+   // We use Unretained here so myClass must be kept alive as the callback is invoked
+   //
+   // See also
+   // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
+   let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()
+ 
+   let callback: TtsCallbackWithArg = { samples, n, arg in
+     // Without the receiver or the sample buffer there is nothing to play;
+     // skip this chunk instead of crashing, and keep generating.
+     guard let arg = arg, let samples = samples else { return 1 }
+ 
+     let o = Unmanaged<MyClass>.fromOpaque(arg).takeUnretainedValue()
+ 
+     // Copy the n samples into a Swift array.
+     let savedSamples: [Float] = (0..<Int(n)).map { samples[$0] }
+     o.playSamples(samples: savedSamples)
+ 
+     // return 1 so that it continues generating
+     return 1
+   }
+ 
+   let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
+ 
+   let text =
+     "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢？"
+   let sid = 0
+   let speed: Float = 1.0
+ 
+   // `arg` does not retain myClass, and Swift may release an object right after
+   // its last use rather than at the end of the scope. Pin myClass explicitly
+   // for the whole synthesis call so the callback never sees a dangling pointer.
+   let audio = withExtendedLifetime(myClass) {
+     tts.generateWithCallbackWithArg(
+       text: text, callback: callback, arg: arg, sid: sid, speed: speed)
+   }
+ 
+   let filename = "test-kokoro-zh-en.wav"
+   let ok = audio.save(filename: filename)
+   if ok == 1 {
+     print("\nSaved to:\(filename)")
+   } else {
+     print("Failed to save to \(filename)")
+   }
+ }
+ 
+ /// Program entry point for the example; all of the work happens in `run()`.
+ @main
+ struct App {
+   /// Called at launch because the enclosing type is marked `@main`.
+   static func main() { run() }
+ }