Add Swift API for MatchaTTS models. (#1684)

Fangjun Kuang · GitHub
Commit 6f085babcc51dd05a411b6949318c401a5adbf3f 6f085bab 1 parent 1fe5fe49
.github/scripts/test-swift.sh
java-api-examples/run-non-streaming-tts-matcha-en.sh
nodejs-addon-examples/README.md
nodejs-examples/README.md
swift-api-examples/.gitignore
swift-api-examples/SherpaOnnx.swift
swift-api-examples/run-tts-matcha-en.sh
swift-api-examples/run-tts-matcha-zh.sh
swift-api-examples/run-tts.sh → swift-api-examples/run-tts-vits.sh
swift-api-examples/tts-matcha-en.swift
swift-api-examples/tts-matcha-zh.swift
swift-api-examples/tts.swift → swift-api-examples/tts-vits.swift
--- a/.github/scripts/test-swift.sh
查看文件 @6f085ba
+++ b/.github/scripts/test-swift.sh
查看文件 @6f085ba
@@ -7,6 +7,18 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
 
+ ./run-tts-vits.sh
+ ls -lh
+ rm -rf vits-piper-*
+ 
+ ./run-tts-matcha-zh.sh
+ ls -lh
+ rm -rf matcha-icefall-*
+ 
+ ./run-tts-matcha-en.sh
+ ls -lh
+ rm -rf matcha-icefall-*
+ 
 ./run-speaker-diarization.sh
 rm -rf *.onnx
 rm -rf sherpa-onnx-pyannote-segmentation-3-0
@@ -38,8 +50,9 @@ popd
 ls -lh /Users/fangjun/Desktop
 cat /Users/fangjun/Desktop/Obama.srt
 
- ./run-tts.sh
- ls -lh
+ rm -rf sherpa-onnx-whisper*
+ rm -f *.onnx
+ rm /Users/fangjun/Desktop/Obama.wav
 
 ./run-decode-file.sh
 rm decode-file
@@ -48,5 +61,4 @@ sed -i.bak  '20d' ./decode-file.swift
 
 ./run-decode-file-non-streaming.sh
 
- 
 ls -lh
--- a/java-api-examples/run-non-streaming-tts-matcha-en.sh
查看文件 @6f085ba
+++ b/java-api-examples/run-non-streaming-tts-matcha-en.sh
查看文件 @6f085ba
@@ -31,7 +31,7 @@ fi
 # to download more models
 if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
-   tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+   tar xf matcha-icefall-en_US-ljspeech.tar.bz2
   rm matcha-icefall-en_US-ljspeech.tar.bz2
 fi
 
--- a/nodejs-addon-examples/README.md
查看文件 @6f085ba
+++ b/nodejs-addon-examples/README.md
查看文件 @6f085ba
@@ -350,7 +350,7 @@ node ./test_vad_asr_non_streaming_sense_voice_microphone.js
 ### Text-to-speech with MatchaTTS models (English TTS)
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
- tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
 rm matcha-icefall-en_US-ljspeech.tar.bz2
 
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
--- a/nodejs-examples/README.md
查看文件 @6f085ba
+++ b/nodejs-examples/README.md
查看文件 @6f085ba
@@ -70,7 +70,7 @@ You can use the following command to run it:
 
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
- tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+ tar xf matcha-icefall-en_US-ljspeech.tar.bz2
 rm matcha-icefall-en_US-ljspeech.tar.bz2
 
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
--- a/swift-api-examples/.gitignore
查看文件 @6f085ba
+++ b/swift-api-examples/.gitignore
查看文件 @6f085ba
@@ -2,7 +2,7 @@ decode-file
 decode-file-non-streaming
 generate-subtitles
 spoken-language-identification
- tts
+ tts-vits
 vits-vctk
 sherpa-onnx-paraformer-zh-2023-09-14
 !*.sh
@@ -10,3 +10,5 @@ sherpa-onnx-paraformer-zh-2023-09-14
 streaming-hlg-decode-file
 keyword-spotting-from-file
 add-punctuations
+ tts-matcha-zh
+ tts-matcha-en
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @6f085ba
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @6f085ba
@@ -719,9 +719,9 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
 
 // offline tts
 func sherpaOnnxOfflineTtsVitsModelConfig(
-   model: String,
-   lexicon: String,
-   tokens: String,
+   model: String = "",
+   lexicon: String = "",
+   tokens: String = "",
   dataDir: String = "",
   noiseScale: Float = 0.667,
   noiseScaleW: Float = 0.8,
@@ -739,8 +739,30 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
     dict_dir: toCPointer(dictDir))
 }
 
+ /// Creates the C configuration struct for a MatchaTTS offline TTS model.
+ ///
+ /// Every string argument is converted with `toCPointer`; the two `Float`
+ /// arguments are forwarded unchanged. All parameters default to an empty
+ /// string or a fixed number, so callers only pass what their model needs.
+ ///
+ /// - Parameters:
+ ///   - acousticModel: Path to the acoustic model, e.g. `model-steps-3.onnx`.
+ ///   - vocoder: Path to the vocoder model, e.g. `hifigan_v2.onnx`.
+ ///   - lexicon: Path to a lexicon file, e.g. `lexicon.txt`.
+ ///   - tokens: Path to the tokens file, e.g. `tokens.txt`.
+ ///   - dataDir: Data directory, e.g. an `espeak-ng-data` directory.
+ ///   - noiseScale: Stored in `noise_scale` (presumably the sampling noise; confirm against the C API).
+ ///   - lengthScale: Stored in `length_scale` (presumably a duration factor; confirm against the C API).
+ ///   - dictDir: Dictionary directory, e.g. the `dict` directory of a Chinese model.
+ /// - Returns: A populated `SherpaOnnxOfflineTtsMatchaModelConfig`.
+ func sherpaOnnxOfflineTtsMatchaModelConfig(
+   acousticModel: String = "",
+   vocoder: String = "",
+   lexicon: String = "",
+   tokens: String = "",
+   dataDir: String = "",
+   noiseScale: Float = 0.667,
+   lengthScale: Float = 1.0,
+   dictDir: String = ""
+ ) -> SherpaOnnxOfflineTtsMatchaModelConfig {
+   // Convert the strings first, in the same order as the struct fields.
+   let acousticModelPtr = toCPointer(acousticModel)
+   let vocoderPtr = toCPointer(vocoder)
+   let lexiconPtr = toCPointer(lexicon)
+   let tokensPtr = toCPointer(tokens)
+   let dataDirPtr = toCPointer(dataDir)
+   let dictDirPtr = toCPointer(dictDir)
+ 
+   let config = SherpaOnnxOfflineTtsMatchaModelConfig(
+     acoustic_model: acousticModelPtr,
+     vocoder: vocoderPtr,
+     lexicon: lexiconPtr,
+     tokens: tokensPtr,
+     data_dir: dataDirPtr,
+     noise_scale: noiseScale,
+     length_scale: lengthScale,
+     dict_dir: dictDirPtr)
+   return config
+ }
+ 
 func sherpaOnnxOfflineTtsModelConfig(
-   vits: SherpaOnnxOfflineTtsVitsModelConfig,
+   vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
+   matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
   numThreads: Int = 1,
   debug: Int = 0,
   provider: String = "cpu"
@@ -749,7 +771,8 @@ func sherpaOnnxOfflineTtsModelConfig(
     vits: vits,
     num_threads: Int32(numThreads),
     debug: Int32(debug),
-     provider: toCPointer(provider)
+     provider: toCPointer(provider),
+     matcha: matcha
   )
 }
 
--- a/swift-api-examples/run-tts-matcha-en.sh 0 → 100755
查看文件 @6f085ba
+++ b/swift-api-examples/run-tts-matcha-en.sh 0 → 100755
查看文件 @6f085ba
+ #!/usr/bin/env bash
+ #
+ # Downloads the English MatchaTTS model and vocoder if they are missing,
+ # builds ./tts-matcha-en from ./tts-matcha-en.swift if it does not exist yet,
+ # and runs it.
+ 
+ set -ex
+ 
+ if [ ! -d ../build-swift-macos ]; then
+   echo "Please run ../build-swift-macos.sh first!"
+   exit 1
+ fi
+ 
+ # please visit
+ # https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
+ # to download more models
+ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+   tar xf matcha-icefall-en_US-ljspeech.tar.bz2
+   rm matcha-icefall-en_US-ljspeech.tar.bz2
+ fi
+ 
+ if [ ! -f ./hifigan_v2.onnx ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+ fi
+ 
+ # Check for the binary this script actually produces, so an existing build
+ # is reused instead of being recompiled on every run.
+ if [ ! -e ./tts-matcha-en ]; then
+   # Note: We use -lc++ to link against libc++ instead of libstdc++
+   swiftc \
+     -lc++ \
+     -I ../build-swift-macos/install/include \
+     -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+     ./tts-matcha-en.swift  ./SherpaOnnx.swift \
+     -L ../build-swift-macos/install/lib/ \
+     -l sherpa-onnx \
+     -l onnxruntime \
+     -o tts-matcha-en
+ 
+   strip tts-matcha-en
+ else
+   echo "./tts-matcha-en exists - skip building"
+ fi
+ 
+ export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
+ ./tts-matcha-en
--- a/swift-api-examples/run-tts-matcha-zh.sh 0 → 100755
查看文件 @6f085ba
+++ b/swift-api-examples/run-tts-matcha-zh.sh 0 → 100755
查看文件 @6f085ba
+ #!/usr/bin/env bash
+ #
+ # Downloads the Chinese MatchaTTS model and vocoder if they are missing,
+ # builds ./tts-matcha-zh from ./tts-matcha-zh.swift if it does not exist yet,
+ # and runs it.
+ 
+ set -ex
+ 
+ if [ ! -d ../build-swift-macos ]; then
+   echo "Please run ../build-swift-macos.sh first!"
+   exit 1
+ fi
+ 
+ # please visit
+ # https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
+ # to download more models
+ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+   # Quiet extraction, matching the English MatchaTTS script.
+   tar xf matcha-icefall-zh-baker.tar.bz2
+   rm matcha-icefall-zh-baker.tar.bz2
+ fi
+ 
+ if [ ! -f ./hifigan_v2.onnx ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+ fi
+ 
+ # Check for the binary this script actually produces, so an existing build
+ # is reused instead of being recompiled on every run.
+ if [ ! -e ./tts-matcha-zh ]; then
+   # Note: We use -lc++ to link against libc++ instead of libstdc++
+   swiftc \
+     -lc++ \
+     -I ../build-swift-macos/install/include \
+     -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+     ./tts-matcha-zh.swift  ./SherpaOnnx.swift \
+     -L ../build-swift-macos/install/lib/ \
+     -l sherpa-onnx \
+     -l onnxruntime \
+     -o tts-matcha-zh
+ 
+   strip tts-matcha-zh
+ else
+   echo "./tts-matcha-zh exists - skip building"
+ fi
+ 
+ export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
+ ./tts-matcha-zh
--- a/swift-api-examples/run-tts.sh → swift-api-examples/run-tts-vits.sh
查看文件 @6f085ba
+++ b/swift-api-examples/run-tts.sh → swift-api-examples/run-tts-vits.sh
查看文件 @6f085ba
@@ -21,16 +21,16 @@ if [ ! -e ./tts ]; then
     -lc++ \
     -I ../build-swift-macos/install/include \
     -import-objc-header ./SherpaOnnx-Bridging-Header.h \
-     ./tts.swift  ./SherpaOnnx.swift \
+     ./tts-vits.swift  ./SherpaOnnx.swift \
     -L ../build-swift-macos/install/lib/ \
     -l sherpa-onnx \
     -l onnxruntime \
-     -o tts
+     -o tts-vits
 
-   strip tts
+   strip tts-vits
 else
-   echo "./tts exists - skip building"
+   echo "./tts-vits exists - skip building"
 fi
 
 export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
- ./tts
+ ./tts-vits
--- a/swift-api-examples/tts-matcha-en.swift 0 → 100644
查看文件 @6f085ba
+++ b/swift-api-examples/tts-matcha-en.swift 0 → 100644
查看文件 @6f085ba
+ /// Receiver object handed to the TTS callback in `run()`.
+ class MyClass {
+   /// Prints how many samples were delivered; it does not play any audio.
+   ///
+   /// - Parameter samples: One chunk of generated audio samples.
+   func playSamples(samples: [Float]) {
+     let count = samples.count
+     print("Play \(count) samples")
+   }
+ }
+ 
+ /// Synthesizes one English sentence with a MatchaTTS model and saves the
+ /// result to `test-matcha-en.wav`.
+ ///
+ /// The model files are read from `./matcha-icefall-en_US-ljspeech` and
+ /// `./hifigan_v2.onnx`, relative to the current working directory. While the
+ /// audio is generated, each chunk is reported through `MyClass.playSamples`.
+ func run() {
+   let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"
+   let vocoder = "./hifigan_v2.onnx"
+   let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"
+   let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"
+   let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
+     acousticModel: acousticModel,
+     vocoder: vocoder,
+     tokens: tokens,
+     dataDir: dataDir
+   )
+   let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
+   var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
+ 
+   let myClass = MyClass()
+ 
+   // The callback receives myClass as an unretained opaque pointer, so myClass
+   // must be kept alive for as long as the callback can be invoked.
+   //
+   // See also
+   // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
+   let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()
+ 
+   let callback: TtsCallbackWithArg = { samples, n, context in
+     // Without a receiver there is nothing to report; keep generating so the
+     // complete audio can still be saved afterwards.
+     guard let context = context else { return 1 }
+     let receiver = Unmanaged<MyClass>.fromOpaque(context).takeUnretainedValue()
+ 
+     // Copy the chunk in one step. A nil pointer or a non-positive count is
+     // treated as an empty chunk instead of trapping.
+     let chunk: [Float]
+     if let samples = samples, n > 0 {
+       chunk = Array(UnsafeBufferPointer(start: samples, count: Int(n)))
+     } else {
+       chunk = []
+     }
+ 
+     receiver.playSamples(samples: chunk)
+ 
+     // return 1 so that it continues generating
+     return 1
+   }
+ 
+   let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
+ 
+   let text =
+     "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+   let sid = 0
+   let speed: Float = 1.0
+ 
+   // myClass is only referenced through the unretained pointer from here on,
+   // so pin its lifetime explicitly for the duration of the generate call.
+   let audio = withExtendedLifetime(myClass) {
+     tts.generateWithCallbackWithArg(
+       text: text, callback: callback, arg: arg, sid: sid, speed: speed)
+   }
+   let filename = "test-matcha-en.wav"
+   let ok = audio.save(filename: filename)
+   if ok == 1 {
+     print("\nSaved to:\(filename)")
+   } else {
+     print("Failed to save to \(filename)")
+   }
+ }
+ 
+ /// Program entry point for the English MatchaTTS example.
+ @main
+ struct App {
+   /// Runs the example once.
+   static func main() { run() }
+ }
--- a/swift-api-examples/tts-matcha-zh.swift 0 → 100644
查看文件 @6f085ba
+++ b/swift-api-examples/tts-matcha-zh.swift 0 → 100644
查看文件 @6f085ba
+ /// Receiver object handed to the TTS callback in `run()`.
+ class MyClass {
+   /// Prints how many samples were delivered; it does not play any audio.
+   ///
+   /// - Parameter samples: One chunk of generated audio samples.
+   func playSamples(samples: [Float]) {
+     let count = samples.count
+     print("Play \(count) samples")
+   }
+ }
+ 
+ /// Synthesizes one Chinese sentence with a MatchaTTS model and saves the
+ /// result to `test-matcha-zh.wav`.
+ ///
+ /// The model files are read from `./matcha-icefall-zh-baker` and
+ /// `./hifigan_v2.onnx`, relative to the current working directory. While the
+ /// audio is generated, each chunk is reported through `MyClass.playSamples`.
+ func run() {
+   let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"
+   let vocoder = "./hifigan_v2.onnx"
+   let lexicon = "./matcha-icefall-zh-baker/lexicon.txt"
+   let tokens = "./matcha-icefall-zh-baker/tokens.txt"
+   let dictDir = "./matcha-icefall-zh-baker/dict"
+   // Comma-separated list of rule FST files passed to the TTS config.
+   let ruleFsts =
+     "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"
+   let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
+     acousticModel: acousticModel,
+     vocoder: vocoder,
+     lexicon: lexicon,
+     tokens: tokens,
+     dictDir: dictDir
+   )
+   let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
+   var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig, ruleFsts: ruleFsts)
+ 
+   let myClass = MyClass()
+ 
+   // The callback receives myClass as an unretained opaque pointer, so myClass
+   // must be kept alive for as long as the callback can be invoked.
+   //
+   // See also
+   // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
+   let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()
+ 
+   let callback: TtsCallbackWithArg = { samples, n, context in
+     // Without a receiver there is nothing to report; keep generating so the
+     // complete audio can still be saved afterwards.
+     guard let context = context else { return 1 }
+     let receiver = Unmanaged<MyClass>.fromOpaque(context).takeUnretainedValue()
+ 
+     // Copy the chunk in one step. A nil pointer or a non-positive count is
+     // treated as an empty chunk instead of trapping.
+     let chunk: [Float]
+     if let samples = samples, n > 0 {
+       chunk = Array(UnsafeBufferPointer(start: samples, count: Int(n)))
+     } else {
+       chunk = []
+     }
+ 
+     receiver.playSamples(samples: chunk)
+ 
+     // return 1 so that it continues generating
+     return 1
+   }
+ 
+   let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
+ 
+   let text = "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。"
+   let sid = 0
+   let speed: Float = 1.0
+ 
+   // myClass is only referenced through the unretained pointer from here on,
+   // so pin its lifetime explicitly for the duration of the generate call.
+   let audio = withExtendedLifetime(myClass) {
+     tts.generateWithCallbackWithArg(
+       text: text, callback: callback, arg: arg, sid: sid, speed: speed)
+   }
+   let filename = "test-matcha-zh.wav"
+   let ok = audio.save(filename: filename)
+   if ok == 1 {
+     print("\nSaved to:\(filename)")
+   } else {
+     print("Failed to save to \(filename)")
+   }
+ }
+ 
+ /// Program entry point for the Chinese MatchaTTS example.
+ @main
+ struct App {
+   /// Runs the example once.
+   static func main() { run() }
+ }
--- a/swift-api-examples/tts.swift → swift-api-examples/tts-vits.swift
查看文件 @6f085ba
+++ b/swift-api-examples/tts.swift → swift-api-examples/tts-vits.swift
查看文件 @6f085ba
@@ -47,7 +47,7 @@ func run() {
 
   let audio = tts.generateWithCallbackWithArg(
     text: text, callback: callback, arg: arg, sid: sid, speed: speed)
-   let filename = "test.wav"
+   let filename = "test-vits-en.wav"
   let ok = audio.save(filename: filename)
   if ok == 1 {
     print("\nSaved to:\(filename)")