Fangjun Kuang
Committed by GitHub

Add Swift API for TTS (#439)

#!/usr/bin/env bash
# Runs the Swift API example scripts found in swift-api-examples, one after another.
# -e: abort on the first failing command; -x: echo every command as it runs.
set -ex
echo "pwd: $PWD"
cd swift-api-examples
ls -lh
# Download the Obama.wav test file into /Users/fangjun/Desktop before the
# subtitle example runs (pushd/popd restores the working directory afterwards).
mkdir -p /Users/fangjun/Desktop
pushd /Users/fangjun/Desktop
wget -q https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
ls -lh
popd
./run-generate-subtitles.sh
# Print Obama.srt from the same directory; with -e, a missing file fails the script here.
ls -lh /Users/fangjun/Desktop
cat /Users/fangjun/Desktop/Obama.srt
./run-tts.sh
ls -lh
./run-decode-file.sh
./run-decode-file-non-streaming.sh
ls -lh
... ...
# GitHub Actions workflow: builds the project via build-swift-macos.sh and then
# runs .github/scripts/test-swift.sh.
name: swift
# Triggered by pushes to master, pull requests against master, and manual dispatch.
on:
push:
branches:
- master
pull_request:
branches:
- master
workflow_dispatch:
# One concurrency group per ref; a newer run cancels the one still in progress.
concurrency:
group: swift-${{ github.ref }}
cancel-in-progress: true
jobs:
swift:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-13]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2
with:
key: ${{ matrix.os }}-swift
# Build step: creates a world-writable /Users/fangjun/Desktop, sets ccache as the
# CMake C++ compiler launcher, then runs ./build-swift-macos.sh.
- name: Build
shell: bash
run: |
sudo mkdir -p /Users/fangjun/Desktop
sudo chmod a=rwx /Users/fangjun/Desktop
ls -lhd /Users/fangjun/Desktop
ls -lh /Users/fangjun/Desktop
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
cmake --version
./build-swift-macos.sh
- name: test
shell: bash
run: |
.github/scripts/test-swift.sh
... ...
decode-file
decode-file-non-streaming
generate-subtitles
tts
vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
!*.sh
... ...
... ... @@ -572,3 +572,110 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
SherpaOnnxVoiceActivityDetectorReset(vad)
}
}
// offline tts
/// Builds the C-level VITS model configuration used by offline TTS.
///
/// Each string argument is converted with `toCPointer` before being stored in
/// the C struct; the numeric arguments are copied as-is.
///
/// - Parameters:
///   - model: Stored in the `model` field.
///   - lexicon: Stored in the `lexicon` field.
///   - tokens: Stored in the `tokens` field.
///   - noiseScale: Stored in `noise_scale`. Defaults to 0.667.
///   - noiseScaleW: Stored in `noise_scale_w`. Defaults to 0.8.
///   - lengthScale: Stored in `length_scale`. Defaults to 1.0.
/// - Returns: A populated `SherpaOnnxOfflineTtsVitsModelConfig`.
func sherpaOnnxOfflineTtsVitsModelConfig(
  model: String,
  lexicon: String,
  tokens: String,
  noiseScale: Float = 0.667,
  noiseScaleW: Float = 0.8,
  lengthScale: Float = 1.0
) -> SherpaOnnxOfflineTtsVitsModelConfig {
  // Convert in the same order the fields are declared.
  let modelPointer = toCPointer(model)
  let lexiconPointer = toCPointer(lexicon)
  let tokensPointer = toCPointer(tokens)

  let config = SherpaOnnxOfflineTtsVitsModelConfig(
    model: modelPointer,
    lexicon: lexiconPointer,
    tokens: tokensPointer,
    noise_scale: noiseScale,
    noise_scale_w: noiseScaleW,
    length_scale: lengthScale)
  return config
}
/// Builds the C-level offline TTS model configuration around a VITS config.
///
/// - Parameters:
///   - vits: VITS model configuration, stored unchanged in the `vits` field.
///   - numThreads: Converted to `Int32` and stored in `num_threads`. Defaults to 1.
///   - debug: Converted to `Int32` and stored in `debug`. Defaults to 0.
///   - provider: Converted with `toCPointer` and stored in `provider`.
///     Defaults to `"cpu"`.
/// - Returns: A populated `SherpaOnnxOfflineTtsModelConfig`.
func sherpaOnnxOfflineTtsModelConfig(
  vits: SherpaOnnxOfflineTtsVitsModelConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOfflineTtsModelConfig {
  // The C struct uses 32-bit integers; `Int32(_:)` traps on out-of-range values.
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerPointer = toCPointer(provider)

  let config = SherpaOnnxOfflineTtsModelConfig(
    vits: vits,
    num_threads: threadCount,
    debug: debugFlag,
    provider: providerPointer
  )
  return config
}
/// Builds the top-level C configuration for offline TTS.
///
/// - Parameters:
///   - model: Model configuration, stored unchanged in the `model` field.
///   - ruleFsts: Converted with `toCPointer` and stored in `rule_fsts`.
///     Defaults to the empty string.
/// - Returns: A populated `SherpaOnnxOfflineTtsConfig`.
func sherpaOnnxOfflineTtsConfig(
  model: SherpaOnnxOfflineTtsModelConfig,
  ruleFsts: String = ""
) -> SherpaOnnxOfflineTtsConfig {
  let ruleFstsPointer = toCPointer(ruleFsts)
  let config = SherpaOnnxOfflineTtsConfig(
    model: model,
    rule_fsts: ruleFstsPointer
  )
  return config
}
/// Owns a `SherpaOnnxGeneratedAudio` obtained from the C API and releases it
/// with `SherpaOnnxDestroyOfflineTtsGeneratedAudio` when the wrapper is
/// deallocated.
///
/// The wrapped pointer may be nil; in that case the accessors report an empty
/// result instead of dereferencing a nil pointer.
class SherpaOnnxGeneratedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxGeneratedAudio>!

  /// Takes over the given pointer.
  ///
  /// - Parameter audio: Pointer to the C struct; may be nil.
  init(audio: UnsafePointer<SherpaOnnxGeneratedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
    }
  }

  /// Value of the C struct's `n` field, or 0 when there is no underlying audio.
  var n: Int32 {
    return audio?.pointee.n ?? 0
  }

  /// Value of the C struct's `sample_rate` field, or 0 when there is no
  /// underlying audio.
  var sampleRate: Int32 {
    return audio?.pointee.sample_rate ?? 0
  }

  /// A copy of the first `n` floats behind the C struct's `samples` pointer.
  ///
  /// Returns an empty array when the wrapper holds no audio, the `samples`
  /// pointer is nil, or `n` is not positive.
  var samples: [Float] {
    guard let base = audio?.pointee.samples, n > 0 else {
      return []
    }
    // Bulk copy instead of appending element by element.
    return Array(UnsafeBufferPointer(start: base, count: Int(n)))
  }

  /// Passes the samples, `n`, and `sampleRate` to `SherpaOnnxWriteWave`
  /// together with `filename`.
  ///
  /// Does nothing except print a diagnostic when the wrapper holds no audio.
  ///
  /// - Parameter filename: Destination name handed to `SherpaOnnxWriteWave`.
  func save(filename: String) {
    guard let audio else {
      print("SherpaOnnxGeneratedAudioWrapper.save: no audio available, nothing written to \(filename)")
      return
    }
    SherpaOnnxWriteWave(audio.pointee.samples, n, sampleRate, toCPointer(filename))
  }
}
/// Wraps the offline TTS object created by `SherpaOnnxCreateOfflineTts` and
/// destroys it with `SherpaOnnxDestroyOfflineTts` on deallocation.
class SherpaOnnxOfflineTtsWrapper {
  /// A pointer to the underlying counterpart in C
  let tts: OpaquePointer!

  /// Constructor taking a model config
  ///
  /// - Parameter config: Pointer to the configuration passed to
  ///   `SherpaOnnxCreateOfflineTts`.
  init(
    config: UnsafePointer<SherpaOnnxOfflineTtsConfig>!
  ) {
    tts = SherpaOnnxCreateOfflineTts(config)
  }

  deinit {
    // Nothing to release if creation did not yield a handle.
    guard let tts else { return }
    SherpaOnnxDestroyOfflineTts(tts)
  }

  /// Calls `SherpaOnnxOfflineTtsGenerate` and wraps its result.
  ///
  /// - Parameters:
  ///   - text: Text converted with `toCPointer` and passed to the C call.
  ///   - sid: Converted to `Int32` and passed to the C call. Defaults to 0.
  ///   - speed: Passed to the C call unchanged. Defaults to 1.0.
  /// - Returns: A wrapper that owns whatever pointer the C call returned.
  func generate(text: String, sid: Int = 0, speed: Float = 1.0) -> SherpaOnnxGeneratedAudioWrapper {
    let textPointer = toCPointer(text)
    let speakerID = Int32(sid)
    let generated: UnsafePointer<SherpaOnnxGeneratedAudio>? =
      SherpaOnnxOfflineTtsGenerate(tts, textPointer, speakerID, speed)
    return SherpaOnnxGeneratedAudioWrapper(audio: generated)
  }
}
... ...
... ... @@ -175,8 +175,8 @@ func run() {
var segments: [SpeechSegment] = []
for offset in stride(from: 0, to: array.count, by: windowSize) {
let end = min(offset + windowSize, array.count)
vad.acceptWaveform(samples: [Float](array[offset ..< end]))
let end = min(offset + windowSize, array.count)
vad.acceptWaveform(samples: [Float](array[offset..<end]))
}
var index: Int = 0
... ...
#!/usr/bin/env bash
# Builds (when needed) and runs the decode-file-non-streaming Swift example.
# -e: abort on the first failing command; -x: echo every command as it runs.
set -ex
# The directory produced by ../build-swift-macos.sh must already exist.
if [ ! -d ../build-swift-macos ]; then
echo "Please run ../build-swift-macos.sh first!"
exit 1
fi
# When the model directory is missing, print the reference link and then
# download and unpack the archive (the messages ask the user to download,
# but the script does it itself).
if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
echo "Please download the pre-trained model for testing."
echo "You can refer to"
echo ""
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
echo ""
echo "for help"
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi
# Compile only when the executable is absent; an existing binary is reused as-is.
if [ ! -e ./decode-file-non-streaming ]; then
# Note: We use -lc++ to link against libc++ instead of libstdc++
swiftc \
-lc++ \
-I ../build-swift-macos/install/include \
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
./decode-file-non-streaming.swift ./SherpaOnnx.swift \
-L ../build-swift-macos/install/lib/ \
-l sherpa-onnx \
-l onnxruntime \
-o decode-file-non-streaming
strip decode-file-non-streaming
else
echo "./decode-file-non-streaming exists - skip building"
fi
# Prepend the install/lib directory to DYLD_LIBRARY_PATH before running the example.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./decode-file-non-streaming
... ...
... ... @@ -14,7 +14,10 @@ if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english"
echo ""
echo "for help"
exit 1
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
fi
if [ ! -e ./decode-file ]; then
... ... @@ -28,6 +31,8 @@ if [ ! -e ./decode-file ]; then
-l sherpa-onnx \
-l onnxruntime \
-o decode-file
strip decode-file
else
echo "./decode-file exists - skip building"
fi
... ...
... ... @@ -14,7 +14,15 @@ if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
echo ""
echo "for help"
exit 1
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
ls -lh sherpa-onnx-whisper-tiny.en
fi
if [ ! -f ./silero_vad.onnx ]; then
echo "downloading silero_vad"
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
if [ ! -e ./generate-subtitles ]; then
... ... @@ -28,6 +36,8 @@ if [ ! -e ./generate-subtitles ]; then
-l sherpa-onnx \
-l onnxruntime \
-o generate-subtitles
strip generate-subtitles
else
echo "./generate-subtitles exists - skip building"
fi
... ...
#!/usr/bin/env bash
# Builds (when needed) and runs the tts Swift example.
# -e: abort on the first failing command; -x: echo every command as it runs.
set -ex
# The directory produced by ../build-swift-macos.sh must already exist.
if [ ! -d ../build-swift-macos ]; then
echo "Please run ../build-swift-macos.sh first!"
exit 1
fi
# When the vits-vctk directory is missing, print the reference link and then
# download and unpack the archive (the messages ask the user to download,
# but the script does it itself).
if [ ! -d ./vits-vctk ]; then
echo "Please download the pre-trained model for testing."
echo "You can refer to"
echo ""
echo "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers"
echo ""
echo "for help"
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
tar xvf vits-vctk.tar.bz2
rm vits-vctk.tar.bz2
fi
# Compile only when the executable is absent; an existing binary is reused as-is.
if [ ! -e ./tts ]; then
# Note: We use -lc++ to link against libc++ instead of libstdc++
swiftc \
-lc++ \
-I ../build-swift-macos/install/include \
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
./tts.swift ./SherpaOnnx.swift \
-L ../build-swift-macos/install/lib/ \
-l sherpa-onnx \
-l onnxruntime \
-o tts
strip tts
else
echo "./tts exists - skip building"
fi
# Prepend the install/lib directory to DYLD_LIBRARY_PATH before running the example.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts
... ...
/// Generates speech for a fixed sentence with the model files under
/// `./vits-vctk` and saves the result to `test.wav`.
func run() {
  // Assemble the configuration from the innermost struct outwards.
  let vits = sherpaOnnxOfflineTtsVitsModelConfig(
    model: "./vits-vctk/vits-vctk.onnx",
    lexicon: "./vits-vctk/lexicon.txt",
    tokens: "./vits-vctk/tokens.txt"
  )
  var ttsConfig = sherpaOnnxOfflineTtsConfig(
    model: sherpaOnnxOfflineTtsModelConfig(vits: vits)
  )
  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  // Speaker id 99 at normal speed.
  let audio = tts.generate(
    text: "How are you doing? Fantastic!",
    sid: 99,
    speed: 1.0
  )

  let filename = "test.wav"
  audio.save(filename: filename)
  print("\nSaved to:\n\(filename)")
}
/// Program entry point: `@main` designates `App.main()` as the start of the
/// executable, and `main()` simply calls `run()`.
@main
struct App {
static func main() {
run()
}
}
... ...