Fangjun Kuang
Committed by GitHub

Add Swift API for ten-vad (#2387)

... ... @@ -71,7 +71,11 @@ curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
ls -lh
popd
./run-generate-subtitles-ten-vad.sh
rm -rf *.onnx
./run-generate-subtitles.sh
rm -rf *.onnx
ls -lh /Users/fangjun/Desktop
cat /Users/fangjun/Desktop/Obama.srt
... ...
decode-file
decode-file-non-streaming
generate-subtitles
generate-subtitles-ten-vad
spoken-language-identification
tts-vits
vits-vctk
... ...
... ... @@ -386,6 +386,22 @@ func sherpaOnnxOfflineWhisperModelConfig(
)
}
/// Creates a `SherpaOnnxOfflineCanaryModelConfig` from Swift-typed arguments.
///
/// Every string argument is passed through `toCPointer` before being stored
/// in the C struct, and `usePnc` is stored as `1` (true) or `0` (false).
///
/// - Parameters:
///   - encoder: Value for the `encoder` field (presumably a model file path;
///     confirm against the C API). Defaults to an empty string.
///   - decoder: Value for the `decoder` field (presumably a model file path;
///     confirm against the C API). Defaults to an empty string.
///   - srcLang: Value for the `src_lang` field. Defaults to `"en"`.
///   - tgtLang: Value for the `tgt_lang` field. Defaults to `"en"`.
///   - usePnc: Flag stored in `use_pnc`. Defaults to `true`.
/// - Returns: The populated `SherpaOnnxOfflineCanaryModelConfig`.
func sherpaOnnxOfflineCanaryModelConfig(
  encoder: String = "",
  decoder: String = "",
  srcLang: String = "en",
  tgtLang: String = "en",
  usePnc: Bool = true
) -> SherpaOnnxOfflineCanaryModelConfig {
  // Convert the strings first, in field order, then assemble the struct.
  let encoderPtr = toCPointer(encoder)
  let decoderPtr = toCPointer(decoder)
  let srcLangPtr = toCPointer(srcLang)
  let tgtLangPtr = toCPointer(tgtLang)

  let config = SherpaOnnxOfflineCanaryModelConfig(
    encoder: encoderPtr,
    decoder: decoderPtr,
    src_lang: srcLangPtr,
    tgt_lang: tgtLangPtr,
    use_pnc: usePnc ? 1 : 0
  )
  return config
}
func sherpaOnnxOfflineFireRedAsrModelConfig(
encoder: String = "",
decoder: String = ""
... ... @@ -459,7 +475,8 @@ func sherpaOnnxOfflineModelConfig(
fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig(),
dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(),
zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig =
sherpaOnnxOfflineZipformerCtcModelConfig()
sherpaOnnxOfflineZipformerCtcModelConfig(),
canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig()
) -> SherpaOnnxOfflineModelConfig {
return SherpaOnnxOfflineModelConfig(
transducer: transducer,
... ... @@ -479,7 +496,8 @@ func sherpaOnnxOfflineModelConfig(
moonshine: moonshine,
fire_red_asr: fireRedAsr,
dolphin: dolphin,
zipformer_ctc: zipformerCtc
zipformer_ctc: zipformerCtc,
canary: canary
)
}
... ... @@ -607,10 +625,14 @@ class SherpaOnnxOfflineRecognizer {
return SherpaOnnxOfflineRecongitionResult(result: result)
}
/// Forwards `config` to `SherpaOnnxOfflineRecognizerSetConfig` together with
/// this object's `recognizer`.
///
/// - Parameter config: Pointer to the recognizer configuration handed to the
///   C API unchanged. NOTE(review): the pointer is implicitly unwrapped and no
///   nil check is made here — confirm the C function tolerates a null pointer.
func setConfig(config: UnsafePointer<SherpaOnnxOfflineRecognizerConfig>!) {
SherpaOnnxOfflineRecognizerSetConfig(recognizer, config)
}
}
func sherpaOnnxSileroVadModelConfig(
model: String,
model: String = "",
threshold: Float = 0.5,
minSilenceDuration: Float = 0.25,
minSpeechDuration: Float = 0.5,
... ... @@ -627,19 +649,39 @@ func sherpaOnnxSileroVadModelConfig(
)
}
/// Creates a `SherpaOnnxTenVadModelConfig` from Swift-typed arguments.
///
/// `model` is passed through `toCPointer` and `windowSize` is converted to
/// `Int32`; the remaining values are stored as given.
///
/// - Parameters:
///   - model: Value for the `model` field. Defaults to an empty string.
///   - threshold: Value for the `threshold` field. Defaults to `0.5`.
///   - minSilenceDuration: Value for `min_silence_duration`. Defaults to `0.25`.
///   - minSpeechDuration: Value for `min_speech_duration`. Defaults to `0.5`.
///   - windowSize: Value for `window_size`. Defaults to `256`.
///   - maxSpeechDuration: Value for `max_speech_duration`. Defaults to `5.0`.
/// - Returns: The populated `SherpaOnnxTenVadModelConfig`.
func sherpaOnnxTenVadModelConfig(
  model: String = "",
  threshold: Float = 0.5,
  minSilenceDuration: Float = 0.25,
  minSpeechDuration: Float = 0.5,
  windowSize: Int = 256,
  maxSpeechDuration: Float = 5.0
) -> SherpaOnnxTenVadModelConfig {
  // Perform the two conversions up front, in the same order as the fields.
  let modelPtr = toCPointer(model)
  let windowSize32 = Int32(windowSize)

  let config = SherpaOnnxTenVadModelConfig(
    model: modelPtr,
    threshold: threshold,
    min_silence_duration: minSilenceDuration,
    min_speech_duration: minSpeechDuration,
    window_size: windowSize32,
    max_speech_duration: maxSpeechDuration
  )
  return config
}
/// Creates a `SherpaOnnxVadModelConfig` from Swift-typed arguments.
///
/// `numThreads` and `debug` are converted to `Int32`, `provider` is passed
/// through `toCPointer`, and the silero-vad / ten-vad sub-configurations are
/// stored as given.
///
/// NOTE(review): the `sileroVad` parameter and the `debug` parameter/field
/// each appear twice below (once without and once with a default value or
/// trailing comma). This looks like the before and after lines of a change
/// shown together — confirm which lines are the live ones.
func sherpaOnnxVadModelConfig(
sileroVad: SherpaOnnxSileroVadModelConfig,
sileroVad: SherpaOnnxSileroVadModelConfig = sherpaOnnxSileroVadModelConfig(),
sampleRate: Int32 = 16000,
numThreads: Int = 1,
provider: String = "cpu",
debug: Int = 0
debug: Int = 0,
tenVad: SherpaOnnxTenVadModelConfig = sherpaOnnxTenVadModelConfig()
) -> SherpaOnnxVadModelConfig {
return SherpaOnnxVadModelConfig(
silero_vad: sileroVad,
sample_rate: sampleRate,
num_threads: Int32(numThreads),
provider: toCPointer(provider),
debug: Int32(debug)
debug: Int32(debug),
ten_vad: tenVad
)
}
... ...
... ... @@ -156,11 +156,35 @@ func run() {
assert(audioFormat.channelCount == 1)
assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
let sileroVadConfig = sherpaOnnxSileroVadModelConfig(
model: "./silero_vad.onnx"
var sileroVadConfig = sherpaOnnxSileroVadModelConfig()
var tenVadConfig = sherpaOnnxTenVadModelConfig()
var windowSize = 0
if FileManager.default.fileExists(atPath: "./silero_vad.onnx") {
sileroVadConfig = sherpaOnnxSileroVadModelConfig(
model: "./silero_vad.onnx",
threshold: 0.25,
windowSize: 512
)
windowSize = 512
print("Use silero-vad")
} else if FileManager.default.fileExists(atPath: "./ten-vad.onnx") {
tenVadConfig = sherpaOnnxTenVadModelConfig(
model: "./ten-vad.onnx",
threshold: 0.25,
windowSize: 256
)
windowSize = 256
print("Use ten-vad")
} else {
print("Please provide ./silero_vad.onnx or ./ten-vad.onnx")
return
}
var vadModelConfig = sherpaOnnxVadModelConfig(
sileroVad: sileroVadConfig, tenVad: tenVadConfig)
var vadModelConfig = sherpaOnnxVadModelConfig(sileroVad: sileroVadConfig)
let vad = SherpaOnnxVoiceActivityDetectorWrapper(
config: &vadModelConfig, buffer_size_in_seconds: 120)
... ... @@ -170,8 +194,6 @@ func run() {
try! audioFile.read(into: audioFileBuffer!)
var array: [Float]! = audioFileBuffer?.array()
let windowSize = Int(vadModelConfig.silero_vad.window_size)
var segments: [SpeechSegment] = []
for offset in stride(from: 0, to: array.count, by: windowSize) {
... ... @@ -180,7 +202,6 @@ func run() {
}
vad.flush()
var index: Int = 0
while !vad.isEmpty() {
let s = vad.front()
vad.pop()
... ...
#!/usr/bin/env bash
#
# Builds (if needed) and runs the generate-subtitles example as
# ./generate-subtitles-ten-vad, after making sure the Whisper tiny.en model
# and ./ten-vad.onnx are present in the current directory.
#
# Requires ../build-swift-macos to exist (see ../build-swift-macos.sh).

set -ex

# The example compiles against headers/libraries under ../build-swift-macos.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Download and unpack the Whisper tiny.en model on first use.
if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
  echo ""
  echo "for help"

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
  ls -lh sherpa-onnx-whisper-tiny.en
fi

# Download the ten-vad model on first use.
if [ ! -f ./ten-vad.onnx ]; then
  echo "downloading ten-vad"
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

# The example program looks for ./silero_vad.onnx before ./ten-vad.onnx, so a
# leftover silero model would make this run use silero-vad instead of ten-vad.
# Only warn (on stderr) so existing workflows keep running.
if [ -f ./silero_vad.onnx ]; then
  echo "Warning: ./silero_vad.onnx exists, so silero-vad will be used instead of ten-vad." >&2
  echo "Remove or rename ./silero_vad.onnx to test ten-vad." >&2
fi

# Build the executable only when it does not exist yet.
if [ ! -e ./generate-subtitles-ten-vad ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./generate-subtitles.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o generate-subtitles-ten-vad

  strip generate-subtitles-ten-vad
else
  echo "./generate-subtitles-ten-vad exists - skip building"
fi

# Prepend the freshly built libraries to the dynamic loader search path.
# The previous value is appended only when it is non-empty, so no trailing
# empty path component is created when DYLD_LIBRARY_PATH was unset.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./generate-subtitles-ten-vad
... ...