Fangjun Kuang
Committed by GitHub

Add Swift API for speech enhancement GTCRN models (#1989)

... ... @@ -7,6 +7,8 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh
./run-speech-enhancement-gtcrn.sh
ls -lh *.wav
./run-fire-red-asr.sh
rm -rf sherpa-onnx-fire-red-asr-*
... ...
... ... @@ -14,3 +14,4 @@ tts-matcha-zh
tts-matcha-en
tts-kokoro-en
tts-kokoro-zh-en
speech-enhancement-gtcrn
... ...
... ... @@ -1323,3 +1323,101 @@ class SherpaOnnxOfflineSpeakerDiarizationWrapper {
return ans
}
}
/// Builds the C configuration struct for a GTCRN speech-denoising model.
///
/// - Parameter model: Path to the GTCRN model file. Defaults to an empty string.
/// - Returns: A `SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig` whose `model`
///   field is the result of `toCPointer(model)`.
func sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: String = "")
  -> SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
{
  let modelPath = toCPointer(model)
  return SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: modelPath)
}
/// Builds the C model configuration shared by offline speech denoisers.
///
/// - Parameters:
///   - gtcrn: GTCRN-specific model configuration. Defaults to an empty one.
///   - numThreads: Stored in the `num_threads` field after conversion to `Int32`.
///   - provider: Stored in the `provider` field via `toCPointer`. Defaults to `"cpu"`.
///   - debug: Stored in the `debug` field after conversion to `Int32`.
/// - Returns: The populated `SherpaOnnxOfflineSpeechDenoiserModelConfig`.
func sherpaOnnxOfflineSpeechDenoiserModelConfig(
  gtcrn: SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig =
    sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(),
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0
) -> SherpaOnnxOfflineSpeechDenoiserModelConfig {
  // Convert the Swift values up front, in the same order as the C struct fields.
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerName = toCPointer(provider)

  return SherpaOnnxOfflineSpeechDenoiserModelConfig(
    gtcrn: gtcrn, num_threads: threadCount, debug: debugFlag, provider: providerName)
}
/// Builds the top-level C configuration for an offline speech denoiser.
///
/// - Parameter model: Model configuration to embed. Defaults to
///   `sherpaOnnxOfflineSpeechDenoiserModelConfig()`.
/// - Returns: A `SherpaOnnxOfflineSpeechDenoiserConfig` wrapping `model`.
func sherpaOnnxOfflineSpeechDenoiserConfig(
  model: SherpaOnnxOfflineSpeechDenoiserModelConfig =
    sherpaOnnxOfflineSpeechDenoiserModelConfig()
) -> SherpaOnnxOfflineSpeechDenoiserConfig {
  let config = SherpaOnnxOfflineSpeechDenoiserConfig(model: model)
  return config
}
/// Owns a `SherpaOnnxDenoisedAudio` obtained from the C API and destroys it
/// when the wrapper is deallocated.
///
/// The wrapped pointer may be nil. In that case the accessors report an empty
/// result (`0`, `[]`) and `save(filename:)` reports failure, instead of
/// dereferencing the nil pointer.
class SherpaOnnxDenoisedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxDenoisedAudio>!

  /// Takes ownership of `audio`; it is destroyed in `deinit` when non-nil.
  init(audio: UnsafePointer<SherpaOnnxDenoisedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyDenoisedAudio(audio)
    }
  }

  /// Number of samples, or 0 when there is no underlying audio.
  var n: Int32 {
    return audio?.pointee.n ?? 0
  }

  /// Sample rate of the audio, or 0 when there is no underlying audio.
  var sampleRate: Int32 {
    return audio?.pointee.sample_rate ?? 0
  }

  /// A copy of the samples as a Swift array.
  ///
  /// Returns an empty array when the audio pointer or its sample buffer is nil,
  /// or when the reported sample count is not positive.
  var samples: [Float] {
    guard let audio, let p = audio.pointee.samples, audio.pointee.n > 0 else {
      return []
    }
    // Copy the C buffer in one step rather than appending element by element.
    return Array(UnsafeBufferPointer(start: p, count: Int(audio.pointee.n)))
  }

  /// Writes the audio to `filename` using `SherpaOnnxWriteWave`.
  ///
  /// - Parameter filename: Destination path of the wave file.
  /// - Returns: The status returned by `SherpaOnnxWriteWave`, or 0 when there
  ///   is no underlying audio to write.
  func save(filename: String) -> Int32 {
    guard let audio else {
      return 0
    }
    return SherpaOnnxWriteWave(
      audio.pointee.samples, audio.pointee.n, audio.pointee.sample_rate, toCPointer(filename))
  }
}
/// Owns an offline speech denoiser created through the C API and destroys it
/// when the wrapper is deallocated.
class SherpaOnnxOfflineSpeechDenoiserWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  /// Constructor taking a model config
  ///
  /// `impl` holds whatever `SherpaOnnxCreateOfflineSpeechDenoiser` returns,
  /// which may be nil; callers can check `impl` to detect a failed creation.
  init(
    config: UnsafePointer<SherpaOnnxOfflineSpeechDenoiserConfig>!
  ) {
    impl = SherpaOnnxCreateOfflineSpeechDenoiser(config)
  }

  deinit {
    if let impl {
      SherpaOnnxDestroyOfflineSpeechDenoiser(impl)
    }
  }

  /// Denoises `samples` and returns the result.
  ///
  /// - Parameters:
  ///   - samples: Input audio samples.
  ///   - sampleRate: Sample rate of `samples`.
  /// - Returns: A wrapper around the audio returned by the C API. The wrapper
  ///   holds a nil pointer when the denoiser itself was never created.
  func run(samples: [Float], sampleRate: Int) -> SherpaOnnxDenoisedAudioWrapper {
    // Do not hand a nil handle to the C API.
    guard let impl else {
      return SherpaOnnxDenoisedAudioWrapper(audio: nil)
    }

    let audio: UnsafePointer<SherpaOnnxDenoisedAudio>? = SherpaOnnxOfflineSpeechDenoiserRun(
      impl, samples, Int32(samples.count), Int32(sampleRate))
    return SherpaOnnxDenoisedAudioWrapper(audio: audio)
  }

  /// Sample rate reported by the C API for this denoiser, or 0 when the
  /// denoiser was never created.
  var sampleRate: Int {
    guard let impl else {
      return 0
    }
    return Int(SherpaOnnxOfflineSpeechDenoiserGetSampleRate(impl))
  }
}
... ...
#!/usr/bin/env bash
# Builds (when not already built) and runs the Swift speech-enhancement
# example, downloading the GTCRN model and the test wave file if missing.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Note: --fail (-f) makes curl exit with an error on an HTTP failure instead of
# saving the server's error page under the target file name. Without it, a
# failed download would leave a bogus file that the checks below then treat as
# already downloaded.
if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

if [ ! -e ./speech-enhancement-gtcrn ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./speech-enhancement-gtcrn.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o speech-enhancement-gtcrn

  strip speech-enhancement-gtcrn
else
  echo "./speech-enhancement-gtcrn exists - skip building"
fi

# Quoted so that a working directory containing whitespace does not split the value.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"
./speech-enhancement-gtcrn
... ...
import AVFoundation
extension AudioBuffer {
  /// Copies the buffer's contents into a Swift array, viewed as `Float` values.
  func array() -> [Float] {
    let view = UnsafeBufferPointer<Float>(self)
    return [Float](view)
  }
}
extension AVAudioPCMBuffer {
  /// Returns the contents of the `mBuffers` field of `audioBufferList` as a
  /// `[Float]`, using `AudioBuffer.array()`.
  func array() -> [Float] {
    let buffer = audioBufferList.pointee.mBuffers
    return buffer.array()
  }
}
/// Denoises `./inp_16k.wav` with the GTCRN model `./gtcrn_simple.onnx` and
/// writes the result to `enhanced_16k.wav`.
///
/// Terminates the program with a descriptive message when the denoiser cannot
/// be created, when the input file cannot be opened or read, or when the input
/// is not 16 kHz, mono, 32-bit float audio.
func run() {
  // Please refer to
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  // to download files used in this script
  var config = sherpaOnnxOfflineSpeechDenoiserConfig(
    model: sherpaOnnxOfflineSpeechDenoiserModelConfig(
      gtcrn: sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: "./gtcrn_simple.onnx"))
  )

  let sd = SherpaOnnxOfflineSpeechDenoiserWrapper(config: &config)
  // Fail early with a clear message rather than using a denoiser that was never created.
  guard sd.impl != nil else {
    fatalError("Failed to create the speech denoiser. Please check ./gtcrn_simple.onnx")
  }

  let inputPath = "./inp_16k.wav"
  let audioFile: AVAudioFile
  do {
    audioFile = try AVAudioFile(forReading: URL(fileURLWithPath: inputPath))
  } catch {
    fatalError("Failed to open \(inputPath): \(error)")
  }

  // precondition (not assert) so the input checks also hold in optimized builds.
  let audioFormat = audioFile.processingFormat
  precondition(audioFormat.sampleRate == 16000, "Expected a 16 kHz input file")
  precondition(audioFormat.channelCount == 1, "Expected a single-channel input file")
  precondition(
    audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32,
    "Expected 32-bit float samples")

  let audioFrameCount = UInt32(audioFile.length)
  guard
    let audioFileBuffer = AVAudioPCMBuffer(
      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  else {
    fatalError("Failed to allocate a buffer of \(audioFrameCount) frames")
  }

  do {
    try audioFile.read(into: audioFileBuffer)
  } catch {
    fatalError("Failed to read \(inputPath): \(error)")
  }

  let samples = audioFileBuffer.array()
  let audio = sd.run(samples: samples, sampleRate: Int(audioFormat.sampleRate))

  let filename = "enhanced_16k.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}
/// Program entry point for the speech-enhancement example.
@main
struct App {
  /// Runs the example once.
  static func main() { run() }
}
... ...