Fangjun Kuang
Committed by GitHub

Add Swift API for speech enhancement GTCRN models (#1989)

@@ -7,6 +7,8 @@ echo "pwd: $PWD" @@ -7,6 +7,8 @@ echo "pwd: $PWD"
7 cd swift-api-examples 7 cd swift-api-examples
8 ls -lh 8 ls -lh
9 9
  10 +./run-speech-enhancement-gtcrn.sh
  11 +ls -lh *.wav
10 12
11 ./run-fire-red-asr.sh 13 ./run-fire-red-asr.sh
12 rm -rf sherpa-onnx-fire-red-asr-* 14 rm -rf sherpa-onnx-fire-red-asr-*
@@ -14,3 +14,4 @@ tts-matcha-zh @@ -14,3 +14,4 @@ tts-matcha-zh
14 tts-matcha-en 14 tts-matcha-en
15 tts-kokoro-en 15 tts-kokoro-en
16 tts-kokoro-zh-en 16 tts-kokoro-zh-en
  17 +speech-enhancement-gtcrn
@@ -1323,3 +1323,101 @@ class SherpaOnnxOfflineSpeakerDiarizationWrapper { @@ -1323,3 +1323,101 @@ class SherpaOnnxOfflineSpeakerDiarizationWrapper {
1323 return ans 1323 return ans
1324 } 1324 }
1325 } 1325 }
  1326 +
/// Builds the GTCRN section of the offline speech-denoiser configuration.
///
/// - Parameter model: Path to the GTCRN model file. Defaults to an empty string.
/// - Returns: A `SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig` whose `model`
///   field is the result of `toCPointer(model)`.
func sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: String = "")
  -> SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
{
  let modelPath = toCPointer(model)
  return SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: modelPath)
}
  1332 +
/// Builds the model configuration used by the offline speech denoiser.
///
/// - Parameters:
///   - gtcrn: GTCRN model settings. Defaults to the result of
///     `sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig()`.
///   - numThreads: Stored in `num_threads` after conversion to `Int32`.
///   - provider: Provider name passed through `toCPointer`; `"cpu"` by default.
///   - debug: Stored in `debug` after conversion to `Int32`.
/// - Returns: The populated `SherpaOnnxOfflineSpeechDenoiserModelConfig`.
func sherpaOnnxOfflineSpeechDenoiserModelConfig(
  gtcrn: SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig =
    sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(),
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0
) -> SherpaOnnxOfflineSpeechDenoiserModelConfig {
  // Convert the Swift values to their C representations up front, in the
  // same order as the fields of the C struct initializer.
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerName = toCPointer(provider)

  return SherpaOnnxOfflineSpeechDenoiserModelConfig(
    gtcrn: gtcrn,
    num_threads: threadCount,
    debug: debugFlag,
    provider: providerName)
}
  1347 +
/// Wraps a model configuration into the top-level offline speech-denoiser
/// configuration.
///
/// - Parameter model: Model settings. Defaults to the result of
///   `sherpaOnnxOfflineSpeechDenoiserModelConfig()`.
/// - Returns: A `SherpaOnnxOfflineSpeechDenoiserConfig` holding `model`.
func sherpaOnnxOfflineSpeechDenoiserConfig(
  model: SherpaOnnxOfflineSpeechDenoiserModelConfig =
    sherpaOnnxOfflineSpeechDenoiserModelConfig()
) -> SherpaOnnxOfflineSpeechDenoiserConfig {
  let config = SherpaOnnxOfflineSpeechDenoiserConfig(model: model)
  return config
}
  1355 +
/// Owns a `SherpaOnnxDenoisedAudio` produced by the C API and releases it with
/// `SherpaOnnxDestroyDenoisedAudio` when the wrapper is deallocated.
///
/// The wrapped pointer may be `nil`: `init` accepts an implicitly unwrapped
/// pointer and `deinit` already checks for `nil`. Every accessor therefore
/// treats a `nil` pointer as "no audio" instead of dereferencing it.
class SherpaOnnxDenoisedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxDenoisedAudio>!

  /// Takes ownership of `audio`; it is destroyed in `deinit` when non-nil.
  init(audio: UnsafePointer<SherpaOnnxDenoisedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyDenoisedAudio(audio)
    }
  }

  /// The `n` field of the underlying struct, or 0 when there is no audio.
  var n: Int32 {
    guard let audio else { return 0 }
    return audio.pointee.n
  }

  /// The `sample_rate` field of the underlying struct, or 0 when there is no audio.
  var sampleRate: Int32 {
    guard let audio else { return 0 }
    return audio.pointee.sample_rate
  }

  /// A copy of the first `n` samples, or an empty array when the audio
  /// pointer or its sample pointer is nil, or when `n` is not positive.
  var samples: [Float] {
    guard let audio, let p = audio.pointee.samples else { return [] }

    // A non-positive count would make the buffer view invalid.
    let count = Int(audio.pointee.n)
    guard count > 0 else { return [] }

    // Copy everything in one step instead of appending element by element.
    return Array(UnsafeBufferPointer(start: p, count: count))
  }

  /// Writes the samples to `filename` via `SherpaOnnxWriteWave`.
  ///
  /// - Parameter filename: Destination path of the wave file.
  /// - Returns: The result of `SherpaOnnxWriteWave`. When there is no audio,
  ///   returns 0 without calling into C; callers in this file treat any
  ///   value other than 1 as failure.
  func save(filename: String) -> Int32 {
    guard let audio else { return 0 }
    return SherpaOnnxWriteWave(
      audio.pointee.samples, audio.pointee.n, audio.pointee.sample_rate, toCPointer(filename))
  }
}
  1395 +
/// Swift wrapper around the offline speech denoiser of the C API.
///
/// The denoiser is created in `init` with `SherpaOnnxCreateOfflineSpeechDenoiser`
/// and released in `deinit` with `SherpaOnnxDestroyOfflineSpeechDenoiser`.
class SherpaOnnxOfflineSpeechDenoiserWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  /// Constructor taking a model config
  init(config: UnsafePointer<SherpaOnnxOfflineSpeechDenoiserConfig>!) {
    self.impl = SherpaOnnxCreateOfflineSpeechDenoiser(config)
  }

  deinit {
    // Nothing to release when creation did not yield a handle.
    guard let handle = impl else { return }
    SherpaOnnxDestroyOfflineSpeechDenoiser(handle)
  }

  /// Runs the denoiser on `samples`.
  ///
  /// - Parameters:
  ///   - samples: Input samples; their count is passed to C as `Int32`.
  ///   - sampleRate: Sample rate of `samples`, passed to C as `Int32`.
  /// - Returns: A wrapper owning whatever `SherpaOnnxOfflineSpeechDenoiserRun`
  ///   returned.
  func run(samples: [Float], sampleRate: Int) -> SherpaOnnxDenoisedAudioWrapper {
    let sampleCount = Int32(samples.count)
    let rate = Int32(sampleRate)
    let denoised = SherpaOnnxOfflineSpeechDenoiserRun(impl, samples, sampleCount, rate)

    return SherpaOnnxDenoisedAudioWrapper(audio: denoised)
  }

  /// The value reported by `SherpaOnnxOfflineSpeechDenoiserGetSampleRate`.
  var sampleRate: Int {
    let rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(impl)
    return Int(rate)
  }
}
#!/usr/bin/env bash
#
# Downloads the GTCRN model and a test wave file if they are missing, builds
# the speech-enhancement-gtcrn Swift example if it has not been built yet,
# and runs it.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# -f (--fail) makes curl exit non-zero on an HTTP error instead of saving the
# error page under the target name; otherwise a bad download would satisfy the
# "file exists" checks below on every later run.
if [ ! -f ./gtcrn_simple.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
fi

if [ ! -f ./inp_16k.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
fi

if [ ! -e ./speech-enhancement-gtcrn ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./speech-enhancement-gtcrn.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o speech-enhancement-gtcrn

  strip speech-enhancement-gtcrn
else
  echo "./speech-enhancement-gtcrn exists - skip building"
fi

# Quoted so a working directory containing spaces does not split the value.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"
./speech-enhancement-gtcrn
  1 +import AVFoundation
  2 +
extension AudioBuffer {
  /// Copies the contents of this buffer into a Swift array, viewing the
  /// buffer as a sequence of `Float` values.
  func array() -> [Float] {
    let floats = UnsafeBufferPointer<Float>(self)
    return [Float](floats)
  }
}
  8 +
extension AVAudioPCMBuffer {
  /// Returns the samples held in `mBuffers` of this buffer's
  /// `audioBufferList` as an array of `Float`.
  func array() -> [Float] {
    let firstBuffer = audioBufferList.pointee.mBuffers
    return firstBuffer.array()
  }
}
  14 +
/// Denoises `./inp_16k.wav` with the GTCRN model `./gtcrn_simple.onnx` and
/// saves the result to `enhanced_16k.wav`, printing whether saving succeeded.
func run() {
  // Please refer to
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  // to download files used in this script
  let gtcrn = sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: "./gtcrn_simple.onnx")
  let modelConfig = sherpaOnnxOfflineSpeechDenoiserModelConfig(gtcrn: gtcrn)
  var config = sherpaOnnxOfflineSpeechDenoiserConfig(model: modelConfig)

  let sd = SherpaOnnxOfflineSpeechDenoiserWrapper(config: &config)

  let inputURL = URL(fileURLWithPath: "./inp_16k.wav")
  let inputFile = try! AVAudioFile(forReading: inputURL)

  // The example expects 16 kHz, mono, 32-bit float input.
  let format = inputFile.processingFormat
  assert(format.sampleRate == 16000)
  assert(format.channelCount == 1)
  assert(format.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  // Read the whole file into a single PCM buffer.
  let frameCount = UInt32(inputFile.length)
  let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount)!
  try! inputFile.read(into: pcmBuffer)

  let denoised = sd.run(samples: pcmBuffer.array(), sampleRate: Int(format.sampleRate))

  let filename = "enhanced_16k.wav"
  guard denoised.save(filename: filename) == 1 else {
    print("Failed to save to \(filename)")
    return
  }
  print("\nSaved to:\(filename)")
}
  49 +
/// Program entry point: runs the speech-enhancement example.
@main
struct App {
  static func main() { run() }
}
  55 +}