Committed by
GitHub
Add Swift API for speech enhancement GTCRN models (#1989)
正在显示 5 个修改的文件，包含 192 行增加和 0 行删除
| @@ -7,6 +7,8 @@ echo "pwd: $PWD" | @@ -7,6 +7,8 @@ echo "pwd: $PWD" | ||
| 7 | cd swift-api-examples | 7 | cd swift-api-examples |
| 8 | ls -lh | 8 | ls -lh |
| 9 | 9 | ||
| 10 | +./run-speech-enhancement-gtcrn.sh | ||
| 11 | +ls -lh *.wav | ||
| 10 | 12 | ||
| 11 | ./run-fire-red-asr.sh | 13 | ./run-fire-red-asr.sh |
| 12 | rm -rf sherpa-onnx-fire-red-asr-* | 14 | rm -rf sherpa-onnx-fire-red-asr-* |
| @@ -1323,3 +1323,101 @@ class SherpaOnnxOfflineSpeakerDiarizationWrapper { | @@ -1323,3 +1323,101 @@ class SherpaOnnxOfflineSpeakerDiarizationWrapper { | ||
| 1323 | return ans | 1323 | return ans |
| 1324 | } | 1324 | } |
| 1325 | } | 1325 | } |
| 1326 | + | ||
/// Builds the GTCRN section of the offline speech-denoiser model configuration.
///
/// - Parameter model: Path of the GTCRN model file (the example passes
///   `./gtcrn_simple.onnx`). Defaults to an empty string.
/// - Returns: The C config struct whose `model` field holds `model` converted
///   with `toCPointer`.
func sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: String = "")
  -> SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
{
  let modelPath = toCPointer(model)
  return SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: modelPath)
}
| 1332 | + | ||
/// Builds the model configuration for the offline speech denoiser.
///
/// - Parameters:
///   - gtcrn: GTCRN model settings. Defaults to a config with an empty model path.
///   - numThreads: Stored in the C struct as `num_threads`. Defaults to 1.
///   - provider: Provider name, converted with `toCPointer`. Defaults to `"cpu"`.
///   - debug: Stored in the C struct as `debug`. Defaults to 0.
/// - Returns: The populated C config struct.
func sherpaOnnxOfflineSpeechDenoiserModelConfig(
  gtcrn: SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig =
    sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(),
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0
) -> SherpaOnnxOfflineSpeechDenoiserModelConfig {
  // Convert the Swift values to their C representations up front.
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerName = toCPointer(provider)

  return SherpaOnnxOfflineSpeechDenoiserModelConfig(
    gtcrn: gtcrn, num_threads: threadCount, debug: debugFlag, provider: providerName)
}
| 1347 | + | ||
/// Builds the top-level configuration for the offline speech denoiser.
///
/// - Parameter model: Model settings. Defaults to
///   `sherpaOnnxOfflineSpeechDenoiserModelConfig()` with all of its defaults.
/// - Returns: The C config struct wrapping `model`.
func sherpaOnnxOfflineSpeechDenoiserConfig(
  model: SherpaOnnxOfflineSpeechDenoiserModelConfig =
    sherpaOnnxOfflineSpeechDenoiserModelConfig()
) -> SherpaOnnxOfflineSpeechDenoiserConfig {
  let config = SherpaOnnxOfflineSpeechDenoiserConfig(model: model)
  return config
}
| 1355 | + | ||
/// Owns a `SherpaOnnxDenoisedAudio` produced by the C API and destroys it
/// when the wrapper is deallocated.
///
/// The wrapped pointer may be nil (the denoiser's `run` forwards an optional
/// result unchanged). In that case the accessors report an empty result
/// instead of trapping on a nil dereference.
class SherpaOnnxDenoisedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxDenoisedAudio>!

  /// Takes ownership of `audio`; it is destroyed in `deinit` when non-nil.
  init(audio: UnsafePointer<SherpaOnnxDenoisedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyDenoisedAudio(audio)
    }
  }

  /// Number of samples, or 0 when there is no underlying audio.
  var n: Int32 {
    guard let audio else { return 0 }
    return audio.pointee.n
  }

  /// Sample rate of the audio, or 0 when there is no underlying audio.
  var sampleRate: Int32 {
    guard let audio else { return 0 }
    return audio.pointee.sample_rate
  }

  /// A copy of the samples. Empty when the audio pointer or its sample
  /// buffer is nil, or when the reported count is not positive.
  var samples: [Float] {
    guard let audio, let p = audio.pointee.samples else { return [] }

    let count = Int(audio.pointee.n)
    // A non-positive count would make the buffer construction trap.
    guard count > 0 else { return [] }

    // Bulk-copy the C buffer instead of appending one element at a time.
    return Array(UnsafeBufferPointer(start: p, count: count))
  }

  /// Writes the audio to `filename` with `SherpaOnnxWriteWave`.
  ///
  /// - Parameter filename: Path of the wave file to write.
  /// - Returns: The value returned by `SherpaOnnxWriteWave` (callers in this
  ///   project treat 1 as success), or 0 when there is no underlying audio.
  func save(filename: String) -> Int32 {
    guard let audio else { return 0 }
    return SherpaOnnxWriteWave(
      audio.pointee.samples, audio.pointee.n, audio.pointee.sample_rate, toCPointer(filename))
  }
}
| 1395 | + | ||
/// Swift wrapper around the C offline speech denoiser handle.
class SherpaOnnxOfflineSpeechDenoiserWrapper {
  /// Handle to the C-side denoiser; destroyed in `deinit` when non-nil.
  let impl: OpaquePointer!

  /// Creates the C-side denoiser from `config`.
  init(
    config: UnsafePointer<SherpaOnnxOfflineSpeechDenoiserConfig>!
  ) {
    impl = SherpaOnnxCreateOfflineSpeechDenoiser(config)
  }

  deinit {
    guard let impl else { return }
    SherpaOnnxDestroyOfflineSpeechDenoiser(impl)
  }

  /// Sample rate reported by `SherpaOnnxOfflineSpeechDenoiserGetSampleRate`.
  var sampleRate: Int {
    let rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(impl)
    return Int(rate)
  }

  /// Runs the denoiser over `samples`.
  ///
  /// - Parameters:
  ///   - samples: Input audio samples.
  ///   - sampleRate: Sample rate of `samples`.
  /// - Returns: A wrapper that owns whatever pointer the C call returned.
  func run(samples: [Float], sampleRate: Int) -> SherpaOnnxDenoisedAudioWrapper {
    let sampleCount = Int32(samples.count)
    let denoised: UnsafePointer<SherpaOnnxDenoisedAudio>? = SherpaOnnxOfflineSpeechDenoiserRun(
      impl, samples, sampleCount, Int32(sampleRate))

    return SherpaOnnxDenoisedAudioWrapper(audio: denoised)
  }
}
#!/usr/bin/env bash
#
# Builds (when the binary is missing) and runs the Swift GTCRN
# speech-enhancement example. Requires ../build-swift-macos to exist.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Download a speech-enhancement release asset into the current directory
# unless it is already present.
#
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page under the asset's name. Downloading to a temporary name and renaming
# afterwards keeps a failed or partial transfer from satisfying the
# existence check on the next run.
download_if_missing() {
  local name="$1"
  local base_url="https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models"

  if [ ! -f "./$name" ]; then
    curl -fSL -o "./$name.tmp" "$base_url/$name"
    mv "./$name.tmp" "./$name"
  fi
}

download_if_missing gtcrn_simple.onnx
download_if_missing inp_16k.wav

if [ ! -e ./speech-enhancement-gtcrn ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./speech-enhancement-gtcrn.swift  ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o speech-enhancement-gtcrn

  strip speech-enhancement-gtcrn
else
  echo "./speech-enhancement-gtcrn exists - skip building"
fi

# Let the dynamic loader find the libraries the binary was linked against.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"
./speech-enhancement-gtcrn
| 1 | +import AVFoundation | ||
| 2 | + | ||
extension AudioBuffer {
  /// Copies the buffer's contents into a Swift array, viewing them as `Float`.
  func array() -> [Float] {
    let floats = UnsafeBufferPointer<Float>(self)
    return Array(floats)
  }
}
| 8 | + | ||
extension AVAudioPCMBuffer {
  /// Copies the contents of `mBuffers` from this buffer's audio buffer list
  /// into a Swift array of `Float`.
  func array() -> [Float] {
    let firstBuffer = audioBufferList.pointee.mBuffers
    return firstBuffer.array()
  }
}
| 14 | + | ||
/// Denoises `./inp_16k.wav` with the GTCRN model in `./gtcrn_simple.onnx`
/// and writes the result to `enhanced_16k.wav`.
///
/// Any failure to open, validate or read the input stops the program with a
/// message naming the step that failed.
func run() {
  // Please refer to
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
  // to download files used in this script
  var config = sherpaOnnxOfflineSpeechDenoiserConfig(
    model: sherpaOnnxOfflineSpeechDenoiserModelConfig(
      gtcrn: sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig(model: "./gtcrn_simple.onnx"))
  )

  let sd = SherpaOnnxOfflineSpeechDenoiserWrapper(config: &config)

  let inputPath = "./inp_16k.wav"
  let audioFile: AVAudioFile
  do {
    audioFile = try AVAudioFile(forReading: URL(fileURLWithPath: inputPath))
  } catch {
    fatalError("Failed to open \(inputPath): \(error)")
  }

  // `precondition` (unlike `assert`) is still checked in optimized builds.
  let audioFormat = audioFile.processingFormat
  precondition(audioFormat.sampleRate == 16000, "Expected a 16 kHz input file")
  precondition(audioFormat.channelCount == 1, "Expected a mono input file")
  precondition(
    audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32,
    "Expected the processing format to be 32-bit float PCM")

  let audioFrameCount = UInt32(audioFile.length)
  guard
    let audioFileBuffer = AVAudioPCMBuffer(
      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  else {
    fatalError("Failed to allocate a PCM buffer for \(inputPath)")
  }

  do {
    try audioFile.read(into: audioFileBuffer)
  } catch {
    fatalError("Failed to read \(inputPath): \(error)")
  }

  let samples = audioFileBuffer.array()
  let audio = sd.run(samples: samples, sampleRate: Int(audioFormat.sampleRate))

  let filename = "enhanced_16k.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}
| 49 | + | ||
/// Program entry point: runs the speech-enhancement example once.
@main
struct App {
  static func main() { run() }
}
-
请 注册 或 登录 后发表评论