Committed by GitHub
Add Swift API for Dolphin CTC models (#2091)
正在显示 4 个修改的文件，包含 123 行增加和 2 行删除
| @@ -7,6 +7,9 @@ echo "pwd: $PWD" | @@ -7,6 +7,9 @@ echo "pwd: $PWD" | ||
| 7 | cd swift-api-examples | 7 | cd swift-api-examples |
| 8 | ls -lh | 8 | ls -lh |
| 9 | 9 | ||
| 10 | +./run-dolphin-ctc-asr.sh | ||
| 11 | +rm -rf sherpa-onnx-dolphin-* | ||
| 12 | + | ||
| 10 | ./run-speech-enhancement-gtcrn.sh | 13 | ./run-speech-enhancement-gtcrn.sh |
| 11 | ls -lh *.wav | 14 | ls -lh *.wav |
| 12 | 15 |
| @@ -341,6 +341,14 @@ func sherpaOnnxOfflineNemoEncDecCtcModelConfig( | @@ -341,6 +341,14 @@ func sherpaOnnxOfflineNemoEncDecCtcModelConfig( | ||
| 341 | ) | 341 | ) |
| 342 | } | 342 | } |
| 343 | 343 | ||
| 344 | +func sherpaOnnxOfflineDolphinModelConfig( | ||
| 345 | + model: String = "" | ||
| 346 | +) -> SherpaOnnxOfflineDolphinModelConfig { | ||
| 347 | + return SherpaOnnxOfflineDolphinModelConfig( | ||
| 348 | + model: toCPointer(model) | ||
| 349 | + ) | ||
| 350 | +} | ||
| 351 | + | ||
| 344 | func sherpaOnnxOfflineWhisperModelConfig( | 352 | func sherpaOnnxOfflineWhisperModelConfig( |
| 345 | encoder: String = "", | 353 | encoder: String = "", |
| 346 | decoder: String = "", | 354 | decoder: String = "", |
| @@ -427,7 +435,8 @@ func sherpaOnnxOfflineModelConfig( | @@ -427,7 +435,8 @@ func sherpaOnnxOfflineModelConfig( | ||
| 427 | teleSpeechCtc: String = "", | 435 | teleSpeechCtc: String = "", |
| 428 | senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(), | 436 | senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(), |
| 429 | moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(), | 437 | moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(), |
| 430 | - fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig() | 438 | + fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig(), |
| 439 | + dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig() | ||
| 431 | ) -> SherpaOnnxOfflineModelConfig { | 440 | ) -> SherpaOnnxOfflineModelConfig { |
| 432 | return SherpaOnnxOfflineModelConfig( | 441 | return SherpaOnnxOfflineModelConfig( |
| 433 | transducer: transducer, | 442 | transducer: transducer, |
| @@ -445,7 +454,8 @@ func sherpaOnnxOfflineModelConfig( | @@ -445,7 +454,8 @@ func sherpaOnnxOfflineModelConfig( | ||
| 445 | telespeech_ctc: toCPointer(teleSpeechCtc), | 454 | telespeech_ctc: toCPointer(teleSpeechCtc), |
| 446 | sense_voice: senseVoice, | 455 | sense_voice: senseVoice, |
| 447 | moonshine: moonshine, | 456 | moonshine: moonshine, |
| 448 | - fire_red_asr: fireRedAsr | 457 | + fire_red_asr: fireRedAsr, |
| 458 | + dolphin: dolphin | ||
| 449 | ) | 459 | ) |
| 450 | } | 460 | } |
| 451 | 461 |
swift-api-examples/dolphin-ctc-asr.swift
0 → 100644
| 1 | +import AVFoundation | ||
| 2 | + | ||
| 3 | +extension AudioBuffer { | ||
| 4 | + func array() -> [Float] { | ||
| 5 | + return Array(UnsafeBufferPointer(self)) | ||
| 6 | + } | ||
| 7 | +} | ||
| 8 | + | ||
| 9 | +extension AVAudioPCMBuffer { | ||
| 10 | + func array() -> [Float] { | ||
| 11 | + return self.audioBufferList.pointee.mBuffers.array() | ||
| 12 | + } | ||
| 13 | +} | ||
| 14 | + | ||
| 15 | +func run() { | ||
| 16 | + let model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx" | ||
| 17 | + let tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt" | ||
| 18 | + | ||
| 19 | + let dolphin = sherpaOnnxOfflineDolphinModelConfig( | ||
| 20 | + model: model | ||
| 21 | + ) | ||
| 22 | + | ||
| 23 | + let modelConfig = sherpaOnnxOfflineModelConfig( | ||
| 24 | + tokens: tokens, | ||
| 25 | + debug: 0, | ||
| 26 | + dolphin: dolphin | ||
| 27 | + ) | ||
| 28 | + | ||
| 29 | + let featConfig = sherpaOnnxFeatureConfig( | ||
| 30 | + sampleRate: 16000, | ||
| 31 | + featureDim: 80 | ||
| 32 | + ) | ||
| 33 | + var config = sherpaOnnxOfflineRecognizerConfig( | ||
| 34 | + featConfig: featConfig, | ||
| 35 | + modelConfig: modelConfig | ||
| 36 | + ) | ||
| 37 | + | ||
| 38 | + let recognizer = SherpaOnnxOfflineRecognizer(config: &config) | ||
| 39 | + | ||
| 40 | + let filePath = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav" | ||
| 41 | + let fileURL: NSURL = NSURL(fileURLWithPath: filePath) | ||
| 42 | + let audioFile = try! AVAudioFile(forReading: fileURL as URL) | ||
| 43 | + | ||
| 44 | + let audioFormat = audioFile.processingFormat | ||
| 45 | + assert(audioFormat.channelCount == 1) | ||
| 46 | + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32) | ||
| 47 | + | ||
| 48 | + let audioFrameCount = UInt32(audioFile.length) | ||
| 49 | + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount) | ||
| 50 | + | ||
| 51 | + try! audioFile.read(into: audioFileBuffer!) | ||
| 52 | + let array: [Float]! = audioFileBuffer?.array() | ||
| 53 | + let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate)) | ||
| 54 | + print("\nresult is:\n\(result.text)") | ||
| 55 | + if result.timestamps.count != 0 { | ||
| 56 | + print("\ntimestamps is:\n\(result.timestamps)") | ||
| 57 | + } | ||
| 58 | + | ||
| 59 | +} | ||
| 60 | + | ||
| 61 | +@main | ||
| 62 | +struct App { | ||
| 63 | + static func main() { | ||
| 64 | + run() | ||
| 65 | + } | ||
| 66 | +} |
swift-api-examples/run-dolphin-ctc-asr.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d ../build-swift-macos ]; then | ||
| 6 | + echo "Please run ../build-swift-macos.sh first!" | ||
| 7 | + exit 1 | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then | ||
| 11 | + echo "Please download the pre-trained model for testing." | ||
| 12 | + echo "You can refer to" | ||
| 13 | + echo "" | ||
| 14 | + echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/dolphin/index.html" | ||
| 15 | + echo "" | ||
| 16 | + echo "for help" | ||
| 17 | + | ||
| 18 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 19 | + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 20 | + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 | ||
| 21 | + ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 | ||
| 22 | +fi | ||
| 23 | + | ||
| 24 | +if [ ! -e ./dolphin-ctc-asr ]; then | ||
| 25 | + # Note: We use -lc++ to link against libc++ instead of libstdc++ | ||
| 26 | + swiftc \ | ||
| 27 | + -lc++ \ | ||
| 28 | + -I ../build-swift-macos/install/include \ | ||
| 29 | + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ | ||
| 30 | + ./dolphin-ctc-asr.swift ./SherpaOnnx.swift \ | ||
| 31 | + -L ../build-swift-macos/install/lib/ \ | ||
| 32 | + -l sherpa-onnx \ | ||
| 33 | + -l onnxruntime \ | ||
| 34 | + -o dolphin-ctc-asr | ||
| 35 | + | ||
| 36 | + strip dolphin-ctc-asr | ||
| 37 | +else | ||
| 38 | + echo "./dolphin-ctc-asr exists - skip building" | ||
| 39 | +fi | ||
| 40 | + | ||
| 41 | +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH | ||
| 42 | +./dolphin-ctc-asr |
-
请 注册 或 登录 后发表评论