Add Swift API for Dolphin CTC models (#2091)

Fangjun Kuang · GitHub
Commit 74f402e4904256f45d9417024567c762cf08b304 74f402e4 1 parent ba7d8b63
.github/scripts/test-swift.sh
swift-api-examples/SherpaOnnx.swift
swift-api-examples/dolphin-ctc-asr.swift
swift-api-examples/run-dolphin-ctc-asr.sh
--- a/.github/scripts/test-swift.sh
查看文件 @74f402e
+++ b/.github/scripts/test-swift.sh
查看文件 @74f402e
@@ -7,6 +7,9 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
+./run-dolphin-ctc-asr.sh
+rm -rf sherpa-onnx-dolphin-*
+
 ./run-speech-enhancement-gtcrn.sh
 ls -lh *.wav
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @74f402e
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @74f402e
@@ -341,6 +341,14 @@ func sherpaOnnxOfflineNemoEncDecCtcModelConfig(
   )
 }
+/// Builds the configuration struct for a Dolphin CTC model.
+///
+/// - Parameter model: Path to the Dolphin CTC model file. Defaults to `""`.
+/// - Returns: A `SherpaOnnxOfflineDolphinModelConfig` whose `model` field is
+///   set to `toCPointer(model)`.
+func sherpaOnnxOfflineDolphinModelConfig(
+  model: String = ""
+) -> SherpaOnnxOfflineDolphinModelConfig {
+  SherpaOnnxOfflineDolphinModelConfig(model: toCPointer(model))
+}
+
 func sherpaOnnxOfflineWhisperModelConfig(
   encoder: String = "",
   decoder: String = "",
@@ -427,7 +435,8 @@ func sherpaOnnxOfflineModelConfig(
   teleSpeechCtc: String = "",
   senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(),
   moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(),
-  fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig()
+  fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig(),
+  dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig()
 ) -> SherpaOnnxOfflineModelConfig {
   return SherpaOnnxOfflineModelConfig(
     transducer: transducer,
@@ -445,7 +454,8 @@ func sherpaOnnxOfflineModelConfig(
     telespeech_ctc: toCPointer(teleSpeechCtc),
     sense_voice: senseVoice,
     moonshine: moonshine,
-    fire_red_asr: fireRedAsr
+    fire_red_asr: fireRedAsr,
+    dolphin: dolphin
   )
 }
--- a/swift-api-examples/dolphin-ctc-asr.swift 0 → 100644
查看文件 @74f402e
+++ b/swift-api-examples/dolphin-ctc-asr.swift 0 → 100644
查看文件 @74f402e
+import AVFoundation
+
+extension AudioBuffer {
+  /// Copies the contents of this buffer into a new `[Float]`.
+  ///
+  /// NOTE(review): the buffer is viewed as `Float` elements; presumably it is
+  /// only used with Float32 PCM data — confirm against callers.
+  func array() -> [Float] {
+    let samples = UnsafeBufferPointer<Float>(self)
+    return Array(samples)
+  }
+}
+
+extension AVAudioPCMBuffer {
+  /// Returns the samples of the `mBuffers` entry of this buffer's
+  /// `audioBufferList` as a `[Float]`.
+  func array() -> [Float] {
+    let buffer = audioBufferList.pointee.mBuffers
+    return buffer.array()
+  }
+}
+
+/// Decodes a test wave file with a Dolphin CTC model and prints the result.
+///
+/// The model, tokens and wave file paths are hard-coded relative to the
+/// current working directory. If the wave file cannot be opened or read, or
+/// it is not mono Float32, the program aborts with a descriptive message so
+/// that callers (e.g. CI scripts) still observe a failing exit status.
+func run() {
+  let modelDir = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02"
+  let model = "\(modelDir)/model.int8.onnx"
+  let tokens = "\(modelDir)/tokens.txt"
+  let filePath = "\(modelDir)/test_wavs/0.wav"
+
+  let dolphin = sherpaOnnxOfflineDolphinModelConfig(model: model)
+
+  let modelConfig = sherpaOnnxOfflineModelConfig(
+    tokens: tokens,
+    debug: 0,
+    dolphin: dolphin
+  )
+
+  let featConfig = sherpaOnnxFeatureConfig(
+    sampleRate: 16000,
+    featureDim: 80
+  )
+
+  // `config` must be mutable because the recognizer takes it as `inout`.
+  var config = sherpaOnnxOfflineRecognizerConfig(
+    featConfig: featConfig,
+    modelConfig: modelConfig
+  )
+
+  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
+
+  let audioFile: AVAudioFile
+  do {
+    audioFile = try AVAudioFile(forReading: URL(fileURLWithPath: filePath))
+  } catch {
+    fatalError("Failed to open \(filePath): \(error)")
+  }
+
+  // Use `precondition` so the format checks also hold in optimized builds.
+  let audioFormat = audioFile.processingFormat
+  precondition(audioFormat.channelCount == 1, "Expected a mono wave file: \(filePath)")
+  precondition(
+    audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32,
+    "Expected Float32 PCM samples: \(filePath)")
+
+  // `audioFile.length` is 64-bit; convert it without trapping silently.
+  guard
+    let audioFrameCount = AVAudioFrameCount(exactly: audioFile.length),
+    let audioFileBuffer = AVAudioPCMBuffer(
+      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
+  else {
+    fatalError("Failed to allocate a PCM buffer for \(filePath)")
+  }
+
+  do {
+    try audioFile.read(into: audioFileBuffer)
+  } catch {
+    fatalError("Failed to read \(filePath): \(error)")
+  }
+
+  let samples = audioFileBuffer.array()
+  let result = recognizer.decode(samples: samples, sampleRate: Int(audioFormat.sampleRate))
+  print("\nresult is:\n\(result.text)")
+  if !result.timestamps.isEmpty {
+    print("\ntimestamps is:\n\(result.timestamps)")
+  }
+}
+
+/// Program entry point.
+@main
+struct App {
+  /// Runs the Dolphin CTC decoding example.
+  static func main() { run() }
+}
--- a/swift-api-examples/run-dolphin-ctc-asr.sh 0 → 100755
查看文件 @74f402e
+++ b/swift-api-examples/run-dolphin-ctc-asr.sh 0 → 100755
查看文件 @74f402e
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -d ../build-swift-macos ]; then
+  echo "Please run ../build-swift-macos.sh first!"
+  exit 1
+fi
+
+if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
+  echo "Please download the pre-trained model for testing."
+  echo "You can refer to"
+  echo ""
+  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/dolphin/index.html"
+  echo ""
+  echo "for help"
+
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
+  ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
+fi
+
+if [ ! -e ./dolphin-ctc-asr ]; then
+  # Note: We use -lc++ to link against libc++ instead of libstdc++
+  swiftc \
+    -lc++ \
+    -I ../build-swift-macos/install/include \
+    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+    ./dolphin-ctc-asr.swift  ./SherpaOnnx.swift \
+    -L ../build-swift-macos/install/lib/ \
+    -l sherpa-onnx \
+    -l onnxruntime \
+    -o dolphin-ctc-asr
+
+  strip dolphin-ctc-asr
+else
+  echo "./dolphin-ctc-asr exists - skip building"
+fi
+
+export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
+./dolphin-ctc-asr