Fangjun Kuang
Committed by GitHub

Add Swift API for Dolphin CTC models (#2091)

@@ -7,6 +7,9 @@ echo "pwd: $PWD" @@ -7,6 +7,9 @@ echo "pwd: $PWD"
7 cd swift-api-examples 7 cd swift-api-examples
8 ls -lh 8 ls -lh
9 9
  10 +./run-dolphin-ctc-asr.sh
  11 +rm -rf sherpa-onnx-dolphin-*
  12 +
10 ./run-speech-enhancement-gtcrn.sh 13 ./run-speech-enhancement-gtcrn.sh
11 ls -lh *.wav 14 ls -lh *.wav
12 15
@@ -341,6 +341,14 @@ func sherpaOnnxOfflineNemoEncDecCtcModelConfig( @@ -341,6 +341,14 @@ func sherpaOnnxOfflineNemoEncDecCtcModelConfig(
341 ) 341 )
342 } 342 }
343 343
  344 +func sherpaOnnxOfflineDolphinModelConfig(
  345 + model: String = ""
  346 +) -> SherpaOnnxOfflineDolphinModelConfig {
  347 + return SherpaOnnxOfflineDolphinModelConfig(
  348 + model: toCPointer(model)
  349 + )
  350 +}
  351 +
344 func sherpaOnnxOfflineWhisperModelConfig( 352 func sherpaOnnxOfflineWhisperModelConfig(
345 encoder: String = "", 353 encoder: String = "",
346 decoder: String = "", 354 decoder: String = "",
@@ -427,7 +435,8 @@ func sherpaOnnxOfflineModelConfig( @@ -427,7 +435,8 @@ func sherpaOnnxOfflineModelConfig(
427 teleSpeechCtc: String = "", 435 teleSpeechCtc: String = "",
428 senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(), 436 senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(),
429 moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(), 437 moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(),
430 - fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig() 438 + fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig(),
  439 + dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig()
431 ) -> SherpaOnnxOfflineModelConfig { 440 ) -> SherpaOnnxOfflineModelConfig {
432 return SherpaOnnxOfflineModelConfig( 441 return SherpaOnnxOfflineModelConfig(
433 transducer: transducer, 442 transducer: transducer,
@@ -445,7 +454,8 @@ func sherpaOnnxOfflineModelConfig( @@ -445,7 +454,8 @@ func sherpaOnnxOfflineModelConfig(
445 telespeech_ctc: toCPointer(teleSpeechCtc), 454 telespeech_ctc: toCPointer(teleSpeechCtc),
446 sense_voice: senseVoice, 455 sense_voice: senseVoice,
447 moonshine: moonshine, 456 moonshine: moonshine,
448 - fire_red_asr: fireRedAsr 457 + fire_red_asr: fireRedAsr,
  458 + dolphin: dolphin
449 ) 459 )
450 } 460 }
451 461
  1 +import AVFoundation
  2 +
  3 +extension AudioBuffer {
  4 + func array() -> [Float] {
  5 + return Array(UnsafeBufferPointer(self))
  6 + }
  7 +}
  8 +
  9 +extension AVAudioPCMBuffer {
  10 + func array() -> [Float] {
  11 + return self.audioBufferList.pointee.mBuffers.array()
  12 + }
  13 +}
  14 +
  15 +func run() {
  16 + let model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"
  17 + let tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"
  18 +
  19 + let dolphin = sherpaOnnxOfflineDolphinModelConfig(
  20 + model: model
  21 + )
  22 +
  23 + let modelConfig = sherpaOnnxOfflineModelConfig(
  24 + tokens: tokens,
  25 + debug: 0,
  26 + dolphin: dolphin
  27 + )
  28 +
  29 + let featConfig = sherpaOnnxFeatureConfig(
  30 + sampleRate: 16000,
  31 + featureDim: 80
  32 + )
  33 + var config = sherpaOnnxOfflineRecognizerConfig(
  34 + featConfig: featConfig,
  35 + modelConfig: modelConfig
  36 + )
  37 +
  38 + let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
  39 +
  40 + let filePath = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
  41 + let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  42 + let audioFile = try! AVAudioFile(forReading: fileURL as URL)
  43 +
  44 + let audioFormat = audioFile.processingFormat
  45 + assert(audioFormat.channelCount == 1)
  46 + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
  47 +
  48 + let audioFrameCount = UInt32(audioFile.length)
  49 + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  50 +
  51 + try! audioFile.read(into: audioFileBuffer!)
  52 + let array: [Float]! = audioFileBuffer?.array()
  53 + let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  54 + print("\nresult is:\n\(result.text)")
  55 + if result.timestamps.count != 0 {
  56 + print("\ntimestamps is:\n\(result.timestamps)")
  57 + }
  58 +
  59 +}
  60 +
  61 +@main
  62 +struct App {
  63 + static func main() {
  64 + run()
  65 + }
  66 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d ../build-swift-macos ]; then
  6 + echo "Please run ../build-swift-macos.sh first!"
  7 + exit 1
  8 +fi
  9 +
  10 +if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  11 + echo "Please download the pre-trained model for testing."
  12 + echo "You can refer to"
  13 + echo ""
  14 + echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/dolphin/index.html"
  15 + echo ""
  16 + echo "for help"
  17 +
  18 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  19 + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  20 + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  21 + ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
  22 +fi
  23 +
  24 +if [ ! -e ./dolphin-ctc-asr ]; then
  25 + # Note: We use -lc++ to link against libc++ instead of libstdc++
  26 + swiftc \
  27 + -lc++ \
  28 + -I ../build-swift-macos/install/include \
  29 + -import-objc-header ./SherpaOnnx-Bridging-Header.h \
  30 + ./dolphin-ctc-asr.swift ./SherpaOnnx.swift \
  31 + -L ../build-swift-macos/install/lib/ \
  32 + -l sherpa-onnx \
  33 + -l onnxruntime \
  34 + -o dolphin-ctc-asr
  35 +
  36 + strip dolphin-ctc-asr
  37 +else
  38 + echo "./dolphin-ctc-asr exists - skip building"
  39 +fi
  40 +
  41 +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
  42 +./dolphin-ctc-asr