Fangjun Kuang
Committed by GitHub

Add Swift API for FireRedAsr AED Model (#1876)

@@ -7,6 +7,10 @@ echo "pwd: $PWD" @@ -7,6 +7,10 @@ echo "pwd: $PWD"
7 cd swift-api-examples 7 cd swift-api-examples
8 ls -lh 8 ls -lh
9 9
  10 +
  11 +./run-fire-red-asr.sh
  12 +rm -rf sherpa-onnx-fire-red-asr-*
  13 +
10 ./run-tts-vits.sh 14 ./run-tts-vits.sh
11 ls -lh 15 ls -lh
12 rm -rf vits-piper-* 16 rm -rf vits-piper-*
@@ -357,6 +357,16 @@ func sherpaOnnxOfflineWhisperModelConfig( @@ -357,6 +357,16 @@ func sherpaOnnxOfflineWhisperModelConfig(
357 ) 357 )
358 } 358 }
359 359
  360 +func sherpaOnnxOfflineFireRedAsrModelConfig(
  361 + encoder: String = "",
  362 + decoder: String = ""
  363 +) -> SherpaOnnxOfflineFireRedAsrModelConfig {
  364 + return SherpaOnnxOfflineFireRedAsrModelConfig(
  365 + encoder: toCPointer(encoder),
  366 + decoder: toCPointer(decoder)
  367 + )
  368 +}
  369 +
360 func sherpaOnnxOfflineMoonshineModelConfig( 370 func sherpaOnnxOfflineMoonshineModelConfig(
361 preprocessor: String = "", 371 preprocessor: String = "",
362 encoder: String = "", 372 encoder: String = "",
@@ -416,7 +426,8 @@ func sherpaOnnxOfflineModelConfig( @@ -416,7 +426,8 @@ func sherpaOnnxOfflineModelConfig(
416 bpeVocab: String = "", 426 bpeVocab: String = "",
417 teleSpeechCtc: String = "", 427 teleSpeechCtc: String = "",
418 senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(), 428 senseVoice: SherpaOnnxOfflineSenseVoiceModelConfig = sherpaOnnxOfflineSenseVoiceModelConfig(),
419 - moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig() 429 + moonshine: SherpaOnnxOfflineMoonshineModelConfig = sherpaOnnxOfflineMoonshineModelConfig(),
  430 + fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig()
420 ) -> SherpaOnnxOfflineModelConfig { 431 ) -> SherpaOnnxOfflineModelConfig {
421 return SherpaOnnxOfflineModelConfig( 432 return SherpaOnnxOfflineModelConfig(
422 transducer: transducer, 433 transducer: transducer,
@@ -433,7 +444,8 @@ func sherpaOnnxOfflineModelConfig( @@ -433,7 +444,8 @@ func sherpaOnnxOfflineModelConfig(
433 bpe_vocab: toCPointer(bpeVocab), 444 bpe_vocab: toCPointer(bpeVocab),
434 telespeech_ctc: toCPointer(teleSpeechCtc), 445 telespeech_ctc: toCPointer(teleSpeechCtc),
435 sense_voice: senseVoice, 446 sense_voice: senseVoice,
436 - moonshine: moonshine 447 + moonshine: moonshine,
  448 + fire_red_asr: fireRedAsr
437 ) 449 )
438 } 450 }
439 451
  1 +import AVFoundation
  2 +
  3 +extension AudioBuffer {
  4 + func array() -> [Float] {
  5 + return Array(UnsafeBufferPointer(self))
  6 + }
  7 +}
  8 +
  9 +extension AVAudioPCMBuffer {
  10 + func array() -> [Float] {
  11 + return self.audioBufferList.pointee.mBuffers.array()
  12 + }
  13 +}
  14 +
  15 +func run() {
  16 + let encoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx"
  17 + let decoder = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx"
  18 + let tokens = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt"
  19 +
  20 + let fireRedAsr = sherpaOnnxOfflineFireRedAsrModelConfig(
  21 + encoder: encoder,
  22 + decoder: decoder
  23 + )
  24 +
  25 + let modelConfig = sherpaOnnxOfflineModelConfig(
  26 + tokens: tokens,
  27 + debug: 0,
  28 + fireRedAsr: fireRedAsr
  29 + )
  30 +
  31 + let featConfig = sherpaOnnxFeatureConfig(
  32 + sampleRate: 16000,
  33 + featureDim: 80
  34 + )
  35 + var config = sherpaOnnxOfflineRecognizerConfig(
  36 + featConfig: featConfig,
  37 + modelConfig: modelConfig
  38 + )
  39 +
  40 + let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
  41 +
  42 + let filePath = "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav"
  43 + let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  44 + let audioFile = try! AVAudioFile(forReading: fileURL as URL)
  45 +
  46 + let audioFormat = audioFile.processingFormat
  47 + assert(audioFormat.channelCount == 1)
  48 + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
  49 +
  50 + let audioFrameCount = UInt32(audioFile.length)
  51 + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  52 +
  53 + try! audioFile.read(into: audioFileBuffer!)
  54 + let array: [Float]! = audioFileBuffer?.array()
  55 + let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  56 + print("\nresult is:\n\(result.text)")
  57 + if result.timestamps.count != 0 {
  58 + print("\ntimestamps is:\n\(result.timestamps)")
  59 + }
  60 +
  61 +}
  62 +
  63 +@main
  64 +struct App {
  65 + static func main() {
  66 + run()
  67 + }
  68 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d ../build-swift-macos ]; then
  6 + echo "Please run ../build-swift-macos.sh first!"
  7 + exit 1
  8 +fi
  9 +
  10 +if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  11 + echo "Please download the pre-trained model for testing."
  12 + echo "You can refer to"
  13 + echo ""
  14 + echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/FireRedAsr/index.html"
  15 + echo ""
  16 + echo "for help"
  17 +
  18 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  19 + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  20 + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  21 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
  22 +fi
  23 +
  24 +if [ ! -e ./fire-red-asr ]; then
  25 + # Note: We use -lc++ to link against libc++ instead of libstdc++
  26 + swiftc \
  27 + -lc++ \
  28 + -I ../build-swift-macos/install/include \
  29 + -import-objc-header ./SherpaOnnx-Bridging-Header.h \
  30 + ./fire-red-asr.swift ./SherpaOnnx.swift \
  31 + -L ../build-swift-macos/install/lib/ \
  32 + -l sherpa-onnx \
  33 + -l onnxruntime \
  34 + -o fire-red-asr
  35 +
  36 + strip fire-red-asr
  37 +else
  38 + echo "./fire-red-asr exists - skip building"
  39 +fi
  40 +
  41 +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
  42 +./fire-red-asr