Fangjun Kuang
Committed by GitHub

Add Swift API for spoken language identification. (#696)

... ... @@ -7,6 +7,9 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh
./run-spoken-language-identification.sh
rm -rf sherpa-onnx-whisper*
mkdir -p /Users/fangjun/Desktop
pushd /Users/fangjun/Desktop
curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
... ...
decode-file
decode-file-non-streaming
generate-subtitles
spoken-language-identification
tts
vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
... ...
... ... @@ -713,3 +713,86 @@ class SherpaOnnxOfflineTtsWrapper {
return SherpaOnnxGeneratedAudioWrapper(audio: audio)
}
}
// spoken language identification
/// Build the whisper-model part of a spoken language identification config.
///
/// - Parameters:
///   - encoder: Path to the whisper encoder onnx model file.
///   - decoder: Path to the whisper decoder onnx model file.
///   - tailPaddings: Number of tail padding frames; -1 presumably lets the
///     C library pick its default — confirm against the C API docs.
/// - Returns: The corresponding C struct, ready to be embedded in a
///   `SherpaOnnxSpokenLanguageIdentificationConfig`.
func sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
  encoder: String,
  decoder: String,
  tailPaddings: Int = -1
) -> SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  let config = SherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    tail_paddings: Int32(tailPaddings)
  )
  return config
}
/// Build a full spoken language identification config.
///
/// - Parameters:
///   - whisper: The whisper model config (see
///     `sherpaOnnxSpokenLanguageIdentificationWhisperConfig`).
///   - numThreads: Number of threads for inference. Default 1.
///   - debug: Non-zero enables debug output from the C library. Default 0.
///   - provider: Inference provider, e.g. "cpu". Default "cpu".
/// - Returns: The corresponding C struct.
func sherpaOnnxSpokenLanguageIdentificationConfig(
  whisper: SherpaOnnxSpokenLanguageIdentificationWhisperConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxSpokenLanguageIdentificationConfig {
  let config = SherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisper,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider)
  )
  return config
}
/// Owns a C `SherpaOnnxSpokenLanguageIdentificationResult` pointer and
/// releases it via the C API when this wrapper is deallocated.
class SherpaOnnxSpokenLanguageIdentificationResultWrapper {
  /// A pointer to the underlying counterpart in C
  let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!

  /// The detected language code, e.g.
  /// en for English, zh for Chinese, es for Spanish, de for German, etc.
  var lang: String {
    String(cString: result.pointee.lang)
  }

  init(result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!) {
    self.result = result
  }

  deinit {
    // Only free when the C API actually produced a result.
    guard let result else { return }
    SherpaOnnxDestroySpokenLanguageIdentificationResult(result)
  }
}
/// Swift wrapper around the C spoken language identification engine.
/// Creates the engine from a config on init and destroys it on deinit.
class SherpaOnnxSpokenLanguageIdentificationWrapper {
  /// A pointer to the underlying counterpart in C
  let slid: OpaquePointer!

  init(
    config: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationConfig>!
  ) {
    slid = SherpaOnnxCreateSpokenLanguageIdentification(config)
  }

  deinit {
    guard let slid else { return }
    SherpaOnnxDestroySpokenLanguageIdentification(slid)
  }

  /// Identify the spoken language of the given audio.
  ///
  /// - Parameters:
  ///   - samples: The audio waveform samples to analyze.
  ///   - sampleRate: Sample rate of `samples`. Default 16000.
  /// - Returns: A wrapper holding the identification result; read its
  ///   `lang` property for the detected language code.
  func decode(samples: [Float], sampleRate: Int = 16000)
    -> SherpaOnnxSpokenLanguageIdentificationResultWrapper
  {
    let stream: OpaquePointer! =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid)
    // The stream is only needed while feeding samples and computing;
    // the result pointer returned by Compute outlives it.
    defer { DestroyOfflineStream(stream) }

    AcceptWaveformOffline(stream, Int32(sampleRate), samples, Int32(samples.count))
    let result = SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream)
    return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result)
  }
}
... ...
#!/usr/bin/env bash
# Build (if needed) and run the Swift spoken language identification example
# on macOS. Requires ../build-swift-macos.sh to have been run first.

set -ex

# The sherpa-onnx macOS Swift build must exist before we can compile.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch the pre-trained whisper tiny model once; subsequent runs reuse it.
if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
  echo "Download a pre-trained model for testing."
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

# Compile the example only when the binary is not already present.
if [ -e ./spoken-language-identification ]; then
  echo "./spoken-language-identification exists - skip building"
else
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./spoken-language-identification.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o spoken-language-identification

  strip spoken-language-identification
fi

# Make the sherpa-onnx / onnxruntime dylibs visible at run time.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH

./spoken-language-identification
... ...
import AVFoundation
extension AudioBuffer {
  /// Copy this buffer's contents into a new `[Float]` array.
  /// Assumes the buffer holds float32 samples — the caller (see the example
  /// `run()` in this file) asserts `pcmFormatFloat32` before using this.
  func array() -> [Float] {
    let floats = UnsafeBufferPointer<Float>(self)
    return [Float](floats)
  }
}
extension AVAudioPCMBuffer {
  /// Copy the first channel's samples into a new `[Float]` array.
  /// Only reads `mBuffers` (the first buffer), so it is intended for mono
  /// audio — the caller asserts `channelCount == 1`.
  func array() -> [Float] {
    let firstChannel = self.audioBufferList.pointee.mBuffers
    return firstChannel.array()
  }
}
/// Run spoken language identification on a bundled test wave file using a
/// whisper multilingual model and print the detected language code.
///
/// Expects `./sherpa-onnx-whisper-tiny/` to exist (downloaded by the run
/// script). `try!` / force-unwraps are deliberate in this example: a missing
/// or malformed test file is a setup error and should abort.
func run() {
  // int8-quantized whisper tiny encoder/decoder models.
  let encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
  let decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"

  let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: encoder,
    decoder: decoder
  )

  // `var` is required: we pass a pointer to `config` into the wrapper below.
  var config = sherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisperConfig,
    numThreads: 1,
    debug: 1,
    provider: "cpu"
  )

  let filePath = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"

  let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  // This example expects 16 kHz mono float32 input (matching the default
  // sampleRate passed to decode below and the mono `array()` helper).
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()

  let result = slid.decode(samples: array)

  // Fixed a typo in the original output message ("Detectedllanguage").
  print("\nDetected language is:\n\(result.lang)")
}
/// Program entry point: delegates straight to `run()`.
@main
struct App {
  static func main() {
    run()
  }
}
... ...