Add Swift API for homophone replacer. (#2164)

Fangjun Kuang · GitHub
Commit 63d01a953400e4618112c414c2f951853b0750bf 63d01a95 1 parent 4a7a974a
.github/scripts/test-swift.sh
swift-api-examples/.gitignore
swift-api-examples/SherpaOnnx.swift
swift-api-examples/decode-file-sense-voice-with-hr.swift
swift-api-examples/run-decode-file-sense-voice-with-hr.sh
--- a/.github/scripts/test-swift.sh
查看文件 @63d01a9
+++ b/.github/scripts/test-swift.sh
查看文件 @63d01a9
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
 
+ ./run-decode-file-sense-voice-with-hr.sh
+ rm -rf sherpa-onnx-sense-voice-*
+ rm -rf dict lexicon.txt replace.fst test-hr.wav
+ 
 ./run-dolphin-ctc-asr.sh
 rm -rf sherpa-onnx-dolphin-*
 
--- a/swift-api-examples/.gitignore
查看文件 @63d01a9
+++ b/swift-api-examples/.gitignore
查看文件 @63d01a9
@@ -15,3 +15,4 @@ tts-matcha-en
 tts-kokoro-en
 tts-kokoro-zh-en
 speech-enhancement-gtcrn
+ decode-file-sense-voice-with-hr
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @63d01a9
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @63d01a9
@@ -128,6 +128,17 @@ func sherpaOnnxOnlineCtcFstDecoderConfig(
     max_active: Int32(maxActive))
 }
 
+ /// Creates a `SherpaOnnxHomophoneReplacerConfig` from Swift strings.
+ ///
+ /// Each argument is converted with `toCPointer` before being stored in the
+ /// C struct. All arguments default to the empty string.
+ ///
+ /// - Parameters:
+ ///   - dictDir: Value stored in the `dict_dir` field.
+ ///   - lexicon: Value stored in the `lexicon` field.
+ ///   - ruleFsts: Value stored in the `rule_fsts` field.
+ /// - Returns: The populated configuration struct.
+ func sherpaOnnxHomophoneReplacerConfig(
+   dictDir: String = "",
+   lexicon: String = "",
+   ruleFsts: String = ""
+ ) -> SherpaOnnxHomophoneReplacerConfig {
+   let dictDirPointer = toCPointer(dictDir)
+   let lexiconPointer = toCPointer(lexicon)
+   let ruleFstsPointer = toCPointer(ruleFsts)
+ 
+   return SherpaOnnxHomophoneReplacerConfig(
+     dict_dir: dictDirPointer,
+     lexicon: lexiconPointer,
+     rule_fsts: ruleFstsPointer
+   )
+ }
+ 
 func sherpaOnnxOnlineRecognizerConfig(
   featConfig: SherpaOnnxFeatureConfig,
   modelConfig: SherpaOnnxOnlineModelConfig,
@@ -144,7 +155,8 @@ func sherpaOnnxOnlineRecognizerConfig(
   ruleFars: String = "",
   blankPenalty: Float = 0.0,
   hotwordsBuf: String = "",
-   hotwordsBufSize: Int = 0
+   hotwordsBufSize: Int = 0,
+   hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
 ) -> SherpaOnnxOnlineRecognizerConfig {
   return SherpaOnnxOnlineRecognizerConfig(
     feat_config: featConfig,
@@ -162,7 +174,8 @@ func sherpaOnnxOnlineRecognizerConfig(
     rule_fars: toCPointer(ruleFars),
     blank_penalty: blankPenalty,
     hotwords_buf: toCPointer(hotwordsBuf),
-     hotwords_buf_size: Int32(hotwordsBufSize)
+     hotwords_buf_size: Int32(hotwordsBufSize),
+     hr: hr
   )
 }
 
@@ -469,7 +482,8 @@ func sherpaOnnxOfflineRecognizerConfig(
   hotwordsScore: Float = 1.5,
   ruleFsts: String = "",
   ruleFars: String = "",
-   blankPenalty: Float = 0.0
+   blankPenalty: Float = 0.0,
+   hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
 ) -> SherpaOnnxOfflineRecognizerConfig {
   return SherpaOnnxOfflineRecognizerConfig(
     feat_config: featConfig,
@@ -481,7 +495,8 @@ func sherpaOnnxOfflineRecognizerConfig(
     hotwords_score: hotwordsScore,
     rule_fsts: toCPointer(ruleFsts),
     rule_fars: toCPointer(ruleFars),
-     blank_penalty: blankPenalty
+     blank_penalty: blankPenalty,
+     hr: hr
   )
 }
 
--- a/swift-api-examples/decode-file-sense-voice-with-hr.swift 0 → 100644
查看文件 @63d01a9
+++ b/swift-api-examples/decode-file-sense-voice-with-hr.swift 0 → 100644
查看文件 @63d01a9
+ import AVFoundation
+ 
+ extension AudioBuffer {
+   /// Copies the contents of this buffer into a new `[Float]`.
+   ///
+   /// NOTE(review): presumably the buffer holds 32-bit float samples; the
+   /// caller in this file asserts `pcmFormatFloat32` before decoding.
+   func array() -> [Float] {
+     let samples = UnsafeBufferPointer<Float>(self)
+     return [Float](samples)
+   }
+ }
+ 
+ extension AVAudioPCMBuffer {
+   /// Returns the samples found in the `mBuffers` entry of this PCM buffer's
+   /// audio buffer list, converted with `AudioBuffer.array()`.
+   func array() -> [Float] {
+     let buffer = audioBufferList.pointee.mBuffers
+     return buffer.array()
+   }
+ }
+ 
+ /// Decodes `./test-hr.wav` with an offline SenseVoice recognizer that has the
+ /// homophone replacer configured, then prints the recognized text and, when
+ /// present, the timestamps.
+ ///
+ /// All model, dictionary, lexicon and rule-FST paths are relative to the
+ /// current working directory. Failing to open the wave file, allocate the PCM
+ /// buffer, or read the samples terminates the program with a message that
+ /// names the failing step.
+ func run() {
+   let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
+   let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
+   let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
+     model: model,
+     useInverseTextNormalization: true
+   )
+ 
+   let modelConfig = sherpaOnnxOfflineModelConfig(
+     tokens: tokens,
+     debug: 0,
+     senseVoice: senseVoiceConfig
+   )
+ 
+   let featConfig = sherpaOnnxFeatureConfig(
+     sampleRate: 16000,
+     featureDim: 80
+   )
+ 
+   let hrConfig = sherpaOnnxHomophoneReplacerConfig(
+     dictDir: "./dict",
+     lexicon: "./lexicon.txt",
+     ruleFsts: "./replace.fst"
+   )
+ 
+   // `config` stays a `var` because it is passed by address (`&config`) below.
+   var config = sherpaOnnxOfflineRecognizerConfig(
+     featConfig: featConfig,
+     modelConfig: modelConfig,
+     hr: hrConfig
+   )
+   let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
+ 
+   let filePath = "./test-hr.wav"
+   let fileURL = URL(fileURLWithPath: filePath)
+ 
+   let audioFile: AVAudioFile
+   do {
+     audioFile = try AVAudioFile(forReading: fileURL)
+   } catch {
+     fatalError("Failed to open \(filePath): \(error)")
+   }
+ 
+   let audioFormat = audioFile.processingFormat
+   assert(audioFormat.channelCount == 1)
+   assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
+ 
+   let audioFrameCount = UInt32(audioFile.length)
+   guard
+     let audioFileBuffer = AVAudioPCMBuffer(
+       pcmFormat: audioFormat, frameCapacity: audioFrameCount)
+   else {
+     fatalError("Failed to allocate a PCM buffer of \(audioFrameCount) frames for \(filePath)")
+   }
+ 
+   do {
+     try audioFile.read(into: audioFileBuffer)
+   } catch {
+     fatalError("Failed to read samples from \(filePath): \(error)")
+   }
+ 
+   let samples = audioFileBuffer.array()
+   let result = recognizer.decode(samples: samples, sampleRate: Int(audioFormat.sampleRate))
+   print("\nresult is:\n\(result.text)")
+   if !result.timestamps.isEmpty {
+     print("\ntimestamps is:\n\(result.timestamps)")
+   }
+ }
+ 
+ /// Program entry point: `main()` simply calls `run()`.
+ @main
+ struct App {
+   static func main() { run() }
+ }
--- a/swift-api-examples/run-decode-file-sense-voice-with-hr.sh 0 → 100755
查看文件 @63d01a9
+++ b/swift-api-examples/run-decode-file-sense-voice-with-hr.sh 0 → 100755
查看文件 @63d01a9
+ #!/usr/bin/env bash
+ #
+ # Builds (when the binary is missing) and runs the
+ # decode-file-sense-voice-with-hr Swift example.
+ #
+ # Requires ../build-swift-macos to exist. The SenseVoice model and the
+ # homophone-replacer files are downloaded into the current directory when
+ # they are not already present.
+ 
+ set -ex
+ 
+ if [ ! -d ../build-swift-macos ]; then
+   echo "Please run ../build-swift-macos.sh first!"
+   exit 1
+ fi
+ 
+ if [ ! -d ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+   tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+   rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+ fi
+ 
+ if [ ! -d dict ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
+   tar xf dict.tar.bz2
+   rm -rf dict.tar.bz2
+ fi
+ 
+ # Check each remaining file on its own rather than gating them on ./dict:
+ # with `set -e`, a run interrupted after ./dict was extracted would otherwise
+ # never fetch the files that are still missing.
+ for f in replace.fst test-hr.wav lexicon.txt; do
+   if [ ! -f "./$f" ]; then
+     curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/$f"
+   fi
+ done
+ 
+ if [ ! -e ./decode-file-sense-voice-with-hr ]; then
+   # Note: We use -lc++ to link against libc++ instead of libstdc++
+   swiftc \
+     -lc++ \
+     -I ../build-swift-macos/install/include \
+     -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+     ./decode-file-sense-voice-with-hr.swift  ./SherpaOnnx.swift \
+     -L ../build-swift-macos/install/lib/ \
+     -l sherpa-onnx \
+     -l onnxruntime \
+     -o decode-file-sense-voice-with-hr
+ 
+   strip decode-file-sense-voice-with-hr
+ else
+   echo "./decode-file-sense-voice-with-hr exists - skip building"
+ fi
+ 
+ # Quoted so that a working directory containing spaces is handled correctly.
+ export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"
+ ./decode-file-sense-voice-with-hr