Add Swift API for homophone replacer. (#2164)

Fangjun Kuang · GitHub
Commit 63d01a953400e4618112c414c2f951853b0750bf 63d01a95 1 parent 4a7a974a
.github/scripts/test-swift.sh
swift-api-examples/.gitignore
swift-api-examples/SherpaOnnx.swift
swift-api-examples/decode-file-sense-voice-with-hr.swift
swift-api-examples/run-decode-file-sense-voice-with-hr.sh
--- a/.github/scripts/test-swift.sh
查看文件 @63d01a9
+++ b/.github/scripts/test-swift.sh
查看文件 @63d01a9
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
+./run-decode-file-sense-voice-with-hr.sh
+rm -rf sherpa-onnx-sense-voice-*
+rm -rf dict lexicon.txt replace.fst test-hr.wav
+
 ./run-dolphin-ctc-asr.sh
 rm -rf sherpa-onnx-dolphin-*
--- a/swift-api-examples/.gitignore
查看文件 @63d01a9
+++ b/swift-api-examples/.gitignore
查看文件 @63d01a9
@@ -15,3 +15,4 @@ tts-matcha-en
 tts-kokoro-en
 tts-kokoro-zh-en
 speech-enhancement-gtcrn
+decode-file-sense-voice-with-hr
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @63d01a9
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @63d01a9
@@ -128,6 +128,17 @@ func sherpaOnnxOnlineCtcFstDecoderConfig(
     max_active: Int32(maxActive))
 }
+/// Builds the C-level homophone replacer configuration from Swift strings.
+///
+/// Every argument defaults to an empty string and is passed through
+/// `toCPointer` before being stored in the returned struct.
+///
+/// - Parameters:
+///   - dictDir: Value stored in the `dict_dir` field.
+///   - lexicon: Value stored in the `lexicon` field.
+///   - ruleFsts: Value stored in the `rule_fsts` field.
+/// - Returns: A `SherpaOnnxHomophoneReplacerConfig` holding the three values.
+func sherpaOnnxHomophoneReplacerConfig(
+  dictDir: String = "",
+  lexicon: String = "",
+  ruleFsts: String = ""
+) -> SherpaOnnxHomophoneReplacerConfig {
+  let dictDirPointer = toCPointer(dictDir)
+  let lexiconPointer = toCPointer(lexicon)
+  let ruleFstsPointer = toCPointer(ruleFsts)
+
+  return SherpaOnnxHomophoneReplacerConfig(
+    dict_dir: dictDirPointer,
+    lexicon: lexiconPointer,
+    rule_fsts: ruleFstsPointer)
+}
+
 func sherpaOnnxOnlineRecognizerConfig(
   featConfig: SherpaOnnxFeatureConfig,
   modelConfig: SherpaOnnxOnlineModelConfig,
@@ -144,7 +155,8 @@ func sherpaOnnxOnlineRecognizerConfig(
   ruleFars: String = "",
   blankPenalty: Float = 0.0,
   hotwordsBuf: String = "",
-  hotwordsBufSize: Int = 0
+  hotwordsBufSize: Int = 0,
+  hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
 ) -> SherpaOnnxOnlineRecognizerConfig {
   return SherpaOnnxOnlineRecognizerConfig(
     feat_config: featConfig,
@@ -162,7 +174,8 @@ func sherpaOnnxOnlineRecognizerConfig(
     rule_fars: toCPointer(ruleFars),
     blank_penalty: blankPenalty,
     hotwords_buf: toCPointer(hotwordsBuf),
-    hotwords_buf_size: Int32(hotwordsBufSize)
+    hotwords_buf_size: Int32(hotwordsBufSize),
+    hr: hr
   )
 }
@@ -469,7 +482,8 @@ func sherpaOnnxOfflineRecognizerConfig(
   hotwordsScore: Float = 1.5,
   ruleFsts: String = "",
   ruleFars: String = "",
-  blankPenalty: Float = 0.0
+  blankPenalty: Float = 0.0,
+  hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
 ) -> SherpaOnnxOfflineRecognizerConfig {
   return SherpaOnnxOfflineRecognizerConfig(
     feat_config: featConfig,
@@ -481,7 +495,8 @@ func sherpaOnnxOfflineRecognizerConfig(
     hotwords_score: hotwordsScore,
     rule_fsts: toCPointer(ruleFsts),
     rule_fars: toCPointer(ruleFars),
-    blank_penalty: blankPenalty
+    blank_penalty: blankPenalty,
+    hr: hr
   )
 }
--- a/swift-api-examples/decode-file-sense-voice-with-hr.swift 0 → 100644
查看文件 @63d01a9
+++ b/swift-api-examples/decode-file-sense-voice-with-hr.swift 0 → 100644
查看文件 @63d01a9
+import AVFoundation
+
+extension AudioBuffer {
+  /// Copies the contents of this buffer into a Swift array of `Float`.
+  ///
+  /// NOTE(review): the buffer memory is interpreted as `Float`; callers are
+  /// presumably expected to have checked the sample format first — confirm.
+  func array() -> [Float] {
+    let samples = UnsafeBufferPointer<Float>(self)
+    return [Float](samples)
+  }
+}
+
+extension AVAudioPCMBuffer {
+  /// Returns the samples held in `audioBufferList.pointee.mBuffers` as `[Float]`.
+  func array() -> [Float] {
+    let buffer = audioBufferList.pointee.mBuffers
+    return buffer.array()
+  }
+}
+
+/// Decodes `./test-hr.wav` with the SenseVoice model and a homophone replacer,
+/// then prints the recognized text and, when present, the timestamps.
+///
+/// The model, tokens, dictionary, lexicon, rule FST and wave file are read
+/// from fixed paths relative to the current working directory. Any failure to
+/// open or read the wave file, or an unexpected sample format, terminates the
+/// process with a message that names the problem.
+func run() {
+  let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
+  let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
+  let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
+    model: model,
+    useInverseTextNormalization: true
+  )
+
+  let modelConfig = sherpaOnnxOfflineModelConfig(
+    tokens: tokens,
+    debug: 0,
+    senseVoice: senseVoiceConfig
+  )
+
+  let featConfig = sherpaOnnxFeatureConfig(
+    sampleRate: 16000,
+    featureDim: 80
+  )
+
+  let hrConfig = sherpaOnnxHomophoneReplacerConfig(
+    dictDir: "./dict",
+    lexicon: "./lexicon.txt",
+    ruleFsts: "./replace.fst"
+  )
+
+  // `config` stays a `var` because the recognizer takes it by reference (`&config`).
+  var config = sherpaOnnxOfflineRecognizerConfig(
+    featConfig: featConfig,
+    modelConfig: modelConfig,
+    hr: hrConfig
+  )
+
+  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
+
+  let filePath = "./test-hr.wav"
+  let fileURL = URL(fileURLWithPath: filePath)
+
+  let audioFile: AVAudioFile
+  do {
+    audioFile = try AVAudioFile(forReading: fileURL)
+  } catch {
+    fatalError("Failed to open \(filePath): \(error)")
+  }
+
+  // Use `precondition` so the format checks also hold in optimized builds.
+  let audioFormat = audioFile.processingFormat
+  precondition(audioFormat.channelCount == 1, "\(filePath) must contain exactly one channel")
+  precondition(
+    audioFormat.commonFormat == .pcmFormatFloat32,
+    "\(filePath) must be readable as 32-bit float PCM")
+
+  guard let audioFrameCount = AVAudioFrameCount(exactly: audioFile.length) else {
+    fatalError("\(filePath) has an unsupported length: \(audioFile.length) frames")
+  }
+  guard
+    let audioFileBuffer = AVAudioPCMBuffer(
+      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
+  else {
+    fatalError("Failed to allocate a buffer of \(audioFrameCount) frames for \(filePath)")
+  }
+
+  do {
+    try audioFile.read(into: audioFileBuffer)
+  } catch {
+    fatalError("Failed to read \(filePath): \(error)")
+  }
+
+  let samples = audioFileBuffer.array()
+  let result = recognizer.decode(samples: samples, sampleRate: Int(audioFormat.sampleRate))
+  print("\nresult is:\n\(result.text)")
+  if !result.timestamps.isEmpty {
+    print("\ntimestamps is:\n\(result.timestamps)")
+  }
+}
+
+/// Program entry point for this example.
+@main
+struct App {
+  /// Invokes `run()`.
+  static func main() { run() }
+}
--- a/swift-api-examples/run-decode-file-sense-voice-with-hr.sh 0 → 100755
查看文件 @63d01a9
+++ b/swift-api-examples/run-decode-file-sense-voice-with-hr.sh 0 → 100755
查看文件 @63d01a9
+#!/usr/bin/env bash
+#
+# Downloads the SenseVoice model and the homophone replacer files when they
+# are missing, builds ./decode-file-sense-voice-with-hr if it does not exist
+# yet, and runs it.
+
+set -ex
+
+if [ ! -d ../build-swift-macos ]; then
+  echo "Please run ../build-swift-macos.sh first!"
+  exit 1
+fi
+
+if [ ! -d ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+fi
+
+if [ ! -d dict ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
+  tar xf dict.tar.bz2
+  rm -rf dict.tar.bz2
+fi
+
+# Check each auxiliary file on its own so that a previously interrupted run
+# (dict extracted, other files missing) is repaired on the next invocation.
+for f in replace.fst test-hr.wav lexicon.txt; do
+  if [ ! -f "./$f" ]; then
+    curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/$f"
+  fi
+done
+
+if [ ! -e ./decode-file-sense-voice-with-hr ]; then
+  # Note: We use -lc++ to link against libc++ instead of libstdc++
+  swiftc \
+    -lc++ \
+    -I ../build-swift-macos/install/include \
+    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+    ./decode-file-sense-voice-with-hr.swift  ./SherpaOnnx.swift \
+    -L ../build-swift-macos/install/lib/ \
+    -l sherpa-onnx \
+    -l onnxruntime \
+    -o decode-file-sense-voice-with-hr
+
+  strip decode-file-sense-voice-with-hr
+else
+  echo "./decode-file-sense-voice-with-hr exists - skip building"
+fi
+
+export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"
+./decode-file-sense-voice-with-hr