Fangjun Kuang
Committed by GitHub

Add Swift API for homophone replacer. (#2164)

@@ -7,6 +7,10 @@ echo "pwd: $PWD" @@ -7,6 +7,10 @@ echo "pwd: $PWD"
7 cd swift-api-examples 7 cd swift-api-examples
8 ls -lh 8 ls -lh
9 9
  10 +./run-decode-file-sense-voice-with-hr.sh
  11 +rm -rf sherpa-onnx-sense-voice-*
  12 +rm -rf dict lexicon.txt replace.fst test-hr.wav
  13 +
10 ./run-dolphin-ctc-asr.sh 14 ./run-dolphin-ctc-asr.sh
11 rm -rf sherpa-onnx-dolphin-* 15 rm -rf sherpa-onnx-dolphin-*
12 16
@@ -15,3 +15,4 @@ tts-matcha-en @@ -15,3 +15,4 @@ tts-matcha-en
15 tts-kokoro-en 15 tts-kokoro-en
16 tts-kokoro-zh-en 16 tts-kokoro-zh-en
17 speech-enhancement-gtcrn 17 speech-enhancement-gtcrn
  18 +decode-file-sense-voice-with-hr
@@ -128,6 +128,17 @@ func sherpaOnnxOnlineCtcFstDecoderConfig( @@ -128,6 +128,17 @@ func sherpaOnnxOnlineCtcFstDecoderConfig(
128 max_active: Int32(maxActive)) 128 max_active: Int32(maxActive))
129 } 129 }
130 130
  131 +func sherpaOnnxHomophoneReplacerConfig(
  132 + dictDir: String = "",
  133 + lexicon: String = "",
  134 + ruleFsts: String = ""
  135 +) -> SherpaOnnxHomophoneReplacerConfig {
  136 + return SherpaOnnxHomophoneReplacerConfig(
  137 + dict_dir: toCPointer(dictDir),
  138 + lexicon: toCPointer(lexicon),
  139 + rule_fsts: toCPointer(ruleFsts))
  140 +}
  141 +
131 func sherpaOnnxOnlineRecognizerConfig( 142 func sherpaOnnxOnlineRecognizerConfig(
132 featConfig: SherpaOnnxFeatureConfig, 143 featConfig: SherpaOnnxFeatureConfig,
133 modelConfig: SherpaOnnxOnlineModelConfig, 144 modelConfig: SherpaOnnxOnlineModelConfig,
@@ -144,7 +155,8 @@ func sherpaOnnxOnlineRecognizerConfig( @@ -144,7 +155,8 @@ func sherpaOnnxOnlineRecognizerConfig(
144 ruleFars: String = "", 155 ruleFars: String = "",
145 blankPenalty: Float = 0.0, 156 blankPenalty: Float = 0.0,
146 hotwordsBuf: String = "", 157 hotwordsBuf: String = "",
147 - hotwordsBufSize: Int = 0 158 + hotwordsBufSize: Int = 0,
  159 + hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
148 ) -> SherpaOnnxOnlineRecognizerConfig { 160 ) -> SherpaOnnxOnlineRecognizerConfig {
149 return SherpaOnnxOnlineRecognizerConfig( 161 return SherpaOnnxOnlineRecognizerConfig(
150 feat_config: featConfig, 162 feat_config: featConfig,
@@ -162,7 +174,8 @@ func sherpaOnnxOnlineRecognizerConfig( @@ -162,7 +174,8 @@ func sherpaOnnxOnlineRecognizerConfig(
162 rule_fars: toCPointer(ruleFars), 174 rule_fars: toCPointer(ruleFars),
163 blank_penalty: blankPenalty, 175 blank_penalty: blankPenalty,
164 hotwords_buf: toCPointer(hotwordsBuf), 176 hotwords_buf: toCPointer(hotwordsBuf),
165 - hotwords_buf_size: Int32(hotwordsBufSize) 177 + hotwords_buf_size: Int32(hotwordsBufSize),
  178 + hr: hr
166 ) 179 )
167 } 180 }
168 181
@@ -469,7 +482,8 @@ func sherpaOnnxOfflineRecognizerConfig( @@ -469,7 +482,8 @@ func sherpaOnnxOfflineRecognizerConfig(
469 hotwordsScore: Float = 1.5, 482 hotwordsScore: Float = 1.5,
470 ruleFsts: String = "", 483 ruleFsts: String = "",
471 ruleFars: String = "", 484 ruleFars: String = "",
472 - blankPenalty: Float = 0.0 485 + blankPenalty: Float = 0.0,
  486 + hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
473 ) -> SherpaOnnxOfflineRecognizerConfig { 487 ) -> SherpaOnnxOfflineRecognizerConfig {
474 return SherpaOnnxOfflineRecognizerConfig( 488 return SherpaOnnxOfflineRecognizerConfig(
475 feat_config: featConfig, 489 feat_config: featConfig,
@@ -481,7 +495,8 @@ func sherpaOnnxOfflineRecognizerConfig( @@ -481,7 +495,8 @@ func sherpaOnnxOfflineRecognizerConfig(
481 hotwords_score: hotwordsScore, 495 hotwords_score: hotwordsScore,
482 rule_fsts: toCPointer(ruleFsts), 496 rule_fsts: toCPointer(ruleFsts),
483 rule_fars: toCPointer(ruleFars), 497 rule_fars: toCPointer(ruleFars),
484 - blank_penalty: blankPenalty 498 + blank_penalty: blankPenalty,
  499 + hr: hr
485 ) 500 )
486 } 501 }
487 502
  1 +import AVFoundation
  2 +
  3 +extension AudioBuffer {
  4 + func array() -> [Float] {
  5 + return Array(UnsafeBufferPointer(self))
  6 + }
  7 +}
  8 +
  9 +extension AVAudioPCMBuffer {
  10 + func array() -> [Float] {
  11 + return self.audioBufferList.pointee.mBuffers.array()
  12 + }
  13 +}
  14 +
  15 +func run() {
  16 + var recognizer: SherpaOnnxOfflineRecognizer
  17 + let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
  18 + let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
  19 + let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
  20 + model: model,
  21 + useInverseTextNormalization: true
  22 + )
  23 +
  24 + let modelConfig = sherpaOnnxOfflineModelConfig(
  25 + tokens: tokens,
  26 + debug: 0,
  27 + senseVoice: senseVoiceConfig
  28 + )
  29 +
  30 + let featConfig = sherpaOnnxFeatureConfig(
  31 + sampleRate: 16000,
  32 + featureDim: 80
  33 + )
  34 +
  35 + let hrConfig = sherpaOnnxHomophoneReplacerConfig(
  36 + dictDir: "./dict",
  37 + lexicon: "./lexicon.txt",
  38 + ruleFsts: "./replace.fst"
  39 + )
  40 + var config = sherpaOnnxOfflineRecognizerConfig(
  41 + featConfig: featConfig,
  42 + modelConfig: modelConfig,
  43 + hr: hrConfig
  44 + )
  45 +
  46 + recognizer = SherpaOnnxOfflineRecognizer(config: &config)
  47 +
  48 + let filePath = "./test-hr.wav"
  49 + let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  50 + let audioFile = try! AVAudioFile(forReading: fileURL as URL)
  51 +
  52 + let audioFormat = audioFile.processingFormat
  53 + assert(audioFormat.channelCount == 1)
  54 + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
  55 +
  56 + let audioFrameCount = UInt32(audioFile.length)
  57 + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  58 +
  59 + try! audioFile.read(into: audioFileBuffer!)
  60 + let array: [Float]! = audioFileBuffer?.array()
  61 + let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
  62 + print("\nresult is:\n\(result.text)")
  63 + if result.timestamps.count != 0 {
  64 + print("\ntimestamps is:\n\(result.timestamps)")
  65 + }
  66 +
  67 +}
  68 +
  69 +@main
  70 +struct App {
  71 + static func main() {
  72 + run()
  73 + }
  74 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d ../build-swift-macos ]; then
  6 + echo "Please run ../build-swift-macos.sh first!"
  7 + exit 1
  8 +fi
  9 +
  10 +if [ ! -d ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
  11 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  12 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  13 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  14 +fi
  15 +
  16 +if [ ! -d dict ]; then
  17 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  18 + tar xf dict.tar.bz2
  19 + rm -rf dict.tar.bz2
  20 +
  21 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  22 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  23 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  24 +fi
  25 +
  26 +if [ ! -e ./decode-file-sense-voice-with-hr ]; then
  27 + # Note: We use -lc++ to link against libc++ instead of libstdc++
  28 + swiftc \
  29 + -lc++ \
  30 + -I ../build-swift-macos/install/include \
  31 + -import-objc-header ./SherpaOnnx-Bridging-Header.h \
  32 + ./decode-file-sense-voice-with-hr.swift ./SherpaOnnx.swift \
  33 + -L ../build-swift-macos/install/lib/ \
  34 + -l sherpa-onnx \
  35 + -l onnxruntime \
  36 + -o decode-file-sense-voice-with-hr
  37 +
  38 + strip decode-file-sense-voice-with-hr
  39 +else
  40 + echo "./decode-file-sense-voice-with-hr exists - skip building"
  41 +fi
  42 +
  43 +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
  44 +./decode-file-sense-voice-with-hr