Committed by
GitHub
Add Swift API for homophone replacer. (#2164)
正在显示 5 个修改的文件，包含 142 行增加和 4 行删除
| @@ -7,6 +7,10 @@ echo "pwd: $PWD" | @@ -7,6 +7,10 @@ echo "pwd: $PWD" | ||
| 7 | cd swift-api-examples | 7 | cd swift-api-examples |
| 8 | ls -lh | 8 | ls -lh |
| 9 | 9 | ||
| 10 | +./run-decode-file-sense-voice-with-hr.sh | ||
| 11 | +rm -rf sherpa-onnx-sense-voice-* | ||
| 12 | +rm -rf dict lexicon.txt replace.fst test-hr.wav | ||
| 13 | + | ||
| 10 | ./run-dolphin-ctc-asr.sh | 14 | ./run-dolphin-ctc-asr.sh |
| 11 | rm -rf sherpa-onnx-dolphin-* | 15 | rm -rf sherpa-onnx-dolphin-* |
| 12 | 16 |
| @@ -128,6 +128,17 @@ func sherpaOnnxOnlineCtcFstDecoderConfig( | @@ -128,6 +128,17 @@ func sherpaOnnxOnlineCtcFstDecoderConfig( | ||
| 128 | max_active: Int32(maxActive)) | 128 | max_active: Int32(maxActive)) |
| 129 | } | 129 | } |
| 130 | 130 | ||
/// Builds a `SherpaOnnxHomophoneReplacerConfig` from Swift strings.
///
/// Each argument is converted with `toCPointer` and stored in the
/// correspondingly named field of the returned config.
///
/// - Parameters:
///   - dictDir: Value for the `dict_dir` field. Defaults to an empty string.
///   - lexicon: Value for the `lexicon` field. Defaults to an empty string.
///   - ruleFsts: Value for the `rule_fsts` field. Defaults to an empty string.
/// - Returns: A config populated with the three converted strings.
func sherpaOnnxHomophoneReplacerConfig(
  dictDir: String = "",
  lexicon: String = "",
  ruleFsts: String = ""
) -> SherpaOnnxHomophoneReplacerConfig {
  let dictDirPointer = toCPointer(dictDir)
  let lexiconPointer = toCPointer(lexicon)
  let ruleFstsPointer = toCPointer(ruleFsts)

  return SherpaOnnxHomophoneReplacerConfig(
    dict_dir: dictDirPointer,
    lexicon: lexiconPointer,
    rule_fsts: ruleFstsPointer)
}
| 141 | + | ||
| 131 | func sherpaOnnxOnlineRecognizerConfig( | 142 | func sherpaOnnxOnlineRecognizerConfig( |
| 132 | featConfig: SherpaOnnxFeatureConfig, | 143 | featConfig: SherpaOnnxFeatureConfig, |
| 133 | modelConfig: SherpaOnnxOnlineModelConfig, | 144 | modelConfig: SherpaOnnxOnlineModelConfig, |
| @@ -144,7 +155,8 @@ func sherpaOnnxOnlineRecognizerConfig( | @@ -144,7 +155,8 @@ func sherpaOnnxOnlineRecognizerConfig( | ||
| 144 | ruleFars: String = "", | 155 | ruleFars: String = "", |
| 145 | blankPenalty: Float = 0.0, | 156 | blankPenalty: Float = 0.0, |
| 146 | hotwordsBuf: String = "", | 157 | hotwordsBuf: String = "", |
| 147 | - hotwordsBufSize: Int = 0 | 158 | + hotwordsBufSize: Int = 0, |
| 159 | + hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig() | ||
| 148 | ) -> SherpaOnnxOnlineRecognizerConfig { | 160 | ) -> SherpaOnnxOnlineRecognizerConfig { |
| 149 | return SherpaOnnxOnlineRecognizerConfig( | 161 | return SherpaOnnxOnlineRecognizerConfig( |
| 150 | feat_config: featConfig, | 162 | feat_config: featConfig, |
| @@ -162,7 +174,8 @@ func sherpaOnnxOnlineRecognizerConfig( | @@ -162,7 +174,8 @@ func sherpaOnnxOnlineRecognizerConfig( | ||
| 162 | rule_fars: toCPointer(ruleFars), | 174 | rule_fars: toCPointer(ruleFars), |
| 163 | blank_penalty: blankPenalty, | 175 | blank_penalty: blankPenalty, |
| 164 | hotwords_buf: toCPointer(hotwordsBuf), | 176 | hotwords_buf: toCPointer(hotwordsBuf), |
| 165 | - hotwords_buf_size: Int32(hotwordsBufSize) | 177 | + hotwords_buf_size: Int32(hotwordsBufSize), |
| 178 | + hr: hr | ||
| 166 | ) | 179 | ) |
| 167 | } | 180 | } |
| 168 | 181 | ||
| @@ -469,7 +482,8 @@ func sherpaOnnxOfflineRecognizerConfig( | @@ -469,7 +482,8 @@ func sherpaOnnxOfflineRecognizerConfig( | ||
| 469 | hotwordsScore: Float = 1.5, | 482 | hotwordsScore: Float = 1.5, |
| 470 | ruleFsts: String = "", | 483 | ruleFsts: String = "", |
| 471 | ruleFars: String = "", | 484 | ruleFars: String = "", |
| 472 | - blankPenalty: Float = 0.0 | 485 | + blankPenalty: Float = 0.0, |
| 486 | + hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig() | ||
| 473 | ) -> SherpaOnnxOfflineRecognizerConfig { | 487 | ) -> SherpaOnnxOfflineRecognizerConfig { |
| 474 | return SherpaOnnxOfflineRecognizerConfig( | 488 | return SherpaOnnxOfflineRecognizerConfig( |
| 475 | feat_config: featConfig, | 489 | feat_config: featConfig, |
| @@ -481,7 +495,8 @@ func sherpaOnnxOfflineRecognizerConfig( | @@ -481,7 +495,8 @@ func sherpaOnnxOfflineRecognizerConfig( | ||
| 481 | hotwords_score: hotwordsScore, | 495 | hotwords_score: hotwordsScore, |
| 482 | rule_fsts: toCPointer(ruleFsts), | 496 | rule_fsts: toCPointer(ruleFsts), |
| 483 | rule_fars: toCPointer(ruleFars), | 497 | rule_fars: toCPointer(ruleFars), |
| 484 | - blank_penalty: blankPenalty | 498 | + blank_penalty: blankPenalty, |
| 499 | + hr: hr | ||
| 485 | ) | 500 | ) |
| 486 | } | 501 | } |
| 487 | 502 |
| 1 | +import AVFoundation | ||
| 2 | + | ||
extension AudioBuffer {
  /// Copies the contents of this buffer into a Swift array of `Float`.
  func array() -> [Float] {
    let samples = UnsafeBufferPointer<Float>(self)
    return [Float](samples)
  }
}
| 8 | + | ||
extension AVAudioPCMBuffer {
  /// Returns the samples stored in `audioBufferList.pointee.mBuffers`
  /// as a `Float` array.
  func array() -> [Float] {
    let buffer = audioBufferList.pointee.mBuffers
    return buffer.array()
  }
}
| 14 | + | ||
/// Decodes `./test-hr.wav` with a SenseVoice offline recognizer that has the
/// homophone replacer configured, then prints the recognized text and, when
/// there are any, the timestamps.
///
/// All model, dictionary and audio paths are relative to the current working
/// directory. If the audio file cannot be opened, buffered or read, or if its
/// processing format is not mono Float32, the program stops with a message
/// that names the problem.
func run() {
  let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
  let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
  let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
    model: model,
    useInverseTextNormalization: true
  )

  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokens,
    debug: 0,
    senseVoice: senseVoiceConfig
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  let hrConfig = sherpaOnnxHomophoneReplacerConfig(
    dictDir: "./dict",
    lexicon: "./lexicon.txt",
    ruleFsts: "./replace.fst"
  )

  // `var` is required because the recognizer initializer receives `&config`.
  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    hr: hrConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let filePath = "./test-hr.wav"
  let fileURL = URL(fileURLWithPath: filePath)

  let audioFile: AVAudioFile
  do {
    audioFile = try AVAudioFile(forReading: fileURL)
  } catch {
    fatalError("Failed to open \(filePath): \(error)")
  }

  // `array()` reads only the first buffer as Float, so anything other than
  // mono Float32 would be decoded incorrectly. `precondition` keeps these
  // checks active in optimized builds, unlike `assert`.
  let audioFormat = audioFile.processingFormat
  precondition(audioFormat.channelCount == 1, "\(filePath) must contain exactly one channel")
  precondition(
    audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32,
    "\(filePath) must be readable as Float32 PCM")

  guard
    let audioFrameCount = AVAudioFrameCount(exactly: audioFile.length),
    let audioFileBuffer = AVAudioPCMBuffer(
      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  else {
    fatalError("Failed to allocate an audio buffer for \(filePath)")
  }

  do {
    try audioFile.read(into: audioFileBuffer)
  } catch {
    fatalError("Failed to read \(filePath): \(error)")
  }

  let samples = audioFileBuffer.array()
  let result = recognizer.decode(samples: samples, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if !result.timestamps.isEmpty {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}
| 68 | + | ||
/// Program entry point; `main()` only forwards to `run()`.
@main
struct App {
  static func main() { run() }
}
#!/usr/bin/env bash
#
# Builds (when the binary is missing) and runs the Swift example
# decode-file-sense-voice-with-hr. The SenseVoice model and the homophone
# replacer files are downloaded into the current directory when absent.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page under the requested file name; together with `set -e` that stops the
# script before a bogus file is used.
if [ ! -d ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
fi

if [ ! -d dict ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  tar xf dict.tar.bz2
  rm -rf dict.tar.bz2
fi

# Check every file on its own so that a partially completed earlier run
# (dict/ present but one of these files missing) gets repaired.
for f in replace.fst test-hr.wav lexicon.txt; do
  if [ ! -f "$f" ]; then
    curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/$f"
  fi
done

if [ ! -e ./decode-file-sense-voice-with-hr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file-sense-voice-with-hr.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o decode-file-sense-voice-with-hr

  strip decode-file-sense-voice-with-hr
else
  echo "./decode-file-sense-voice-with-hr exists - skip building"
fi

# Append the previous value only when it is non-empty, so the search path
# never ends with a stray ':' (an empty entry).
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
./decode-file-sense-voice-with-hr
-
请 注册 或 登录 后发表评论