正在显示
5 个修改的文件
包含
230 行增加
和
0 行删除
| @@ -7,6 +7,10 @@ echo "pwd: $PWD" | @@ -7,6 +7,10 @@ echo "pwd: $PWD" | ||
| 7 | cd swift-api-examples | 7 | cd swift-api-examples |
| 8 | ls -lh | 8 | ls -lh |
| 9 | 9 | ||
| 10 | +./run-keyword-spotting-from-file.sh | ||
| 11 | +rm ./keyword-spotting-from-file | ||
| 12 | +rm -rf sherpa-onnx-kws-* | ||
| 13 | + | ||
| 10 | ./run-streaming-hlg-decode-file.sh | 14 | ./run-streaming-hlg-decode-file.sh |
| 11 | rm ./streaming-hlg-decode-file | 15 | rm ./streaming-hlg-decode-file |
| 12 | rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 | 16 | rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 |
| @@ -832,3 +832,111 @@ class SherpaOnnxSpokenLanguageIdentificationWrapper { | @@ -832,3 +832,111 @@ class SherpaOnnxSpokenLanguageIdentificationWrapper { | ||
| 832 | return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result) | 832 | return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result) |
| 833 | } | 833 | } |
| 834 | } | 834 | } |
| 835 | + | ||
| 836 | +// keyword spotting | ||
| 837 | + | ||
| 838 | +class SherpaOnnxKeywordResultWrapper { | ||
| 839 | + /// A pointer to the underlying counterpart in C | ||
| 840 | + let result: UnsafePointer<SherpaOnnxKeywordResult>! | ||
| 841 | + | ||
| 842 | + var keyword: String { | ||
| 843 | + return String(cString: result.pointee.keyword) | ||
| 844 | + } | ||
| 845 | + | ||
| 846 | + var count: Int32 { | ||
| 847 | + return result.pointee.count | ||
| 848 | + } | ||
| 849 | + | ||
| 850 | + var tokens: [String] { | ||
| 851 | + if let tokensPointer = result.pointee.tokens_arr { | ||
| 852 | + var tokens: [String] = [] | ||
| 853 | + for index in 0..<count { | ||
| 854 | + if let tokenPointer = tokensPointer[Int(index)] { | ||
| 855 | + let token = String(cString: tokenPointer) | ||
| 856 | + tokens.append(token) | ||
| 857 | + } | ||
| 858 | + } | ||
| 859 | + return tokens | ||
| 860 | + } else { | ||
| 861 | + let tokens: [String] = [] | ||
| 862 | + return tokens | ||
| 863 | + } | ||
| 864 | + } | ||
| 865 | + | ||
| 866 | + init(result: UnsafePointer<SherpaOnnxKeywordResult>!) { | ||
| 867 | + self.result = result | ||
| 868 | + } | ||
| 869 | + | ||
| 870 | + deinit { | ||
| 871 | + if let result { | ||
| 872 | + DestroyKeywordResult(result) | ||
| 873 | + } | ||
| 874 | + } | ||
| 875 | +} | ||
| 876 | + | ||
| 877 | +func sherpaOnnxKeywordSpotterConfig( | ||
| 878 | + featConfig: SherpaOnnxFeatureConfig, | ||
| 879 | + modelConfig: SherpaOnnxOnlineModelConfig, | ||
| 880 | + keywordsFile: String, | ||
| 881 | + maxActivePaths: Int = 4, | ||
| 882 | + numTrailingBlanks: Int = 1, | ||
| 883 | + keywordsScore: Float = 1.0, | ||
| 884 | + keywordsThreshold: Float = 0.25 | ||
| 885 | +) -> SherpaOnnxKeywordSpotterConfig { | ||
| 886 | + return SherpaOnnxKeywordSpotterConfig( | ||
| 887 | + feat_config: featConfig, | ||
| 888 | + model_config: modelConfig, | ||
| 889 | + max_active_paths: Int32(maxActivePaths), | ||
| 890 | + num_trailing_blanks: Int32(numTrailingBlanks), | ||
| 891 | + keywords_score: keywordsScore, | ||
| 892 | + keywords_threshold: keywordsThreshold, | ||
| 893 | + keywords_file: toCPointer(keywordsFile) | ||
| 894 | + ) | ||
| 895 | +} | ||
| 896 | + | ||
| 897 | +class SherpaOnnxKeywordSpotterWrapper { | ||
| 898 | + /// A pointer to the underlying counterpart in C | ||
| 899 | + let spotter: OpaquePointer! | ||
| 900 | + var stream: OpaquePointer! | ||
| 901 | + | ||
| 902 | + init( | ||
| 903 | + config: UnsafePointer<SherpaOnnxKeywordSpotterConfig>! | ||
| 904 | + ) { | ||
| 905 | + spotter = CreateKeywordSpotter(config) | ||
| 906 | + stream = CreateKeywordStream(spotter) | ||
| 907 | + } | ||
| 908 | + | ||
| 909 | + deinit { | ||
| 910 | + if let stream { | ||
| 911 | + DestroyOnlineStream(stream) | ||
| 912 | + } | ||
| 913 | + | ||
| 914 | + if let spotter { | ||
| 915 | + DestroyKeywordSpotter(spotter) | ||
| 916 | + } | ||
| 917 | + } | ||
| 918 | + | ||
| 919 | + func acceptWaveform(samples: [Float], sampleRate: Int = 16000) { | ||
| 920 | + AcceptWaveform(stream, Int32(sampleRate), samples, Int32(samples.count)) | ||
| 921 | + } | ||
| 922 | + | ||
| 923 | + func isReady() -> Bool { | ||
| 924 | + return IsKeywordStreamReady(spotter, stream) == 1 ? true : false | ||
| 925 | + } | ||
| 926 | + | ||
| 927 | + func decode() { | ||
| 928 | + DecodeKeywordStream(spotter, stream) | ||
| 929 | + } | ||
| 930 | + | ||
| 931 | + func getResult() -> SherpaOnnxKeywordResultWrapper { | ||
| 932 | + let result: UnsafePointer<SherpaOnnxKeywordResult>? = GetKeywordResult( | ||
| 933 | + spotter, stream) | ||
| 934 | + return SherpaOnnxKeywordResultWrapper(result: result) | ||
| 935 | + } | ||
| 936 | + | ||
| 937 | + /// Signal that no more audio samples will be available. | ||
| 938 | + /// After this call, you cannot call acceptWaveform() any more. | ||
| 939 | + func inputFinished() { | ||
| 940 | + InputFinished(stream) | ||
| 941 | + } | ||
| 942 | +} |
| 1 | +import AVFoundation | ||
| 2 | + | ||
| 3 | +extension AudioBuffer { | ||
| 4 | + func array() -> [Float] { | ||
| 5 | + return Array(UnsafeBufferPointer(self)) | ||
| 6 | + } | ||
| 7 | +} | ||
| 8 | + | ||
| 9 | +extension AVAudioPCMBuffer { | ||
| 10 | + func array() -> [Float] { | ||
| 11 | + return self.audioBufferList.pointee.mBuffers.array() | ||
| 12 | + } | ||
| 13 | +} | ||
| 14 | + | ||
| 15 | +func run() { | ||
| 16 | + let filePath = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav" | ||
| 17 | + let encoder = | ||
| 18 | + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx" | ||
| 19 | + let decoder = | ||
| 20 | + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx" | ||
| 21 | + let joiner = | ||
| 22 | + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx" | ||
| 23 | + let tokens = | ||
| 24 | + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt" | ||
| 25 | + let keywordsFile = | ||
| 26 | + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt" | ||
| 27 | + let transducerConfig = sherpaOnnxOnlineTransducerModelConfig( | ||
| 28 | + encoder: encoder, | ||
| 29 | + decoder: decoder, | ||
| 30 | + joiner: joiner | ||
| 31 | + ) | ||
| 32 | + | ||
| 33 | + let modelConfig = sherpaOnnxOnlineModelConfig( | ||
| 34 | + tokens: tokens, | ||
| 35 | + transducer: transducerConfig | ||
| 36 | + ) | ||
| 37 | + | ||
| 38 | + let featConfig = sherpaOnnxFeatureConfig( | ||
| 39 | + sampleRate: 16000, | ||
| 40 | + featureDim: 80 | ||
| 41 | + ) | ||
| 42 | + var config = sherpaOnnxKeywordSpotterConfig( | ||
| 43 | + featConfig: featConfig, | ||
| 44 | + modelConfig: modelConfig, | ||
| 45 | + keywordsFile: keywordsFile | ||
| 46 | + ) | ||
| 47 | + | ||
| 48 | + let spotter = SherpaOnnxKeywordSpotterWrapper(config: &config) | ||
| 49 | + | ||
| 50 | + let fileURL: NSURL = NSURL(fileURLWithPath: filePath) | ||
| 51 | + let audioFile = try! AVAudioFile(forReading: fileURL as URL) | ||
| 52 | + | ||
| 53 | + let audioFormat = audioFile.processingFormat | ||
| 54 | + assert(audioFormat.sampleRate == 16000) | ||
| 55 | + assert(audioFormat.channelCount == 1) | ||
| 56 | + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32) | ||
| 57 | + | ||
| 58 | + let audioFrameCount = UInt32(audioFile.length) | ||
| 59 | + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount) | ||
| 60 | + | ||
| 61 | + try! audioFile.read(into: audioFileBuffer!) | ||
| 62 | + let array: [Float]! = audioFileBuffer?.array() | ||
| 63 | + spotter.acceptWaveform(samples: array) | ||
| 64 | + | ||
| 65 | + let tailPadding = [Float](repeating: 0.0, count: 3200) | ||
| 66 | + spotter.acceptWaveform(samples: tailPadding) | ||
| 67 | + | ||
| 68 | + spotter.inputFinished() | ||
| 69 | + while spotter.isReady() { | ||
| 70 | + spotter.decode() | ||
| 71 | + let keyword = spotter.getResult().keyword | ||
| 72 | + if keyword != "" { | ||
| 73 | + print("Detected: \(keyword)") | ||
| 74 | + } | ||
| 75 | + } | ||
| 76 | +} | ||
| 77 | + | ||
| 78 | +@main | ||
| 79 | +struct App { | ||
| 80 | + static func main() { | ||
| 81 | + run() | ||
| 82 | + } | ||
| 83 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d ../build-swift-macos ]; then | ||
| 6 | + echo "Please run ../build-swift-macos.sh first!" | ||
| 7 | + exit 1 | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +if [ ! -d ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 ]; then | ||
| 11 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 12 | + tar xf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 13 | + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 14 | +fi | ||
| 15 | + | ||
| 16 | +if [ ! -e ./keyword-spotting-from-file ]; then | ||
| 17 | + # Note: We use -lc++ to link against libc++ instead of libstdc++ | ||
| 18 | + swiftc \ | ||
| 19 | + -lc++ \ | ||
| 20 | + -I ../build-swift-macos/install/include \ | ||
| 21 | + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ | ||
| 22 | + ./keyword-spotting-from-file.swift ./SherpaOnnx.swift \ | ||
| 23 | + -L ../build-swift-macos/install/lib/ \ | ||
| 24 | + -l sherpa-onnx \ | ||
| 25 | + -l onnxruntime \ | ||
| 26 | + -o keyword-spotting-from-file | ||
| 27 | + | ||
| 28 | + strip keyword-spotting-from-file | ||
| 29 | +else | ||
| 30 | + echo "./keyword-spotting-from-file exists - skip building" | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH | ||
| 34 | +./keyword-spotting-from-file |
-
请 注册 或 登录 后发表评论