Fangjun Kuang
Committed by GitHub

Swift API for keyword spotting. (#1027)

@@ -7,6 +7,10 @@ echo "pwd: $PWD" @@ -7,6 +7,10 @@ echo "pwd: $PWD"
7 cd swift-api-examples 7 cd swift-api-examples
8 ls -lh 8 ls -lh
9 9
  10 +./run-keyword-spotting-from-file.sh
  11 +rm ./keyword-spotting-from-file
  12 +rm -rf sherpa-onnx-kws-*
  13 +
10 ./run-streaming-hlg-decode-file.sh 14 ./run-streaming-hlg-decode-file.sh
11 rm ./streaming-hlg-decode-file 15 rm ./streaming-hlg-decode-file
12 rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 16 rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
@@ -8,3 +8,4 @@ sherpa-onnx-paraformer-zh-2023-09-14 @@ -8,3 +8,4 @@ sherpa-onnx-paraformer-zh-2023-09-14
8 !*.sh 8 !*.sh
9 *.bak 9 *.bak
10 streaming-hlg-decode-file 10 streaming-hlg-decode-file
  11 +keyword-spotting-from-file
@@ -832,3 +832,111 @@ class SherpaOnnxSpokenLanguageIdentificationWrapper { @@ -832,3 +832,111 @@ class SherpaOnnxSpokenLanguageIdentificationWrapper {
832 return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result) 832 return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result)
833 } 833 }
834 } 834 }
  835 +
  836 +// keyword spotting
  837 +
  838 +class SherpaOnnxKeywordResultWrapper {
  839 + /// A pointer to the underlying counterpart in C
  840 + let result: UnsafePointer<SherpaOnnxKeywordResult>!
  841 +
  842 + var keyword: String {
  843 + return String(cString: result.pointee.keyword)
  844 + }
  845 +
  846 + var count: Int32 {
  847 + return result.pointee.count
  848 + }
  849 +
  850 + var tokens: [String] {
  851 + if let tokensPointer = result.pointee.tokens_arr {
  852 + var tokens: [String] = []
  853 + for index in 0..<count {
  854 + if let tokenPointer = tokensPointer[Int(index)] {
  855 + let token = String(cString: tokenPointer)
  856 + tokens.append(token)
  857 + }
  858 + }
  859 + return tokens
  860 + } else {
  861 + let tokens: [String] = []
  862 + return tokens
  863 + }
  864 + }
  865 +
  866 + init(result: UnsafePointer<SherpaOnnxKeywordResult>!) {
  867 + self.result = result
  868 + }
  869 +
  870 + deinit {
  871 + if let result {
  872 + DestroyKeywordResult(result)
  873 + }
  874 + }
  875 +}
  876 +
  877 +func sherpaOnnxKeywordSpotterConfig(
  878 + featConfig: SherpaOnnxFeatureConfig,
  879 + modelConfig: SherpaOnnxOnlineModelConfig,
  880 + keywordsFile: String,
  881 + maxActivePaths: Int = 4,
  882 + numTrailingBlanks: Int = 1,
  883 + keywordsScore: Float = 1.0,
  884 + keywordsThreshold: Float = 0.25
  885 +) -> SherpaOnnxKeywordSpotterConfig {
  886 + return SherpaOnnxKeywordSpotterConfig(
  887 + feat_config: featConfig,
  888 + model_config: modelConfig,
  889 + max_active_paths: Int32(maxActivePaths),
  890 + num_trailing_blanks: Int32(numTrailingBlanks),
  891 + keywords_score: keywordsScore,
  892 + keywords_threshold: keywordsThreshold,
  893 + keywords_file: toCPointer(keywordsFile)
  894 + )
  895 +}
  896 +
  897 +class SherpaOnnxKeywordSpotterWrapper {
  898 + /// A pointer to the underlying counterpart in C
  899 + let spotter: OpaquePointer!
  900 + var stream: OpaquePointer!
  901 +
  902 + init(
  903 + config: UnsafePointer<SherpaOnnxKeywordSpotterConfig>!
  904 + ) {
  905 + spotter = CreateKeywordSpotter(config)
  906 + stream = CreateKeywordStream(spotter)
  907 + }
  908 +
  909 + deinit {
  910 + if let stream {
  911 + DestroyOnlineStream(stream)
  912 + }
  913 +
  914 + if let spotter {
  915 + DestroyKeywordSpotter(spotter)
  916 + }
  917 + }
  918 +
  919 + func acceptWaveform(samples: [Float], sampleRate: Int = 16000) {
  920 + AcceptWaveform(stream, Int32(sampleRate), samples, Int32(samples.count))
  921 + }
  922 +
  923 + func isReady() -> Bool {
  924 + return IsKeywordStreamReady(spotter, stream) == 1 ? true : false
  925 + }
  926 +
  927 + func decode() {
  928 + DecodeKeywordStream(spotter, stream)
  929 + }
  930 +
  931 + func getResult() -> SherpaOnnxKeywordResultWrapper {
  932 + let result: UnsafePointer<SherpaOnnxKeywordResult>? = GetKeywordResult(
  933 + spotter, stream)
  934 + return SherpaOnnxKeywordResultWrapper(result: result)
  935 + }
  936 +
  937 + /// Signal that no more audio samples would be available.
  938 + /// After this call, you cannot call acceptWaveform() any more.
  939 + func inputFinished() {
  940 + InputFinished(stream)
  941 + }
  942 +}
  1 +import AVFoundation
  2 +
  3 +extension AudioBuffer {
  4 + func array() -> [Float] {
  5 + return Array(UnsafeBufferPointer(self))
  6 + }
  7 +}
  8 +
  9 +extension AVAudioPCMBuffer {
  10 + func array() -> [Float] {
  11 + return self.audioBufferList.pointee.mBuffers.array()
  12 + }
  13 +}
  14 +
  15 +func run() {
  16 + let filePath = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
  17 + let encoder =
  18 + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  19 + let decoder =
  20 + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  21 + let joiner =
  22 + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
  23 + let tokens =
  24 + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
  25 + let keywordsFile =
  26 + "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"
  27 + let transducerConfig = sherpaOnnxOnlineTransducerModelConfig(
  28 + encoder: encoder,
  29 + decoder: decoder,
  30 + joiner: joiner
  31 + )
  32 +
  33 + let modelConfig = sherpaOnnxOnlineModelConfig(
  34 + tokens: tokens,
  35 + transducer: transducerConfig
  36 + )
  37 +
  38 + let featConfig = sherpaOnnxFeatureConfig(
  39 + sampleRate: 16000,
  40 + featureDim: 80
  41 + )
  42 + var config = sherpaOnnxKeywordSpotterConfig(
  43 + featConfig: featConfig,
  44 + modelConfig: modelConfig,
  45 + keywordsFile: keywordsFile
  46 + )
  47 +
  48 + let spotter = SherpaOnnxKeywordSpotterWrapper(config: &config)
  49 +
  50 + let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  51 + let audioFile = try! AVAudioFile(forReading: fileURL as URL)
  52 +
  53 + let audioFormat = audioFile.processingFormat
  54 + assert(audioFormat.sampleRate == 16000)
  55 + assert(audioFormat.channelCount == 1)
  56 + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
  57 +
  58 + let audioFrameCount = UInt32(audioFile.length)
  59 + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  60 +
  61 + try! audioFile.read(into: audioFileBuffer!)
  62 + let array: [Float]! = audioFileBuffer?.array()
  63 + spotter.acceptWaveform(samples: array)
  64 +
  65 + let tailPadding = [Float](repeating: 0.0, count: 3200)
  66 + spotter.acceptWaveform(samples: tailPadding)
  67 +
  68 + spotter.inputFinished()
  69 + while spotter.isReady() {
  70 + spotter.decode()
  71 + let keyword = spotter.getResult().keyword
  72 + if keyword != "" {
  73 + print("Detected: \(keyword)")
  74 + }
  75 + }
  76 +}
  77 +
  78 +@main
  79 +struct App {
  80 + static func main() {
  81 + run()
  82 + }
  83 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d ../build-swift-macos ]; then
  6 + echo "Please run ../build-swift-macos.sh first!"
  7 + exit 1
  8 +fi
  9 +
  10 +if [ ! -d ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 ]; then
  11 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  12 + tar xf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  13 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  14 +fi
  15 +
  16 +if [ ! -e ./keyword-spotting-from-file ]; then
  17 + # Note: We use -lc++ to link against libc++ instead of libstdc++
  18 + swiftc \
  19 + -lc++ \
  20 + -I ../build-swift-macos/install/include \
  21 + -import-objc-header ./SherpaOnnx-Bridging-Header.h \
  22 + ./keyword-spotting-from-file.swift ./SherpaOnnx.swift \
  23 + -L ../build-swift-macos/install/lib/ \
  24 + -l sherpa-onnx \
  25 + -l onnxruntime \
  26 + -o keyword-spotting-from-file
  27 +
  28 + strip keyword-spotting-from-file
  29 +else
  30 + echo "./keyword-spotting-from-file exists - skip building"
  31 +fi
  32 +
  33 +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
  34 +./keyword-spotting-from-file