Add Swift API for spoken language identification. (#696)
Committed by GitHub
Showing 5 changed files with 180 additions and 0 deletions.
@@ -7,6 +7,9 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh

+./run-spoken-language-identification.sh
+rm -rf sherpa-onnx-whisper*
+
 mkdir -p /Users/fangjun/Desktop
 pushd /Users/fangjun/Desktop
 curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
swift-api-examples/SherpaOnnx.swift
@@ -713,3 +713,86 @@ class SherpaOnnxOfflineTtsWrapper {
     return SherpaOnnxGeneratedAudioWrapper(audio: audio)
   }
 }
+
+// spoken language identification
+
+func sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
+  encoder: String,
+  decoder: String,
+  tailPaddings: Int = -1
+) -> SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
+  return SherpaOnnxSpokenLanguageIdentificationWhisperConfig(
+    encoder: toCPointer(encoder),
+    decoder: toCPointer(decoder),
+    tail_paddings: Int32(tailPaddings))
+}
+
+func sherpaOnnxSpokenLanguageIdentificationConfig(
+  whisper: SherpaOnnxSpokenLanguageIdentificationWhisperConfig,
+  numThreads: Int = 1,
+  debug: Int = 0,
+  provider: String = "cpu"
+) -> SherpaOnnxSpokenLanguageIdentificationConfig {
+  return SherpaOnnxSpokenLanguageIdentificationConfig(
+    whisper: whisper,
+    num_threads: Int32(numThreads),
+    debug: Int32(debug),
+    provider: toCPointer(provider))
+}
+
+class SherpaOnnxSpokenLanguageIdentificationResultWrapper {
+  /// A pointer to the underlying counterpart in C
+  let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!
+
+  /// Return the detected language.
+  /// en for English
+  /// zh for Chinese
+  /// es for Spanish
+  /// de for German
+  /// etc.
+  var lang: String {
+    return String(cString: result.pointee.lang)
+  }
+
+  init(result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!) {
+    self.result = result
+  }
+
+  deinit {
+    if let result {
+      SherpaOnnxDestroySpokenLanguageIdentificationResult(result)
+    }
+  }
+}
+
+class SherpaOnnxSpokenLanguageIdentificationWrapper {
+  /// A pointer to the underlying counterpart in C
+  let slid: OpaquePointer!
+
+  init(
+    config: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationConfig>!
+  ) {
+    slid = SherpaOnnxCreateSpokenLanguageIdentification(config)
+  }
+
+  deinit {
+    if let slid {
+      SherpaOnnxDestroySpokenLanguageIdentification(slid)
+    }
+  }
+
+  func decode(samples: [Float], sampleRate: Int = 16000)
+    -> SherpaOnnxSpokenLanguageIdentificationResultWrapper
+  {
+    let stream: OpaquePointer! = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid)
+    AcceptWaveformOffline(stream, Int32(sampleRate), samples, Int32(samples.count))
+
+    let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>? =
+      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream)
+
+    DestroyOfflineStream(stream)
+    return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result)
+  }
+}
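In short, the new API is used like this (a minimal sketch; the model paths and the all-zero buffer are placeholders for illustration only, and the full runnable example is spoken-language-identification.swift below). Note that decode(samples:sampleRate:) creates an offline stream, feeds it the waveform, computes the language, and destroys the stream internally, so one wrapper instance (one loaded Whisper model) can classify any number of clips.

// Sketch only: minimal use of the new Swift API. The model paths are
// assumptions, and the silent buffer stands in for real 16 kHz mono audio.
var config = sherpaOnnxSpokenLanguageIdentificationConfig(
  whisper: sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
    decoder: "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"))

let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)

// decode() wraps stream creation, waveform feeding, language computation,
// and stream destruction; call it once per clip.
let samples = [Float](repeating: 0, count: 16000)  // placeholder: 1 s of silence
print(slid.decode(samples: samples).lang)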
swift-api-examples/run-spoken-language-identification.sh (new file)
+#!/usr/bin/env bash
+
+set -ex
+
+if [ ! -d ../build-swift-macos ]; then
+  echo "Please run ../build-swift-macos.sh first!"
+  exit 1
+fi
+
+if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
+  echo "Download a pre-trained model for testing."
+
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
+  rm sherpa-onnx-whisper-tiny.tar.bz2
+fi
+
+if [ ! -e ./spoken-language-identification ]; then
+  # Note: We use -lc++ to link against libc++ instead of libstdc++
+  swiftc \
+    -lc++ \
+    -I ../build-swift-macos/install/include \
+    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+    ./spoken-language-identification.swift ./SherpaOnnx.swift \
+    -L ../build-swift-macos/install/lib/ \
+    -l sherpa-onnx \
+    -l onnxruntime \
+    -o spoken-language-identification
+
+  strip spoken-language-identification
+else
+  echo "./spoken-language-identification exists - skip building"
+fi
+
+export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
+./spoken-language-identification
swift-api-examples/spoken-language-identification.swift (new file)
+import AVFoundation
+
+extension AudioBuffer {
+  func array() -> [Float] {
+    return Array(UnsafeBufferPointer(self))
+  }
+}
+
+extension AVAudioPCMBuffer {
+  func array() -> [Float] {
+    return self.audioBufferList.pointee.mBuffers.array()
+  }
+}
+
+func run() {
+  let encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
+  let decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"
+
+  let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
+    encoder: encoder,
+    decoder: decoder
+  )
+
+  var config = sherpaOnnxSpokenLanguageIdentificationConfig(
+    whisper: whisperConfig,
+    numThreads: 1,
+    debug: 1,
+    provider: "cpu"
+  )
+  let filePath = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"
+
+  let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)
+
+  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
+  let audioFile = try! AVAudioFile(forReading: fileURL as URL)
+
+  let audioFormat = audioFile.processingFormat
+  assert(audioFormat.sampleRate == 16000)
+  assert(audioFormat.channelCount == 1)
+  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
+
+  let audioFrameCount = UInt32(audioFile.length)
+  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
+
+  try! audioFile.read(into: audioFileBuffer!)
+  let array: [Float]! = audioFileBuffer?.array()
+  let result = slid.decode(samples: array)
+
+  print("\nDetected language is:\n\(result.lang)")
+}
+
+@main
+struct App {
+  static func main() {
+    run()
+  }
+}
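The example asserts that the wave file is already 16 kHz, mono, Float32, which holds for the bundled test_wavs. For input in other formats, one option is to resample with AVAudioConverter before calling decode. The sketch below illustrates that approach under those assumptions; the loadAs16kMono name is hypothetical, not part of this PR, and it reuses the AVAudioPCMBuffer.array() extension from the example above.

// Sketch only: read an arbitrary audio file and convert it to the 16 kHz mono
// Float32 samples expected by SherpaOnnxSpokenLanguageIdentificationWrapper.
func loadAs16kMono(_ path: String) -> [Float] {
  let file = try! AVAudioFile(forReading: URL(fileURLWithPath: path))
  let inFormat = file.processingFormat
  let outFormat = AVAudioFormat(
    commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: 1, interleaved: false)!

  let inBuffer = AVAudioPCMBuffer(
    pcmFormat: inFormat, frameCapacity: AVAudioFrameCount(file.length))!
  try! file.read(into: inBuffer)

  let converter = AVAudioConverter(from: inFormat, to: outFormat)!
  let ratio = outFormat.sampleRate / inFormat.sampleRate
  let outBuffer = AVAudioPCMBuffer(
    pcmFormat: outFormat,
    frameCapacity: AVAudioFrameCount(Double(inBuffer.frameLength) * ratio) + 1)!

  // Hand the whole input buffer to the converter exactly once, then signal
  // end of stream so convert() drains its internal state and returns.
  var consumed = false
  var error: NSError?
  _ = converter.convert(to: outBuffer, error: &error) { _, status in
    if consumed {
      status.pointee = .endOfStream
      return nil
    }
    consumed = true
    status.pointee = .haveData
    return inBuffer
  }
  assert(error == nil)
  return outBuffer.array()
}

With such a helper, the three format assertions in run() could be replaced by a single call like let array = loadAs16kMono(filePath).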