Fangjun Kuang
Committed by GitHub

Add Swift API for spoken language identification. (#696)

... ... @@ -7,6 +7,9 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh
./run-spoken-language-identification.sh
rm -rf sherpa-onnx-whisper*
mkdir -p /Users/fangjun/Desktop
pushd /Users/fangjun/Desktop
curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
... ...
decode-file
decode-file-non-streaming
generate-subtitles
spoken-language-identification
tts
vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
... ...
... ... @@ -713,3 +713,86 @@ class SherpaOnnxOfflineTtsWrapper {
return SherpaOnnxGeneratedAudioWrapper(audio: audio)
}
}
// spoken language identification
/// Build the whisper-model part of a spoken language identification config.
///
/// - Parameters:
///   - encoder: Path to the whisper encoder onnx model file.
///   - decoder: Path to the whisper decoder onnx model file.
///   - tailPaddings: Number of tail padding frames; -1 presumably lets the
///     C library pick its default — confirm against the C API docs.
/// - Returns: The corresponding C struct, ready to be embedded in a
///   `SherpaOnnxSpokenLanguageIdentificationConfig`.
func sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
  encoder: String,
  decoder: String,
  tailPaddings: Int = -1
) -> SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  let config = SherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    tail_paddings: Int32(tailPaddings)
  )
  return config
}
/// Build a full spoken language identification config.
///
/// - Parameters:
///   - whisper: The whisper model config (see
///     `sherpaOnnxSpokenLanguageIdentificationWhisperConfig`).
///   - numThreads: Number of threads for inference. Default 1.
///   - debug: Non-zero enables debug output from the C library. Default 0.
///   - provider: Inference provider, e.g. "cpu". Default "cpu".
/// - Returns: The corresponding C struct.
func sherpaOnnxSpokenLanguageIdentificationConfig(
  whisper: SherpaOnnxSpokenLanguageIdentificationWhisperConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxSpokenLanguageIdentificationConfig {
  let config = SherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisper,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider)
  )
  return config
}
/// Owns a C `SherpaOnnxSpokenLanguageIdentificationResult` pointer and
/// releases it via the C API when this wrapper is deallocated.
class SherpaOnnxSpokenLanguageIdentificationResultWrapper {
  /// A pointer to the underlying counterpart in C
  let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!

  /// The detected language code, e.g.
  /// en for English, zh for Chinese, es for Spanish, de for German, etc.
  var lang: String {
    String(cString: result.pointee.lang)
  }

  init(result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!) {
    self.result = result
  }

  deinit {
    // Only free when the C API actually produced a result.
    guard let result else { return }
    SherpaOnnxDestroySpokenLanguageIdentificationResult(result)
  }
}
/// Swift wrapper around the C spoken language identification engine.
/// Creates the engine from a config on init and destroys it on deinit.
class SherpaOnnxSpokenLanguageIdentificationWrapper {
  /// A pointer to the underlying counterpart in C
  let slid: OpaquePointer!

  init(
    config: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationConfig>!
  ) {
    slid = SherpaOnnxCreateSpokenLanguageIdentification(config)
  }

  deinit {
    guard let slid else { return }
    SherpaOnnxDestroySpokenLanguageIdentification(slid)
  }

  /// Identify the spoken language of the given audio.
  ///
  /// - Parameters:
  ///   - samples: The audio waveform samples to analyze.
  ///   - sampleRate: Sample rate of `samples`. Default 16000.
  /// - Returns: A wrapper holding the identification result; read its
  ///   `lang` property for the detected language code.
  func decode(samples: [Float], sampleRate: Int = 16000)
    -> SherpaOnnxSpokenLanguageIdentificationResultWrapper
  {
    let stream: OpaquePointer! =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid)
    // The stream is only needed while feeding samples and computing;
    // the result pointer returned by Compute outlives it.
    defer { DestroyOfflineStream(stream) }

    AcceptWaveformOffline(stream, Int32(sampleRate), samples, Int32(samples.count))
    let result = SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream)
    return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result)
  }
}
... ...
#!/usr/bin/env bash
# Build (if needed) and run the Swift spoken language identification example
# on macOS. Requires ../build-swift-macos.sh to have been run first.

set -ex

# The sherpa-onnx macOS Swift build must exist before we can compile.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch the pre-trained whisper tiny model once; subsequent runs reuse it.
if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
  echo "Download a pre-trained model for testing."
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

# Compile the example only when the binary is not already present.
if [ -e ./spoken-language-identification ]; then
  echo "./spoken-language-identification exists - skip building"
else
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./spoken-language-identification.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o spoken-language-identification

  strip spoken-language-identification
fi

# Make the sherpa-onnx / onnxruntime dylibs visible at run time.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH

./spoken-language-identification
... ...
import AVFoundation
extension AudioBuffer {
  /// Copy this buffer's contents into a new `[Float]` array.
  /// Assumes the buffer holds float32 samples — the caller (see the example
  /// `run()` in this file) asserts `pcmFormatFloat32` before using this.
  func array() -> [Float] {
    let floats = UnsafeBufferPointer<Float>(self)
    return [Float](floats)
  }
}
extension AVAudioPCMBuffer {
  /// Copy the first channel's samples into a new `[Float]` array.
  /// Only reads `mBuffers` (the first buffer), so it is intended for mono
  /// audio — the caller asserts `channelCount == 1`.
  func array() -> [Float] {
    let firstChannel = self.audioBufferList.pointee.mBuffers
    return firstChannel.array()
  }
}
/// Run spoken language identification on a bundled test wave file using a
/// whisper multilingual model and print the detected language code.
///
/// Expects `./sherpa-onnx-whisper-tiny/` to exist (downloaded by the run
/// script). `try!` / force-unwraps are deliberate in this example: a missing
/// or malformed test file is a setup error and should abort.
func run() {
  // int8-quantized whisper tiny encoder/decoder models.
  let encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
  let decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"

  let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: encoder,
    decoder: decoder
  )

  // `var` is required: we pass a pointer to `config` into the wrapper below.
  var config = sherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisperConfig,
    numThreads: 1,
    debug: 1,
    provider: "cpu"
  )

  let filePath = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"

  let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  // This example expects 16 kHz mono float32 input (matching the default
  // sampleRate passed to decode below and the mono `array()` helper).
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()

  let result = slid.decode(samples: array)

  // Fixed a typo in the original output message ("Detectedllanguage").
  print("\nDetected language is:\n\(result.lang)")
}
/// Program entry point: delegates straight to `run()`.
@main
struct App {
  static func main() {
    run()
  }
}
... ...