Authored by Fangjun Kuang, 2024-03-25 16:22:25 +0800
Committed by GitHub, 2024-03-25 16:22:25 +0800
Commit 83a10a55a55bf407d4186c83cc48e026bc0c8164
1 parent ab7cff25
Add Swift API for spoken language identification. (#696)
Showing 5 changed files with 180 additions and 0 deletions
.github/scripts/test-swift.sh
swift-api-examples/.gitignore
swift-api-examples/SherpaOnnx.swift
swift-api-examples/run-spoken-language-identification.sh
swift-api-examples/spoken-language-identification.swift
.github/scripts/test-swift.sh
...
...
@@ -7,6 +7,9 @@ echo "pwd: $PWD"

cd swift-api-examples
ls -lh

./run-spoken-language-identification.sh
rm -rf sherpa-onnx-whisper*

mkdir -p /Users/fangjun/Desktop
pushd /Users/fangjun/Desktop
curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
...
...
swift-api-examples/.gitignore
decode-file
decode-file-non-streaming
generate-subtitles
spoken-language-identification
tts
vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
...
...
swift-api-examples/SherpaOnnx.swift
...
...
@@ -713,3 +713,86 @@ class SherpaOnnxOfflineTtsWrapper {
    return SherpaOnnxGeneratedAudioWrapper(audio: audio)
  }
}

// spoken language identification

func sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
  encoder: String,
  decoder: String,
  tailPaddings: Int = -1
) -> SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  return SherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    tail_paddings: Int32(tailPaddings))
}

func sherpaOnnxSpokenLanguageIdentificationConfig(
  whisper: SherpaOnnxSpokenLanguageIdentificationWhisperConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxSpokenLanguageIdentificationConfig {
  return SherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisper,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider))
}

class SherpaOnnxSpokenLanguageIdentificationResultWrapper {
  /// A pointer to the underlying counterpart in C
  let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!

  /// Return the detected language.
  /// en for English
  /// zh for Chinese
  /// es for Spanish
  /// de for German
  /// etc.
  var lang: String {
    return String(cString: result.pointee.lang)
  }

  init(result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!) {
    self.result = result
  }

  deinit {
    if let result {
      SherpaOnnxDestroySpokenLanguageIdentificationResult(result)
    }
  }
}

class SherpaOnnxSpokenLanguageIdentificationWrapper {
  /// A pointer to the underlying counterpart in C
  let slid: OpaquePointer!

  init(config: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationConfig>!) {
    slid = SherpaOnnxCreateSpokenLanguageIdentification(config)
  }

  deinit {
    if let slid {
      SherpaOnnxDestroySpokenLanguageIdentification(slid)
    }
  }

  func decode(samples: [Float], sampleRate: Int = 16000)
    -> SherpaOnnxSpokenLanguageIdentificationResultWrapper
  {
    let stream: OpaquePointer! = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid)
    AcceptWaveformOffline(stream, Int32(sampleRate), samples, Int32(samples.count))

    let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>? =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream)

    DestroyOfflineStream(stream)
    return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result)
  }
}
...
...
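Taken together, the new wrapper API is used in three steps: build a Whisper config from the encoder/decoder model paths, wrap it in a SherpaOnnxSpokenLanguageIdentificationConfig, and call decode(samples:sampleRate:) on a SherpaOnnxSpokenLanguageIdentificationWrapper. The following is a minimal sketch of that call sequence only; the model paths and the empty samples array are placeholders, and the repository's own end-to-end example (spoken-language-identification.swift, shown further below) reads real samples from a wav file.

// Minimal usage sketch of the API added in this commit (not part of the diff).
// `samples` must be 16 kHz mono Float32 PCM; the empty array is a placeholder.
let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
  encoder: "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
  decoder: "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"
)
var config = sherpaOnnxSpokenLanguageIdentificationConfig(whisper: whisperConfig)
let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)

let samples: [Float] = []  // placeholder: fill with audio samples
let result = slid.decode(samples: samples, sampleRate: 16000)
print(result.lang)  // e.g. "en", "zh", "es"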
swift-api-examples/run-spoken-language-identification.sh
new file mode 100755
#!/usr/bin/env bash

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
  echo "Download a pre-trained model for testing."
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

if [ ! -e ./spoken-language-identification ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./spoken-language-identification.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o spoken-language-identification

  strip spoken-language-identification
else
  echo "./spoken-language-identification exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH

./spoken-language-identification
...
...
swift-api-examples/spoken-language-identification.swift
new file mode 100644
import AVFoundation

extension AudioBuffer {
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

func run() {
  let encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
  let decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"

  let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: encoder,
    decoder: decoder
  )

  var config = sherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisperConfig,
    numThreads: 1,
    debug: 1,
    provider: "cpu"
  )

  let filePath = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"

  let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()

  let result = slid.decode(samples: array)
  print("\nDetected language is:\n\(result.lang)")
}

@main
struct App {
  static func main() {
    run()
  }
}
...
...
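Note that the example above asserts the input wav is already 16 kHz, mono, Float32 and does not resample. To feed arbitrary audio files into the new wrapper, a conversion step would be needed first. The helper below is a hedged sketch (not part of this commit) using AVAudioConverter; the function name readAudioAs16kMono is made up for illustration.

import AVFoundation

// Hypothetical helper: read an audio file and convert it to 16 kHz mono Float32,
// so it can be passed to SherpaOnnxSpokenLanguageIdentificationWrapper.decode(samples:).
func readAudioAs16kMono(path: String) -> [Float] {
  let file = try! AVAudioFile(forReading: URL(fileURLWithPath: path))

  let outFormat = AVAudioFormat(
    commonFormat: .pcmFormatFloat32, sampleRate: 16000, channels: 1, interleaved: false)!
  let converter = AVAudioConverter(from: file.processingFormat, to: outFormat)!

  // Read the whole file into a buffer in its native format.
  let inBuffer = AVAudioPCMBuffer(
    pcmFormat: file.processingFormat, frameCapacity: AVAudioFrameCount(file.length))!
  try! file.read(into: inBuffer)

  // Size the output buffer for the resampled length.
  let ratio = 16000.0 / file.processingFormat.sampleRate
  let outCapacity = AVAudioFrameCount(Double(file.length) * ratio) + 1
  let outBuffer = AVAudioPCMBuffer(pcmFormat: outFormat, frameCapacity: outCapacity)!

  // Single-shot conversion: hand the converter the input buffer once, then signal end of stream.
  var consumed = false
  var error: NSError?
  converter.convert(to: outBuffer, error: &error) { _, outStatus in
    if consumed {
      outStatus.pointee = .endOfStream
      return nil
    }
    consumed = true
    outStatus.pointee = .haveData
    return inBuffer
  }

  let channel = outBuffer.floatChannelData![0]
  return Array(UnsafeBufferPointer(start: channel, count: Int(outBuffer.frameLength)))
}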