Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-08-14 20:42:33 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-08-14 20:42:33 +0800
Commit
283d8fed70acd30aee6a0871b0cf74d7bfcab2af
283d8fed
1 parent
bec3e3dc
Add Swift API for computing speaker embeddings (#2492)
隐藏空白字符变更
内嵌
并排对比
正在显示
5 个修改的文件
包含
215 行增加
和
8 行删除
.github/scripts/test-swift.sh
swift-api-examples/.gitignore
swift-api-examples/SherpaOnnx.swift
swift-api-examples/compute-speaker-embeddings.swift
swift-api-examples/run-compute-speaker-embeddings.sh
.github/scripts/test-swift.sh
查看文件 @
283d8fe
...
...
@@ -9,6 +9,9 @@ ls -lh
./run-test-version.sh
./run-compute-speaker-embeddings.sh
rm -fv
*
.wav
*
.onnx
./run-tts-kitten-en.sh
ls -lh
rm -rf kitten-
*
...
...
swift-api-examples/.gitignore
查看文件 @
283d8fe
...
...
@@ -21,3 +21,4 @@ test-version
zipformer-ctc-asr
dolphin-ctc-asr
tts-kitten-en
compute-speaker-embeddings
...
...
swift-api-examples/SherpaOnnx.swift
查看文件 @
283d8fe
-/// swfit-api-examples/SherpaOnnx.swift
+/// swift-api-examples/SherpaOnnx.swift
/// Copyright (c) 2023 Xiaomi Corporation
import
Foundation
// For NSString
...
...
@@ -936,6 +936,41 @@ func sherpaOnnxOfflineTtsConfig(
)
}
/// Owns a `SherpaOnnxWave` pointer obtained from the C API and releases it
/// with `SherpaOnnxFreeWave` when the wrapper is deallocated.
///
/// The wrapped pointer may be `nil`; in that case every accessor reports an
/// empty wave (`0` samples, sample rate `0`, no sample data) instead of
/// trapping on a nil dereference.
class SherpaOnnxWaveWrapper {
  /// A pointer to the underlying counterpart in C. May be `nil`.
  let wave: UnsafePointer<SherpaOnnxWave>!

  /// Reads `filename` via `SherpaOnnxReadWave` and wraps the returned pointer.
  ///
  /// - Parameter filename: Path of the wave file to read.
  /// - Returns: A wrapper around whatever pointer the C call returned.
  ///   NOTE(review): presumably the C call returns a null pointer when the
  ///   file cannot be read -- confirm against the C API header. Callers can
  ///   detect that case by checking `numSamples == 0`.
  class func readWave(filename: String) -> SherpaOnnxWaveWrapper {
    let wave = SherpaOnnxReadWave(toCPointer(filename))
    return SherpaOnnxWaveWrapper(wave: wave)
  }

  /// Takes ownership of `wave`; it is freed in `deinit` when non-nil.
  init(wave: UnsafePointer<SherpaOnnxWave>!) {
    self.wave = wave
  }

  deinit {
    if let wave {
      SherpaOnnxFreeWave(wave)
    }
  }

  /// Number of samples in the wave, or `0` when there is no underlying wave.
  var numSamples: Int {
    guard let wave else { return 0 }
    return Int(wave.pointee.num_samples)
  }

  /// Sample rate stored in the wave, or `0` when there is no underlying wave.
  var sampleRate: Int {
    guard let wave else { return 0 }
    return Int(wave.pointee.sample_rate)
  }

  /// A copy of the sample data as a Swift array.
  ///
  /// Returns an empty array when there is no underlying wave, when its
  /// `samples` pointer is null, or when it reports no samples.
  var samples: [Float] {
    guard let wave, let data = wave.pointee.samples else { return [] }

    let count = Int(wave.pointee.num_samples)
    // UnsafeBufferPointer requires a non-negative count; treat anything
    // that is not strictly positive as "no data".
    guard count > 0 else { return [] }

    return [Float](UnsafeBufferPointer(start: data, count: count))
  }
}
class
SherpaOnnxGeneratedAudioWrapper
{
/// A pointer to the underlying counterpart in C
let
audio
:
UnsafePointer
<
SherpaOnnxGeneratedAudio
>!
...
...
@@ -960,14 +995,9 @@ class SherpaOnnxGeneratedAudioWrapper {
var
samples
:
[
Float
]
{
if
let
p
=
audio
.
pointee
.
samples
{
var
samples
:
[
Float
]
=
[]
for
index
in
0
..<
n
{
samples
.
append
(
p
[
Int
(
index
)])
}
return
samples
return
[
Float
](
UnsafeBufferPointer
(
start
:
p
,
count
:
Int
(
n
)))
}
else
{
let
samples
:
[
Float
]
=
[]
return
samples
return
[]
}
}
...
...
@@ -1432,6 +1462,72 @@ class SherpaOnnxOfflineSpeakerDiarizationWrapper {
}
}
/// Wraps an online-stream handle from the C API and destroys it with
/// `SherpaOnnxDestroyOnlineStream` when the wrapper is deallocated.
class SherpaOnnxOnlineStreamWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  /// Stores the given stream handle; a non-nil handle is destroyed in `deinit`.
  init(impl: OpaquePointer!) {
    self.impl = impl
  }

  deinit {
    guard let handle = impl else { return }
    SherpaOnnxDestroyOnlineStream(handle)
  }

  /// Forwards audio samples to the underlying stream.
  ///
  /// - Parameters:
  ///   - samples: Samples handed to `SherpaOnnxOnlineStreamAcceptWaveform`.
  ///   - sampleRate: Sample rate passed along with the samples
  ///     (defaults to 16000).
  func acceptWaveform(samples: [Float], sampleRate: Int = 16000) {
    let rate = Int32(sampleRate)
    let count = Int32(samples.count)
    SherpaOnnxOnlineStreamAcceptWaveform(impl, rate, samples, count)
  }

  /// Calls `SherpaOnnxOnlineStreamInputFinished` on the underlying stream.
  func inputFinished() {
    SherpaOnnxOnlineStreamInputFinished(impl)
  }
}
/// Wraps a speaker-embedding extractor created through the C API and
/// destroys it with `SherpaOnnxDestroySpeakerEmbeddingExtractor` on
/// deallocation.
class SherpaOnnxSpeakerEmbeddingExtractorWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  /// Creates the extractor by passing `config` to
  /// `SherpaOnnxCreateSpeakerEmbeddingExtractor`.
  init(config: UnsafePointer<SherpaOnnxSpeakerEmbeddingExtractorConfig>!) {
    impl = SherpaOnnxCreateSpeakerEmbeddingExtractor(config)
  }

  deinit {
    guard let handle = impl else { return }
    SherpaOnnxDestroySpeakerEmbeddingExtractor(handle)
  }

  /// Value reported by `SherpaOnnxSpeakerEmbeddingExtractorDim`; also used
  /// as the element count when copying an embedding in `compute(stream:)`.
  var dim: Int {
    let value = SherpaOnnxSpeakerEmbeddingExtractorDim(impl)
    return Int(value)
  }

  /// Creates a new stream for this extractor and wraps it.
  func createStream() -> SherpaOnnxOnlineStreamWrapper {
    let handle = SherpaOnnxSpeakerEmbeddingExtractorCreateStream(impl)
    return SherpaOnnxOnlineStreamWrapper(impl: handle)
  }

  /// Returns `true` exactly when the C API reports `1` for `stream`.
  func isReady(stream: SherpaOnnxOnlineStreamWrapper) -> Bool {
    let status = SherpaOnnxSpeakerEmbeddingExtractorIsReady(impl, stream.impl)
    return status == 1
  }

  /// Computes the embedding for `stream`.
  ///
  /// - Returns: `dim` floats copied from the buffer returned by the C API,
  ///   or an empty array when `isReady(stream:)` is `false`.
  func compute(stream: SherpaOnnxOnlineStreamWrapper) -> [Float] {
    guard isReady(stream: stream) else {
      return []
    }

    let raw = SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(impl, stream.impl)
    // Copy into a Swift array first, then hand the C buffer back.
    let embedding = [Float](UnsafeBufferPointer(start: raw, count: dim))
    SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(raw)
    return embedding
  }
}
func
sherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
(
model
:
String
=
""
)
->
SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
{
...
...
swift-api-examples/compute-speaker-embeddings.swift
0 → 100644
查看文件 @
283d8fe
/// swift-api-examples/compute-speaker-embeddings.swift
/// Copyright (c) 2025 Xiaomi Corporation
/*
Please download test files used in this script from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
*/
/// Returns the cosine similarity of two equally sized vectors.
///
/// - Parameters:
///   - a: First vector.
///   - b: Second vector; must have the same number of elements as `a`.
/// - Returns: `dot(a, b) / (|a| * |b|)`, or `0` unless both magnitudes are
///   greater than zero.
/// - Precondition: `a.count == b.count`.
func cosineSimilarity(_ a: [Float], _ b: [Float]) -> Float {
  precondition(a.count == b.count, "Vectors must have the same length")

  // Accumulate the dot product and both squared norms in one pass.
  var dot: Float = 0
  var squaredNormA: Float = 0
  var squaredNormB: Float = 0
  for (x, y) in zip(a, b) {
    dot += x * y
    squaredNormA += x * x
    squaredNormB += y * y
  }

  let normA = squaredNormA.squareRoot()
  let normB = squaredNormB.squareRoot()

  // Only divide when both magnitudes are strictly positive.
  if normA > 0 && normB > 0 {
    return dot / (normA * normB)
  }
  return 0
}
/// Reads a wave file, feeds all of its samples into a fresh stream of
/// `extractor`, and returns the result of `extractor.compute(stream:)`.
///
/// - Parameters:
///   - extractor: The extractor used to create the stream and compute the
///     embedding.
///   - waveFilename: Path of the wave file to read.
/// - Returns: Whatever `extractor.compute(stream:)` returns for the stream.
func computeEmbedding(extractor: SherpaOnnxSpeakerEmbeddingExtractorWrapper, waveFilename: String)
  -> [Float]
{
  let wave = SherpaOnnxWaveWrapper.readWave(filename: waveFilename)
  let stream = extractor.createStream()

  let samples = wave.samples
  let sampleRate = wave.sampleRate
  stream.acceptWaveform(samples: samples, sampleRate: sampleRate)

  // Signal that no more audio will be supplied before computing.
  stream.inputFinished()

  return extractor.compute(stream: stream)
}
/// Creates an extractor from `./wespeaker_zh_cnceleb_resnet34.onnx`,
/// computes embeddings for three wave files in the current directory, and
/// prints the pairwise cosine similarity scores.
func run() {
  var config = sherpaOnnxSpeakerEmbeddingExtractorConfig(
    model: "./wespeaker_zh_cnceleb_resnet34.onnx")
  let extractor = SherpaOnnxSpeakerEmbeddingExtractorWrapper(config: &config)

  // Embeddings are computed in this order: index 0, 1, 2.
  let waveFilenames = [
    "./fangjun-sr-1.wav",
    "./fangjun-sr-2.wav",
    "./leijun-sr-1.wav",
  ]
  let embeddings = waveFilenames.map {
    computeEmbedding(extractor: extractor, waveFilename: $0)
  }

  // Compute every score before printing any of them.
  let score12 = cosineSimilarity(embeddings[0], embeddings[1])
  let score13 = cosineSimilarity(embeddings[0], embeddings[2])
  let score23 = cosineSimilarity(embeddings[1], embeddings[2])

  print("Score between spk1 and spk2: \(score12)")
  print("Score between spk1 and spk3: \(score13)")
  print("Score between spk2 and spk3: \(score23)")
}
/// Program entry point; it only forwards to `run()`.
@main
struct App {
  /// Invoked at program start because of the `@main` attribute.
  static func main() {
    run()
  }
}
...
...
swift-api-examples/run-compute-speaker-embeddings.sh
0 → 100755
查看文件 @
283d8fe
#!/usr/bin/env bash
#
# Downloads the test model and wave files if they are missing, builds the
# compute-speaker-embeddings example if it has not been built yet, and
# then runs it.

set -ex

# Files fetched by this script all live under this release.
base_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models

# The example links against the libraries produced by the macOS build.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch the model on first use.
if [ ! -f ./wespeaker_zh_cnceleb_resnet34.onnx ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models"
  echo ""
  echo "for help"

  curl -SL -O "$base_url/wespeaker_zh_cnceleb_resnet34.onnx"
fi

# Fetch each test wave file that is not present yet.
for wav in fangjun-sr-1.wav fangjun-sr-2.wav leijun-sr-1.wav; do
  if [ ! -f "./$wav" ]; then
    curl -SL -O "$base_url/$wav"
  fi
done

# Build only when the executable does not exist.
if [ ! -e ./compute-speaker-embeddings ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./compute-speaker-embeddings.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o compute-speaker-embeddings

  strip compute-speaker-embeddings
else
  echo "./compute-speaker-embeddings exists - skip building"
fi

# Make the installed libraries visible to the dynamic loader at run time.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH

./compute-speaker-embeddings
...
...
请
注册
或
登录
后发表评论