Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-10-09 23:25:39 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-10-09 23:25:39 +0800
Commit
15713445095fc8bedef7087079021b1eb31ed08b
15713445
1 parent
df681e98
Swift API for speaker diarization (#1404)
隐藏空白字符变更
内嵌
并排对比
正在显示
4 个修改的文件
包含
209 行增加
和
0 行删除
.github/scripts/test-swift.sh
swift-api-examples/SherpaOnnx.swift
swift-api-examples/run-speaker-diarization.sh
swift-api-examples/speaker-diarization.swift
.github/scripts/test-swift.sh
查看文件 @
1571344
...
...
@@ -7,6 +7,11 @@ echo "pwd: $PWD"
cd
swift-api-examples
ls -lh
./run-speaker-diarization.sh
rm -rf
*
.onnx
rm -rf sherpa-onnx-pyannote-segmentation-3-0
rm -fv
*
.wav
./run-add-punctuations.sh
rm ./add-punctuations
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
...
...
swift-api-examples/SherpaOnnx.swift
查看文件 @
1571344
...
...
@@ -1078,3 +1078,116 @@ class SherpaOnnxOfflinePunctuationWrapper {
return
ans
}
}
/// Creates the C config struct for a pyannote speaker segmentation model.
///
/// - Parameter model: The model string (the bundled example passes a path to
///   an `.onnx` file). It is converted with `toCPointer` before being stored.
/// - Returns: A `SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig`
///   whose `model` field holds the converted C string pointer.
func sherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(
  model: String
) -> SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig {
  let modelPointer = toCPointer(model)
  return SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(model: modelPointer)
}
/// Creates the C config struct for the speaker segmentation model.
///
/// - Parameters:
///   - pyannote: The pyannote model config to embed.
///   - numThreads: Thread count, converted to `Int32`. Defaults to 1.
///   - debug: Debug flag value, converted to `Int32`. Defaults to 0.
///   - provider: Provider name, converted with `toCPointer`. Defaults to "cpu".
/// - Returns: A populated `SherpaOnnxOfflineSpeakerSegmentationModelConfig`.
func sherpaOnnxOfflineSpeakerSegmentationModelConfig(
  pyannote: SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOfflineSpeakerSegmentationModelConfig {
  // Convert in the same order as the struct fields so any Int32 overflow trap
  // is raised for the same argument as before.
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerPointer = toCPointer(provider)

  return SherpaOnnxOfflineSpeakerSegmentationModelConfig(
    pyannote: pyannote,
    num_threads: threadCount,
    debug: debugFlag,
    provider: providerPointer
  )
}
/// Creates the C config struct for fast clustering.
///
/// - Parameters:
///   - numClusters: Cluster count, converted to `Int32`. Defaults to -1.
///   - threshold: Clustering threshold, passed through unchanged. Defaults to 0.5.
/// - Returns: A populated `SherpaOnnxFastClusteringConfig`.
func sherpaOnnxFastClusteringConfig(
  numClusters: Int = -1,
  threshold: Float = 0.5
) -> SherpaOnnxFastClusteringConfig {
  let clusterCount = Int32(numClusters)
  return SherpaOnnxFastClusteringConfig(num_clusters: clusterCount, threshold: threshold)
}
/// Creates the C config struct for the speaker embedding extractor.
///
/// - Parameters:
///   - model: The model string (the bundled example passes a path to an
///     `.onnx` file), converted with `toCPointer`.
///   - numThreads: Thread count, converted to `Int32`. Defaults to 1.
///   - debug: Debug flag value, converted to `Int32`. Defaults to 0.
///   - provider: Provider name, converted with `toCPointer`. Defaults to "cpu".
/// - Returns: A populated `SherpaOnnxSpeakerEmbeddingExtractorConfig`.
func sherpaOnnxSpeakerEmbeddingExtractorConfig(
  model: String,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxSpeakerEmbeddingExtractorConfig {
  // Conversions are performed in field order, matching the original
  // left-to-right argument evaluation.
  let modelPointer = toCPointer(model)
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerPointer = toCPointer(provider)

  return SherpaOnnxSpeakerEmbeddingExtractorConfig(
    model: modelPointer,
    num_threads: threadCount,
    debug: debugFlag,
    provider: providerPointer
  )
}
/// Assembles the top-level C config struct for offline speaker diarization.
///
/// - Parameters:
///   - segmentation: Segmentation model config.
///   - embedding: Speaker embedding extractor config.
///   - clustering: Fast clustering config.
///   - minDurationOn: Stored in `min_duration_on`. Defaults to 0.3.
///   - minDurationOff: Stored in `min_duration_off`. Defaults to 0.5.
/// - Returns: A `SherpaOnnxOfflineSpeakerDiarizationConfig` holding the given
///   values unchanged.
func sherpaOnnxOfflineSpeakerDiarizationConfig(
  segmentation: SherpaOnnxOfflineSpeakerSegmentationModelConfig,
  embedding: SherpaOnnxSpeakerEmbeddingExtractorConfig,
  clustering: SherpaOnnxFastClusteringConfig,
  minDurationOn: Float = 0.3,
  minDurationOff: Float = 0.5
) -> SherpaOnnxOfflineSpeakerDiarizationConfig {
  let config = SherpaOnnxOfflineSpeakerDiarizationConfig(
    segmentation: segmentation,
    embedding: embedding,
    clustering: clustering,
    min_duration_on: minDurationOn,
    min_duration_off: minDurationOff
  )
  return config
}
/// One diarization segment as a plain Swift value.
struct SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper {
  /// Segment start, copied from the C segment's `start` field.
  var start: Float = 0.0

  /// Segment end, copied from the C segment's `end` field.
  var end: Float = 0.0

  /// Speaker index, copied from the C segment's `speaker` field.
  var speaker: Int = 0
}
/// Swift wrapper that owns a C offline speaker diarization object.
///
/// The C object is created in `init` and destroyed in `deinit`.
class SherpaOnnxOfflineSpeakerDiarizationWrapper {
  /// A pointer to the underlying counterpart in C
  let impl: OpaquePointer!

  /// Creates the underlying C object from `config`.
  ///
  /// - Parameter config: Pointer to the diarization config, passed straight
  ///   to `SherpaOnnxCreateOfflineSpeakerDiarization`.
  init(config: UnsafePointer<SherpaOnnxOfflineSpeakerDiarizationConfig>!) {
    impl = SherpaOnnxCreateOfflineSpeakerDiarization(config)
  }

  deinit {
    // Only destroy when creation actually produced an object.
    if let impl {
      SherpaOnnxDestroyOfflineSpeakerDiarization(impl)
    }
  }

  /// The sample rate reported by the underlying C object.
  var sampleRate: Int {
    return Int(SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(impl))
  }

  /// Runs diarization on `samples` and returns the segments sorted by start
  /// time.
  ///
  /// - Parameter samples: Audio samples handed to the C API together with
  ///   their count.
  /// - Returns: One wrapper per segment, or an empty array when the C API
  ///   returns no result or no segment array.
  func process(samples: [Float]) -> [SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper] {
    guard
      let result = SherpaOnnxOfflineSpeakerDiarizationProcess(
        impl, samples, Int32(samples.count))
    else {
      return []
    }
    // Release the result on every exit path. Previously the early return
    // taken when the sorted segment array was nil leaked it.
    defer { SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result) }

    let numSegments = Int(SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result))

    let sorted: UnsafePointer<SherpaOnnxOfflineSpeakerDiarizationSegment>? =
      SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result)
    guard let segments = sorted else {
      return []
    }
    // Registered after the result's defer, so it runs first: the segment
    // array is destroyed before the result, as in the original code.
    defer { SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments) }

    var ans: [SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper] = []
    for i in 0..<numSegments {
      let segment = segments[i]
      ans.append(
        SherpaOnnxOfflineSpeakerDiarizationSegmentWrapper(
          start: segment.start, end: segment.end, speaker: Int(segment.speaker)))
    }

    return ans
  }
}
...
...
swift-api-examples/run-speaker-diarization.sh
0 → 100755
查看文件 @
1571344
#!/usr/bin/env bash
#
# Downloads the models and test wave used by the Swift speaker diarization
# example, builds the example if the binary is missing, then runs it.

# Segmentation model: download and unpack the archive only when the model file
# is not already present.
if [[ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

# Speaker embedding extractor model.
if [[ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

# Test wave file.
if [[ ! -f ./0-four-speakers-zh.wav ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

# Build the example binary unless one already exists.
if [[ ! -e ./speaker-diarization ]]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./speaker-diarization.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o speaker-diarization

  strip speaker-diarization
else
  echo "./speaker-diarization exists - skip building"
fi

# Prepend the install lib directory to the dynamic library search path.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH

./speaker-diarization
...
...
swift-api-examples/speaker-diarization.swift
0 → 100644
查看文件 @
1571344
import
AVFoundation
extension AudioBuffer {
  /// Copies the buffer's contents into a Swift array of `Float`.
  ///
  /// The buffer is viewed through `UnsafeBufferPointer<Float>` and then
  /// copied, so the returned array does not alias the buffer's memory.
  func array() -> [Float] {
    let view = UnsafeBufferPointer<Float>(self)
    return [Float](view)
  }
}
extension AVAudioPCMBuffer {
  /// Returns the samples of this PCM buffer's `mBuffers` entry as `[Float]`.
  ///
  /// Delegates to `AudioBuffer.array()` on `audioBufferList.pointee.mBuffers`.
  func array() -> [Float] {
    let buffer = audioBufferList.pointee.mBuffers
    return buffer.array()
  }
}
/// Runs speaker diarization on the example wave file and prints each segment
/// as `<start> -- <end> speaker_<index>`.
///
/// The model and wave paths are fixed and relative to the current directory.
/// Failing to open or read the wave file crashes the program (`try!`).
func run() {
  let segmentationModel = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"
  let embeddingExtractorModel =
    "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
  let waveFilename = "./0-four-speakers-zh.wav"

  // ./0-four-speakers-zh.wav contains 4 speakers, so the cluster count is 4.
  let numSpeakers = 4

  // Build the nested configs one at a time, in the same order the original
  // nested call expression evaluated them.
  let pyannoteConfig = sherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(
    model: segmentationModel)
  let segmentationConfig = sherpaOnnxOfflineSpeakerSegmentationModelConfig(
    pyannote: pyannoteConfig)
  let embeddingConfig = sherpaOnnxSpeakerEmbeddingExtractorConfig(
    model: embeddingExtractorModel)
  let clusteringConfig = sherpaOnnxFastClusteringConfig(numClusters: numSpeakers)

  var config = sherpaOnnxOfflineSpeakerDiarizationConfig(
    segmentation: segmentationConfig,
    embedding: embeddingConfig,
    clustering: clusteringConfig
  )

  let sd = SherpaOnnxOfflineSpeakerDiarizationWrapper(config: &config)

  let fileURL = NSURL(fileURLWithPath: waveFilename) as URL
  let audioFile = try! AVAudioFile(forReading: fileURL)

  // Debug-build sanity checks: the wave must match the diarizer's sample
  // rate and be mono Float32.
  let audioFormat = audioFile.processingFormat
  assert(Int(audioFormat.sampleRate) == sd.sampleRate)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  // Read the whole file into a single PCM buffer.
  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(
    pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  try! audioFile.read(into: audioFileBuffer!)

  let samples: [Float] = audioFileBuffer!.array()

  print("Started!")

  for segment in sd.process(samples: samples) {
    print("\(segment.start) -- \(segment.end) speaker_\(segment.speaker)")
  }
}
/// Program entry point; `main()` only calls `run()`.
@main
struct App {
  static func main() { run() }
}
...
...
请
注册
或
登录
后发表评论