Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-10-10 14:29:05 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-10-10 14:29:05 +0800
Commit
a45e5dba9986f07f13d4f64b13ff77589a9909e3
a45e5dba
1 parent
bd50e795
C# API for speaker diarization (#1407)
隐藏空白字符变更
内嵌
并排对比
正在显示
12 个修改的文件
包含
408 行增加
和
51 行删除
.github/scripts/test-dot-net.sh
.github/workflows/test-dot-net.yaml
dotnet-examples/offline-speaker-diarization/Program.cs
dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
dotnet-examples/offline-speaker-diarization/run.sh
dotnet-examples/sherpa-onnx.sln
scripts/dotnet/FastClusteringConfig.cs
scripts/dotnet/OfflineSpeakerDiarization.cs
scripts/dotnet/OfflineSpeakerDiarizationConfig.cs
scripts/dotnet/OfflineSpeakerDiarizationSegment.cs
scripts/dotnet/OfflineSpeakerSegmentationModelConfig.cs
scripts/dotnet/OfflineSpeakerSegmentationPyannoteModelConfig.cs
.github/scripts/test-dot-net.sh
查看文件 @
a45e5db
...
...
@@ -2,7 +2,13 @@
cd
dotnet-examples/
cd
./offline-decode-files
cd
./offline-speaker-diarization
./run.sh
rm -rfv
*
.onnx
rm -fv
*
.wav
rm -rfv sherpa-onnx-pyannote-
*
cd
../offline-decode-files
./run-sense-voice-ctc.sh
rm -rf sherpa-onnx-
*
...
...
.github/workflows/test-dot-net.yaml
查看文件 @
a45e5db
...
...
@@ -47,53 +47,10 @@ jobs:
with
:
fetch-depth
:
0
-
name
:
Free space
if
:
matrix.os == 'ubuntu-latest'
shell
:
bash
run
:
|
df -h
rm -rf /opt/hostedtoolcache
df -h
-
name
:
Free more space
if
:
matrix.os == 'ubuntu-latest'
shell
:
bash
run
:
|
# https://github.com/orgs/community/discussions/25678
cd /opt
find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
sudo rm -rf /usr/share/dotnet
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-
name
:
Free Disk Space (Ubuntu)
if
:
matrix.os == 'ubuntu-latest'
uses
:
jlumbroso/free-disk-space@main
with
:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache
:
false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android
:
true
dotnet
:
false
haskell
:
true
large-packages
:
true
docker-images
:
false
swap-storage
:
true
-
name
:
Check space
if
:
matrix.os == 'ubuntu-latest'
shell
:
bash
run
:
|
df -h
-
name
:
ccache
uses
:
hendrikmuhs/ccache-action@v1.2
with
:
key
:
${{ matrix.os }}-release-shared
key
:
${{ matrix.os }}-
dotnet-
release-shared
-
name
:
Build sherpa-onnx
shell
:
bash
...
...
@@ -110,11 +67,16 @@ jobs:
-DCMAKE_BUILD_TYPE=Release \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DSHERPA_ONNX_ENABLE_BINARY=O
N
\
-DSHERPA_ONNX_ENABLE_BINARY=O
FF
\
..
cmake --build . --target install --config Release
rm -rf install/share
rm -rf install/lib/pkg*
ls -lh ./install/lib
-
uses
:
actions/upload-artifact@v4
with
:
name
:
${{ matrix.os }}
...
...
@@ -148,7 +110,7 @@ jobs:
uses
:
actions/download-artifact@v4
with
:
name
:
ubuntu-latest
path
:
/tmp/linux
path
:
/tmp/linux
-x64
-
name
:
Setup .NET
uses
:
actions/setup-dotnet@v4
...
...
@@ -162,17 +124,21 @@ jobs:
- name
:
Display files
shell
:
bash
run
:
|
echo "----------/tmp/----------"
ls -lh /tmp/
echo "----------/tmp----------"
ls -lh /tmp
echo "----------/tmp/linux----------"
ls -lh /tmp/linux
echo "----------/tmp/linux-x64----------"
ls -lh /tmp/linux-x64
df -h
-
name
:
Build
shell
:
bash
run
:
|
cd scripts/dotnet
./run.sh
df -h
ls -lh /tmp/packages
-
name
:
Copy files
shell
:
bash
...
...
@@ -181,9 +147,14 @@ jobs:
ls -lh /tmp
df -h
-
name
:
Run tests
shell
:
bash
run
:
|
dotnet nuget locals all --clear
df -h
.github/scripts/test-dot-net.sh
-
uses
:
actions/upload-artifact@v4
...
...
dotnet-examples/offline-speaker-diarization/Program.cs
0 → 100644
查看文件 @
a45e5db
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to use sherpa-onnx C# API for speaker diarization
/*
Usage:
Step 1: Download a speaker segmentation model
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available models. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
Step 2: Download a speaker embedding extractor model
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
for a list of available models. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
Step 3: Download test wave files
Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
for a list of available test wave files. The following is an example
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
Step 4: Run it
dotnet run
*/
using
SherpaOnnx
;
using
System
;
/// <summary>
/// Command-line example: runs offline speaker diarization on a fixed test wave
/// file and prints one line per detected segment.
/// </summary>
class OfflineSpeakerDiarizationDemo
{
    /// <summary>
    /// Entry point. Expects the model files and the test wave file described in
    /// the usage notes at the top of this file to exist in the current directory.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new OfflineSpeakerDiarizationConfig();
        config.Segmentation.Pyannote.Model = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";
        config.Embedding.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

        // The test wave ./0-four-speakers-zh.wav has 4 speakers, so
        // we set the number of clusters to 4.
        config.Clustering.NumClusters = 4;

        // If you don't know the number of speakers in the test wave file, please
        // use
        //   config.Clustering.Threshold = 0.5; // You need to tune this threshold

        // OfflineSpeakerDiarization implements IDisposable; `using` makes sure it
        // is disposed on every exit path, including the early return below.
        using var sd = new OfflineSpeakerDiarization(config);

        var testWaveFile = "./0-four-speakers-zh.wav";
        WaveReader waveReader = new WaveReader(testWaveFile);

        if (sd.SampleRate != waveReader.SampleRate)
        {
            Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}");
            return;
        }

        Console.WriteLine("Started");

        // Reports progress as a percentage each time it is invoked.
        static int OnProgress(int numProcessedChunks, int numTotalChunks, IntPtr arg)
        {
            float progress = 100.0F * numProcessedChunks / numTotalChunks;
            Console.WriteLine($"Progress {progress:0.00}%");
            return 0;
        }

        var callback = new OfflineSpeakerDiarizationProgressCallback(OnProgress);

        // Alternatively, call sd.Process(waveReader.Samples) when progress
        // reporting is not needed.
        var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero);

        foreach (var s in segments)
        {
            Console.WriteLine($"{s.Start:0.00} -- {s.End:0.00} speaker_{s.Speaker}");
        }
    }
}
...
...
dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
0 → 100644
查看文件 @
a45e5db
<Project Sdk="Microsoft.NET.Sdk">
<!-- Builds the offline speaker diarization example as a console executable targeting net6.0. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>offline_speaker_diarization</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Shared project this example builds against. NOTE(review): presumably supplies the SherpaOnnx bindings and WaveReader used by Program.cs; confirm. -->
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>
</Project>
...
...
dotnet-examples/offline-speaker-diarization/run.sh
0 → 100755
查看文件 @
a45e5db
#!/usr/bin/env bash
#
# Downloads the segmentation model, the speaker embedding model and the test
# wave file when they are not already present, then runs the example.

# Stop at the first failing command so that `dotnet run` is never started
# with missing or partially downloaded files.
set -e

# Speaker segmentation model (downloaded as a tarball and extracted in place).
if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  # -f makes curl fail on an HTTP error instead of saving the error page.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

# Speaker embedding extractor model.
if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

# Test wave file.
if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dotnet run
...
...
dotnet-examples/sherpa-onnx.sln
查看文件 @
a45e5db
...
...
@@ -31,6 +31,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-micro
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
...
...
@@ -93,6 +95,10 @@ Global
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
...
...
scripts/dotnet/FastClusteringConfig.cs
0 → 100644
查看文件 @
a45e5db
/// Copyright (c) 2024 Xiaomi Corporation
using
System.Runtime.InteropServices
;
namespace SherpaOnnx
{
    /// <summary>
    /// Clustering options embedded in <c>OfflineSpeakerDiarizationConfig</c>.
    /// </summary>
    /// <remarks>
    /// The struct uses sequential layout, so the declaration order of the
    /// fields is part of its binary shape and must not be changed.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct FastClusteringConfig
    {
        /// <summary>
        /// Number of clusters (speakers). Defaults to -1.
        /// NOTE(review): -1 presumably means "unknown, fall back to
        /// <see cref="Threshold"/>" - confirm against the native library.
        /// </summary>
        public int NumClusters;

        /// <summary>
        /// Clustering threshold. Defaults to 0.5.
        /// </summary>
        public float Threshold;

        /// <summary>
        /// Creates a configuration populated with the default values.
        /// </summary>
        public FastClusteringConfig()
        {
            this.Threshold = 0.5F;
            this.NumClusters = -1;
        }
    }
}
...
...
scripts/dotnet/OfflineSpeakerDiarization.cs
0 → 100644
查看文件 @
a45e5db
/// Copyright (c) 2024 Xiaomi Corporation
using
System
;
using
System.Runtime.InteropServices
;
using
System.Text
;
namespace SherpaOnnx
{
    /// <summary>
    /// Progress callback handed to the native library by
    /// <see cref="OfflineSpeakerDiarization.ProcessWithCallback"/>.
    /// </summary>
    /// <param name="numProcessedChunks">Number of chunks processed so far.</param>
    /// <param name="numTotalChunks">Total number of chunks to process.</param>
    /// <param name="arg">
    /// Opaque pointer. NOTE(review): presumably the <c>arg</c> value that was
    /// given to <c>ProcessWithCallback</c> - confirm against the C API.
    /// </param>
    /// <returns>
    /// An integer returned to the native caller. NOTE(review): its meaning is
    /// defined by the native library - confirm.
    /// </returns>
    // The matching import below is declared Cdecl, so the delegate is marshaled
    // with the same calling convention.
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate int OfflineSpeakerDiarizationProgressCallback(int numProcessedChunks, int numTotalChunks, IntPtr arg);

    /// <summary>
    /// Managed wrapper around a native offline speaker diarization object.
    /// Dispose the instance to release the native object deterministically;
    /// the finalizer releases it otherwise.
    /// </summary>
    public class OfflineSpeakerDiarization : IDisposable
    {
        /// <summary>
        /// Creates the native diarization object from <paramref name="config"/>.
        /// </summary>
        /// <param name="config">Configuration passed by reference to the native library.</param>
        /// <remarks>
        /// The constructor itself does not throw when the native call returns a
        /// null handle; later member calls throw instead.
        /// </remarks>
        public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config)
        {
            IntPtr h = SherpaOnnxCreateOfflineSpeakerDiarization(ref config);
            _handle = new HandleRef(this, h);
        }

        /// <summary>
        /// Runs diarization on <paramref name="samples"/>.
        /// </summary>
        /// <param name="samples">Audio samples; must not be null.</param>
        /// <returns>The segments ordered by start time; empty when the native call yields no result.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
        /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
        public OfflineSpeakerDiarizationSegment[] Process(float[] samples)
        {
            if (samples == null)
            {
                throw new ArgumentNullException(nameof(samples));
            }

            IntPtr handle = GetHandleOrThrow();
            IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcess(handle, samples, samples.Length);

            // Only the raw IntPtr is visible to the native call, so keep this
            // object (and its finalizer) from running while the call is active.
            GC.KeepAlive(this);

            return ProcessImpl(result);
        }

        /// <summary>
        /// Runs diarization on <paramref name="samples"/> and passes
        /// <paramref name="callback"/> and <paramref name="arg"/> to the native library.
        /// </summary>
        /// <param name="samples">Audio samples; must not be null.</param>
        /// <param name="callback">Progress callback forwarded to the native library.</param>
        /// <param name="arg">Opaque pointer forwarded to the native library.</param>
        /// <returns>The segments ordered by start time; empty when the native call yields no result.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
        /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
        public OfflineSpeakerDiarizationSegment[] ProcessWithCallback(float[] samples,
            OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg)
        {
            if (samples == null)
            {
                throw new ArgumentNullException(nameof(samples));
            }

            IntPtr handle = GetHandleOrThrow();
            IntPtr result = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
                handle, samples, samples.Length, callback, arg);

            // See Process(): keep this object alive for the duration of the call.
            GC.KeepAlive(this);

            return ProcessImpl(result);
        }

        /// <summary>
        /// Copies the segments of a native result into managed objects and
        /// releases the native result.
        /// </summary>
        /// <param name="result">Native result pointer; may be <see cref="IntPtr.Zero"/>.</param>
        /// <returns>The segments sorted by start time, or an empty array.</returns>
        private static OfflineSpeakerDiarizationSegment[] ProcessImpl(IntPtr result)
        {
            if (result == IntPtr.Zero)
            {
                return new OfflineSpeakerDiarizationSegment[0];
            }

            try
            {
                int numSegments = SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(result);
                if (numSegments <= 0)
                {
                    return new OfflineSpeakerDiarizationSegment[0];
                }

                IntPtr p = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(result);
                if (p == IntPtr.Zero)
                {
                    return new OfflineSpeakerDiarizationSegment[0];
                }

                try
                {
                    OfflineSpeakerDiarizationSegment[] ans = new OfflineSpeakerDiarizationSegment[numSegments];

                    unsafe
                    {
                        // Size of one native segment: two floats (start, end)
                        // followed by one int (speaker), matching the layout read
                        // by OfflineSpeakerDiarizationSegment.
                        int size = sizeof(float) * 2 + sizeof(int);
                        for (int i = 0; i != numSegments; ++i)
                        {
                            // IntPtr.Add() is not available on net20, hence the
                            // explicit pointer arithmetic.
                            IntPtr t = new IntPtr((byte*)p + i * size);
                            ans[i] = new OfflineSpeakerDiarizationSegment(t);
                        }
                    }

                    return ans;
                }
                finally
                {
                    // Released even if marshaling a segment throws.
                    SherpaOnnxOfflineSpeakerDiarizationDestroySegment(p);
                }
            }
            finally
            {
                SherpaOnnxOfflineSpeakerDiarizationDestroyResult(result);
            }
        }

        /// <summary>
        /// Releases the native object. Calling it more than once is harmless.
        /// </summary>
        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Releases the native object if <see cref="Dispose"/> was never called.
        /// </summary>
        ~OfflineSpeakerDiarization()
        {
            Cleanup();
        }

        /// <summary>
        /// Destroys the native object once and clears the stored handle.
        /// </summary>
        private void Cleanup()
        {
            IntPtr h = _handle.Handle;
            if (h == IntPtr.Zero)
            {
                // Nothing to release: never created, or already released.
                return;
            }

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
            SherpaOnnxDestroyOfflineSpeakerDiarization(h);
        }

        /// <summary>
        /// Returns the native handle, or throws when it is null.
        /// </summary>
        /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
        private IntPtr GetHandleOrThrow()
        {
            IntPtr h = _handle.Handle;
            if (h == IntPtr.Zero)
            {
                throw new ObjectDisposedException(
                    nameof(OfflineSpeakerDiarization),
                    "The native handle is null: the object was disposed or could not be created.");
            }

            return h;
        }

        private HandleRef _handle;

        /// <summary>
        /// Sample rate reported by the native diarization object.
        /// </summary>
        /// <exception cref="ObjectDisposedException">The native handle is null.</exception>
        public int SampleRate
        {
            get
            {
                int sampleRate = SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(GetHandleOrThrow());
                GC.KeepAlive(this);
                return sampleRate;
            }
        }

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxCreateOfflineSpeakerDiarization(ref OfflineSpeakerDiarizationConfig config);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxDestroyOfflineSpeakerDiarization(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern int SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcess(IntPtr handle, float[] samples, int n);

        [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
        private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(IntPtr handle, float[] samples, int n, OfflineSpeakerDiarizationProgressCallback callback, IntPtr arg);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(IntPtr handle);
    }
}
...
...
scripts/dotnet/OfflineSpeakerDiarizationConfig.cs
0 → 100644
查看文件 @
a45e5db
/// Copyright (c) 2024 Xiaomi Corporation
using
System.Runtime.InteropServices
;
namespace SherpaOnnx
{
    /// <summary>
    /// Top-level configuration given to the <c>OfflineSpeakerDiarization</c>
    /// constructor.
    /// </summary>
    /// <remarks>
    /// The struct uses sequential layout, so the declaration order of the
    /// fields is part of its binary shape and must not be changed.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeakerDiarizationConfig
    {
        /// <summary>Speaker segmentation model settings.</summary>
        public OfflineSpeakerSegmentationModelConfig Segmentation;

        /// <summary>Speaker embedding extractor settings.</summary>
        public SpeakerEmbeddingExtractorConfig Embedding;

        /// <summary>Clustering settings.</summary>
        public FastClusteringConfig Clustering;

        /// <summary>
        /// Defaults to 0.3.
        /// NOTE(review): presumably a minimum duration in seconds - confirm
        /// against the native library.
        /// </summary>
        public float MinDurationOn;

        /// <summary>
        /// Defaults to 0.5.
        /// NOTE(review): presumably a minimum duration in seconds - confirm
        /// against the native library.
        /// </summary>
        public float MinDurationOff;

        /// <summary>
        /// Creates a configuration in which every nested configuration is
        /// default-constructed and the two durations have their default values.
        /// </summary>
        public OfflineSpeakerDiarizationConfig()
        {
            this.MinDurationOn = 0.3F;
            this.MinDurationOff = 0.5F;

            this.Segmentation = new OfflineSpeakerSegmentationModelConfig();
            this.Embedding = new SpeakerEmbeddingExtractorConfig();
            this.Clustering = new FastClusteringConfig();
        }
    }
}
...
...
scripts/dotnet/OfflineSpeakerDiarizationSegment.cs
0 → 100644
查看文件 @
a45e5db
/// Copyright (c) 2024 Xiaomi Corporation
using
System
;
using
System.Runtime.InteropServices
;
using
System.Text
;
namespace SherpaOnnx
{
    /// <summary>
    /// One diarization segment: a start value, an end value and a speaker index,
    /// copied out of native memory at construction time.
    /// </summary>
    public class OfflineSpeakerDiarizationSegment
    {
        /// <summary>
        /// Native layout of a segment: two floats followed by one int. The field
        /// order is significant because the struct is marshaled sequentially.
        /// </summary>
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public float Start;
            public float End;
            public int Speaker;
        }

        /// <summary>Start of the segment. NOTE(review): presumably in seconds - confirm.</summary>
        public float Start;

        /// <summary>End of the segment. NOTE(review): presumably in seconds - confirm.</summary>
        public float End;

        /// <summary>Index of the speaker assigned to the segment.</summary>
        public int Speaker;

        /// <summary>
        /// Reads one native segment from <paramref name="handle"/> and copies its
        /// values into this object. The native memory is not retained.
        /// </summary>
        /// <param name="handle">Pointer to a native segment laid out as two floats and one int.</param>
        public OfflineSpeakerDiarizationSegment(IntPtr handle)
        {
            var native = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            this.Start = native.Start;
            this.End = native.End;
            this.Speaker = native.Speaker;
        }
    }
}
...
...
scripts/dotnet/OfflineSpeakerSegmentationModelConfig.cs
0 → 100644
查看文件 @
a45e5db
/// Copyright (c) 2024 Xiaomi Corporation
using
System.Runtime.InteropServices
;
namespace SherpaOnnx
{
    /// <summary>
    /// Settings of the speaker segmentation model, embedded in
    /// <c>OfflineSpeakerDiarizationConfig</c>.
    /// </summary>
    /// <remarks>
    /// The struct uses sequential layout, so the declaration order of the
    /// fields is part of its binary shape and must not be changed.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeakerSegmentationModelConfig
    {
        /// <summary>Settings of the pyannote segmentation model.</summary>
        public OfflineSpeakerSegmentationPyannoteModelConfig Pyannote;

        /// <summary>Number of threads used to run the neural network model. Defaults to 1.</summary>
        public int NumThreads;

        /// <summary>
        /// Enables printing debug information of the model. Defaults to 0.
        /// NOTE(review): stored as an int; presumably any non-zero value means
        /// "true" - confirm.
        /// </summary>
        public int Debug;

        /// <summary>Provider name, marshaled as an ANSI string. Defaults to "cpu".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Provider;

        /// <summary>
        /// Creates a configuration populated with the default values.
        /// </summary>
        public OfflineSpeakerSegmentationModelConfig()
        {
            this.Pyannote = new OfflineSpeakerSegmentationPyannoteModelConfig();
            this.Provider = "cpu";
            this.Debug = 0;
            this.NumThreads = 1;
        }
    }
}
...
...
scripts/dotnet/OfflineSpeakerSegmentationPyannoteModelConfig.cs
0 → 100644
查看文件 @
a45e5db
/// Copyright (c) 2024 Xiaomi Corporation
using
System.Runtime.InteropServices
;
namespace SherpaOnnx
{
    /// <summary>
    /// Settings of the pyannote speaker segmentation model, embedded in
    /// <c>OfflineSpeakerSegmentationModelConfig</c>.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineSpeakerSegmentationPyannoteModelConfig
    {
        /// <summary>
        /// Path to the model file, marshaled as an ANSI string. Defaults to the
        /// empty string.
        /// </summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Model;

        /// <summary>
        /// Creates a configuration with an empty model path.
        /// </summary>
        public OfflineSpeakerSegmentationPyannoteModelConfig()
        {
            this.Model = "";
        }
    }
}
...
...
请
注册
或
登录
后发表评论