Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-05-11 13:27:33 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-05-11 13:27:33 +0800
Commit
677bc1da3e2b5be192b647b1a1842e052c10c792
677bc1da
1 parent
a88b3bac
Add Speaker ID demo for C# (#862)
隐藏空白字符变更
内嵌
并排对比
正在显示
10 个修改的文件
包含
511 行增加
和
2 行删除
.github/scripts/test-dot-net.sh
.github/workflows/test-dot-net.yaml
CMakeLists.txt
dotnet-examples/sherpa-onnx.sln
dotnet-examples/speaker-identification/Program.cs
dotnet-examples/speaker-identification/WaveReader.cs
dotnet-examples/speaker-identification/run.sh
dotnet-examples/speaker-identification/speaker-identification.csproj
scripts/dotnet/examples/speaker-identification.csproj
scripts/dotnet/offline.cs
.github/scripts/test-dot-net.sh
查看文件 @
677bc1d
...
...
@@ -2,7 +2,10 @@
cd
dotnet-examples/
cd
streaming-hlg-decoding/
cd
speaker-identification
./run.sh
cd
../streaming-hlg-decoding/
./run.sh
cd
../spoken-language-identification
...
...
.github/workflows/test-dot-net.yaml
查看文件 @
677bc1d
...
...
@@ -179,6 +179,7 @@ jobs:
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
ls -lh /tmp
...
...
CMakeLists.txt
查看文件 @
677bc1d
cmake_minimum_required
(
VERSION 3.13 FATAL_ERROR
)
project
(
sherpa-onnx
)
set
(
SHERPA_ONNX_VERSION
"1.9.2
3
"
)
set
(
SHERPA_ONNX_VERSION
"1.9.2
4
"
)
# Disable warning about
#
...
...
dotnet-examples/sherpa-onnx.sln
查看文件 @
677bc1d
...
...
@@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identificat
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
...
...
@@ -54,5 +56,9 @@ Global
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
...
...
dotnet-examples/speaker-identification/Program.cs
0 → 100644
查看文件 @
677bc1d
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do speaker identification with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
//
// 2. Download test data from
//
// git clone https://github.com/csukuangfj/sr-data
//
// 3. Now run it
//
// dotnet run
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
/// <summary>
/// Console demo for speaker identification: enrolls two speakers from wave
/// files, lists the registered speakers, searches for the speaker of each test
/// file, and finally exercises Verify and Remove.
///
/// Every failed check prints its message and sets a non-zero process exit
/// code, so a script that runs this program can detect the failure.
/// </summary>
class SpeakerIdentificationDemo
{
    /// <summary>
    /// Reads the wave file <paramref name="filename"/>, feeds all of its samples
    /// to a new stream created by <paramref name="extractor"/>, and returns the
    /// embedding computed from that stream.
    /// </summary>
    /// <param name="extractor">Extractor used to create the stream and compute the embedding.</param>
    /// <param name="filename">Path of the wave file to read.</param>
    /// <returns>The array returned by <c>extractor.Compute</c>.</returns>
    public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
    {
        WaveReader reader = new WaveReader(filename);

        // NOTE(review): the stream is not disposed explicitly; if OnlineStream
        // implements IDisposable, wrap it in a using statement - confirm against
        // its definition.
        OnlineStream stream = extractor.CreateStream();
        stream.AcceptWaveform(reader.SampleRate, reader.Samples);
        stream.InputFinished();

        float[] embedding = extractor.Compute(stream);
        return embedding;
    }

    // Computes one embedding per file, in the same order as `files`.
    private static float[][] ComputeEmbeddings(SpeakerEmbeddingExtractor extractor, string[] files)
    {
        float[][] embeddings = new float[files.Length][];
        for (int i = 0; i < files.Length; ++i)
        {
            embeddings[i] = ComputeEmbedding(extractor, files[i]);
        }

        return embeddings;
    }

    // Reports a failed check and makes the process exit with a non-zero code
    // once Main returns.
    private static void Fail(string message)
    {
        Console.WriteLine(message);
        Environment.ExitCode = 1;
    }

    static void Main(string[] args)
    {
        var config = new SpeakerEmbeddingExtractorConfig();
        config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
        config.Debug = 1;

        // Both wrappers are IDisposable; `using` releases them on every exit
        // path, including the early returns below.
        using var extractor = new SpeakerEmbeddingExtractor(config);
        using var manager = new SpeakerEmbeddingManager(extractor.Dim);

        string[] spk1Files = new string[]
        {
            "./sr-data/enroll/fangjun-sr-1.wav",
            "./sr-data/enroll/fangjun-sr-2.wav",
            "./sr-data/enroll/fangjun-sr-3.wav",
        };
        float[][] spk1Vec = ComputeEmbeddings(extractor, spk1Files);

        string[] spk2Files = new string[]
        {
            "./sr-data/enroll/leijun-sr-1.wav",
            "./sr-data/enroll/leijun-sr-2.wav",
        };
        float[][] spk2Vec = ComputeEmbeddings(extractor, spk2Files);

        if (!manager.Add("fangjun", spk1Vec))
        {
            Fail("Failed to register fangjun");
            return;
        }

        if (!manager.Add("leijun", spk2Vec))
        {
            Fail("Failed to register leijun");
            return;
        }

        if (manager.NumSpeakers != 2)
        {
            Fail("There should be two speakers");
            return;
        }

        if (!manager.Contains("fangjun"))
        {
            Fail("It should contain the speaker fangjun");
            return;
        }

        if (!manager.Contains("leijun"))
        {
            Fail("It should contain the speaker leijun");
            return;
        }

        Console.WriteLine("---All speakers---");

        string[] allSpeakers = manager.GetAllSpeakers();
        foreach (var s in allSpeakers)
        {
            Console.WriteLine(s);
        }

        Console.WriteLine("------------");

        string[] testFiles = new string[]
        {
            "./sr-data/test/fangjun-test-sr-1.wav",
            "./sr-data/test/leijun-test-sr-1.wav",
            "./sr-data/test/liudehua-test-sr-1.wav"
        };

        float threshold = 0.6f;
        foreach (var file in testFiles)
        {
            float[] embedding = ComputeEmbedding(extractor, file);

            // An empty result is reported as an unknown speaker.
            String name = manager.Search(embedding, threshold);
            if (name == "")
            {
                name = "<Unknown>";
            }

            Console.WriteLine("{0}: {1}", file, name);
        }

        // test verify
        if (!manager.Verify("fangjun", ComputeEmbedding(extractor, testFiles[0]), threshold))
        {
            Fail("testFiles[0] should match fangjun!");
            return;
        }

        if (!manager.Remove("fangjun"))
        {
            Fail("Failed to remove fangjun");
            return;
        }

        // After removal the same audio must no longer verify as fangjun.
        if (manager.Verify("fangjun", ComputeEmbedding(extractor, testFiles[0]), threshold))
        {
            Fail(String.Format("{0} should match no one!", testFiles[0]));
            return;
        }

        if (manager.NumSpeakers != 1)
        {
            Fail("There should only 1 speaker left.");
            return;
        }
    }
}
...
...
dotnet-examples/speaker-identification/WaveReader.cs
0 → 120000
查看文件 @
677bc1d
../
offline
-
decode
-
files
/
WaveReader
.
cs
\ No newline at end of file
...
...
dotnet-examples/speaker-identification/run.sh
0 → 100755
查看文件 @
677bc1d
#!/usr/bin/env bash
#
# Downloads the speaker embedding model and the test data if they are not
# already present in the current directory, then runs the demo with `dotnet run`.

set -ex

model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

if [ ! -e "./$model" ]; then
  # -f makes curl exit with an error on an HTTP failure instead of saving the
  # server's error page under the model's name; such a file would make the
  # existence check above skip the download on every later run.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model"
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

dotnet run
...
...
dotnet-examples/speaker-identification/speaker-identification.csproj
0 → 100644
查看文件 @
677bc1d
<Project Sdk="Microsoft.NET.Sdk">
<!-- Project file for the speaker-identification console example (builds an executable for net6.0). -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Version="*" is a floating version: no specific package version is pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
...
...
scripts/dotnet/examples/speaker-identification.csproj
0 → 100644
查看文件 @
677bc1d
<Project Sdk="Microsoft.NET.Sdk">
<!-- Variant of the speaker-identification example project that also restores packages from a local folder. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<PropertyGroup>
<!-- Package sources: the local folder /tmp/packages is listed first, then any sources already configured, then nuget.org. -->
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>
<ItemGroup>
<!-- Version="*" is a floating version: no specific package version is pinned here. -->
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
...
...
scripts/dotnet/offline.cs
查看文件 @
677bc1d
...
...
@@ -222,6 +222,14 @@ namespace SherpaOnnx
}
}
/// <summary>
/// Value returned by the native SherpaOnnxOfflineTtsNumSpeakers function for
/// this object's handle. The native function is called on every read.
/// </summary>
public int NumSpeakers => SherpaOnnxOfflineTtsNumSpeakers(_handle.Handle);
// P/Invoke declaration for the native function SherpaOnnxCreateOfflineTts in
// the library named by Dll.Filename. The config struct is passed by reference
// and the result is returned as an IntPtr.
[
DllImport
(
Dll
.
Filename
)]
private
static
extern
IntPtr
SherpaOnnxCreateOfflineTts
(
ref
OfflineTtsConfig
config
);
...
...
@@ -232,6 +240,9 @@ namespace SherpaOnnx
private
static
extern
int
SherpaOnnxOfflineTtsSampleRate
(
IntPtr
handle
);
// P/Invoke declaration for the native function SherpaOnnxOfflineTtsNumSpeakers
// in the library named by Dll.Filename. Takes a handle as an IntPtr and
// returns an int.
[
DllImport
(
Dll
.
Filename
)]
private
static
extern
int
SherpaOnnxOfflineTtsNumSpeakers
(
IntPtr
handle
);
// P/Invoke declaration for the native function SherpaOnnxOfflineTtsGenerate in
// the library named by Dll.Filename. `text` is marshalled as an LPStr string;
// `sid` and `speed` are passed by value. The result is returned as an IntPtr.
[
DllImport
(
Dll
.
Filename
)]
private
static
extern
IntPtr
SherpaOnnxOfflineTtsGenerate
(
IntPtr
handle
,
[
MarshalAs
(
UnmanagedType
.
LPStr
)]
string
text
,
int
sid
,
float
speed
);
[
DllImport
(
Dll
.
Filename
,
CallingConvention
=
CallingConvention
.
Cdecl
)]
...
...
@@ -557,6 +568,112 @@ namespace SherpaOnnx
}
/// <summary>
/// Configuration passed by reference to the native
/// SherpaOnnxCreateSpeakerEmbeddingExtractor function.
///
/// The struct uses sequential layout, so the fields must stay in their
/// declared order: Model, NumThreads, Debug, Provider.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SpeakerEmbeddingExtractorConfig
{
    /// <summary>
    /// Creates a config whose fields hold the defaults declared on them below.
    /// </summary>
    public SpeakerEmbeddingExtractorConfig()
    {
    }

    /// <summary>Model file path; defaults to the empty string.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model = "";

    /// <summary>Defaults to 1.</summary>
    public int NumThreads = 1;

    /// <summary>Defaults to 0.</summary>
    public int Debug = 0;

    /// <summary>Defaults to "cpu".</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider = "cpu";
}
/// <summary>
/// Managed wrapper around a native speaker embedding extractor handle.
///
/// The handle is created in the constructor and released by
/// <see cref="Dispose"/> or, failing that, by the finalizer.
/// </summary>
public class SpeakerEmbeddingExtractor : IDisposable
{
    /// <summary>
    /// Creates the native extractor from <paramref name="config"/> and stores
    /// the returned handle.
    /// </summary>
    public SpeakerEmbeddingExtractor(SpeakerEmbeddingExtractorConfig config)
    {
        IntPtr h = SherpaOnnxCreateSpeakerEmbeddingExtractor(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>
    /// Asks the native extractor for a new stream and wraps the returned
    /// pointer in an <see cref="OnlineStream"/>.
    /// </summary>
    public OnlineStream CreateStream()
    {
        IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorCreateStream(_handle.Handle);
        return new OnlineStream(p);
    }

    /// <summary>
    /// Returns true when the native IsReady call returns a non-zero value for
    /// <paramref name="stream"/>.
    /// </summary>
    public bool IsReady(OnlineStream stream)
    {
        return SherpaOnnxSpeakerEmbeddingExtractorIsReady(_handle.Handle, stream.Handle) != 0;
    }

    /// <summary>
    /// Computes the embedding for <paramref name="stream"/> and returns it as a
    /// managed array of <see cref="Dim"/> floats.
    /// </summary>
    public float[] Compute(OnlineStream stream)
    {
        IntPtr p = SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(_handle.Handle, stream.Handle);

        try
        {
            int dim = Dim;
            float[] ans = new float[dim];
            Marshal.Copy(p, ans, 0, dim);
            return ans;
        }
        finally
        {
            // Release the native buffer even when the allocation or the copy
            // above throws. A null pointer is never handed to the native side.
            if (p != IntPtr.Zero)
            {
                SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(p);
            }
        }
    }

    /// <summary>
    /// Value returned by the native Dim function for this extractor. The
    /// native function is called on every read.
    /// </summary>
    public int Dim
    {
        get
        {
            return SherpaOnnxSpeakerEmbeddingExtractorDim(_handle.Handle);
        }
    }

    /// <summary>
    /// Releases the native extractor. Calling it more than once is safe.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~SpeakerEmbeddingExtractor()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Already released (or never created): do not call into native code
        // with a null handle.
        if (_handle.Handle == IntPtr.Zero)
        {
            return;
        }

        SherpaOnnxDestroySpeakerEmbeddingExtractor(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingExtractor(ref SpeakerEmbeddingExtractorConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpeakerEmbeddingExtractor(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingExtractorDim(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorCreateStream(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingExtractorIsReady(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(IntPtr p);
}
[
StructLayout
(
LayoutKind
.
Sequential
)]
public
struct
SpokenLanguageIdentificationWhisperConfig
{
public
SpokenLanguageIdentificationWhisperConfig
()
...
...
@@ -593,6 +710,185 @@ namespace SherpaOnnx
public
string
Provider
;
}
/// <summary>
/// Managed wrapper around a native speaker embedding manager handle. It
/// registers named speakers with their embeddings and supports search,
/// verification, removal and listing.
///
/// Every embedding passed in must have exactly the dimension given to the
/// constructor; other lengths are rejected before native code is called.
/// </summary>
public class SpeakerEmbeddingManager : IDisposable
{
    /// <summary>
    /// Creates the native manager for embeddings of <paramref name="dim"/> elements.
    /// </summary>
    public SpeakerEmbeddingManager(int dim)
    {
        IntPtr h = SherpaOnnxCreateSpeakerEmbeddingManager(dim);
        _handle = new HandleRef(this, h);
        this._dim = dim;
    }

    /// <summary>
    /// Registers <paramref name="name"/> with a single embedding.
    /// </summary>
    /// <returns>True when the native call returns 1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="v"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="v"/> does not have the manager's dimension.</exception>
    public bool Add(string name, float[] v)
    {
        CheckEmbedding(v, nameof(v));
        return SherpaOnnxSpeakerEmbeddingManagerAdd(_handle.Handle, name, v) == 1;
    }

    /// <summary>
    /// Registers <paramref name="name"/> with several embeddings. They are
    /// flattened into one contiguous array before being handed to native code.
    /// </summary>
    /// <returns>True when the native call returns 1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="v_list"/> or one of its items is null.</exception>
    /// <exception cref="ArgumentException">An item does not have the manager's dimension.</exception>
    public bool Add(string name, ICollection<float[]> v_list)
    {
        if (v_list == null)
        {
            throw new ArgumentNullException(nameof(v_list));
        }

        int n = v_list.Count;
        float[] v = new float[n * _dim];
        int i = 0;
        foreach (var item in v_list)
        {
            // Each item fills exactly one _dim-sized slot. A shorter item
            // would leave zeros in its slot and a longer one would spill into
            // the next slot, so both are rejected.
            CheckEmbedding(item, nameof(v_list));
            item.CopyTo(v, i);
            i += _dim;
        }

        return SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(_handle.Handle, name, v, n) == 1;
    }

    /// <summary>
    /// Removes <paramref name="name"/>.
    /// </summary>
    /// <returns>True when the native call returns 1.</returns>
    public bool Remove(string name)
    {
        return SherpaOnnxSpeakerEmbeddingManagerRemove(_handle.Handle, name) == 1;
    }

    /// <summary>
    /// Runs the native search for <paramref name="v"/> with
    /// <paramref name="threshold"/> and returns its result decoded as UTF-8.
    /// </summary>
    /// <returns>
    /// The decoded string, or the empty string when the native call returns a
    /// null pointer or an empty string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="v"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="v"/> does not have the manager's dimension.</exception>
    public string Search(float[] v, float threshold)
    {
        CheckEmbedding(v, nameof(v));

        IntPtr p = SherpaOnnxSpeakerEmbeddingManagerSearch(_handle.Handle, v, threshold);
        try
        {
            return Utf8ToString(p);
        }
        finally
        {
            SherpaOnnxSpeakerEmbeddingManagerFreeSearch(p);
        }
    }

    /// <summary>
    /// Runs the native verification of <paramref name="v"/> against
    /// <paramref name="name"/> with <paramref name="threshold"/>.
    /// </summary>
    /// <returns>True when the native call returns 1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="v"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="v"/> does not have the manager's dimension.</exception>
    public bool Verify(string name, float[] v, float threshold)
    {
        CheckEmbedding(v, nameof(v));
        return SherpaOnnxSpeakerEmbeddingManagerVerify(_handle.Handle, name, v, threshold) == 1;
    }

    /// <summary>
    /// Returns true when the native Contains call returns 1 for <paramref name="name"/>.
    /// </summary>
    public bool Contains(string name)
    {
        return SherpaOnnxSpeakerEmbeddingManagerContains(_handle.Handle, name) == 1;
    }

    /// <summary>
    /// Returns the names reported by the native manager, decoded as UTF-8.
    /// </summary>
    /// <returns>
    /// One string per speaker, or an empty array when there are no speakers or
    /// the native call returns a null pointer.
    /// </returns>
    public string[] GetAllSpeakers()
    {
        // Read the count once so the array size and the loop bound agree, and
        // so the native function is not called on every iteration.
        int count = NumSpeakers;
        if (count <= 0)
        {
            return new string[] { };
        }

        IntPtr names = SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(_handle.Handle);
        if (names == IntPtr.Zero)
        {
            return new string[] { };
        }

        try
        {
            string[] ans = new string[count];
            for (int i = 0; i != count; i++)
            {
                // `names` is read as an array of pointers to null-terminated strings.
                IntPtr entry = Marshal.ReadIntPtr(names, i * IntPtr.Size);
                ans[i] = Utf8ToString(entry);
            }

            return ans;
        }
        finally
        {
            SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(names);
        }
    }

    /// <summary>
    /// Releases the native manager. Calling it more than once is safe.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~SpeakerEmbeddingManager()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Already released (or never created): do not call into native code
        // with a null handle.
        if (_handle.Handle == IntPtr.Zero)
        {
            return;
        }

        SherpaOnnxDestroySpeakerEmbeddingManager(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    /// <summary>
    /// Value returned by the native NumSpeakers function. The native function
    /// is called on every read.
    /// </summary>
    public int NumSpeakers
    {
        get
        {
            return SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(_handle.Handle);
        }
    }

    // Rejects a null embedding or one whose length differs from _dim, so that
    // native code is never handed an array of the wrong size.
    private void CheckEmbedding(float[] v, string paramName)
    {
        if (v == null)
        {
            throw new ArgumentNullException(paramName);
        }

        if (v.Length != _dim)
        {
            throw new ArgumentException(
                $"Expected an embedding with {_dim} elements but got {v.Length}.", paramName);
        }
    }

    // Decodes the null-terminated UTF-8 string at p. Returns "" for a null
    // pointer or an empty string.
    private static string Utf8ToString(IntPtr p)
    {
        if (p == IntPtr.Zero)
        {
            return "";
        }

        int length = 0;
        while (Marshal.ReadByte(p, length) != 0)
        {
            ++length;
        }

        if (length == 0)
        {
            return "";
        }

        byte[] stringBuffer = new byte[length];
        Marshal.Copy(p, stringBuffer, 0, length);
        return Encoding.UTF8.GetString(stringBuffer);
    }

    private HandleRef _handle;
    private int _dim;

    // NOTE(review): speaker names are sent to native code as LPStr but results
    // are decoded as UTF-8 above. If LPStr does not produce UTF-8 on some
    // platform, non-ASCII names will not round-trip - confirm.

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateSpeakerEmbeddingManager(int dim);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpeakerEmbeddingManager(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingManagerAdd(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, int n);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingManagerRemove(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerSearch(IntPtr handle, float[] v, float threshold);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(IntPtr p);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingManagerVerify(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name, float[] v, float threshold);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingManagerContains(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string name);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(IntPtr names);
}
public
class
SpokenLanguageIdentificationResult
{
public
SpokenLanguageIdentificationResult
(
IntPtr
handle
)
...
...
请
注册
或
登录
后发表评论