Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-01-30 11:21:43 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-01-30 11:21:43 +0800
Commit
0b18ccfbb2ec51b3986c0b6bcef6ea9911ccc1e0
0b18ccfb
1 parent
0aa47e5c
C++ API demo for speaker identification with portaudio. (#561)
隐藏空白字符变更
内嵌
并排对比
正在显示
5 个修改的文件
包含
320 行增加
和
2 行删除
cmake/cmake_extension.py
setup.py
sherpa-onnx/csrc/CMakeLists.txt
sherpa-onnx/csrc/sherpa-onnx-microphone-offline-speaker-identification.cc
sherpa-onnx/csrc/speaker-embedding-extractor.cc
cmake/cmake_extension.py
查看文件 @
0b18ccf
...
...
@@ -155,6 +155,7 @@ class BuildExtension(build_ext):
binaries
+=
[
"sherpa-onnx-offline"
]
binaries
+=
[
"sherpa-onnx-microphone"
]
binaries
+=
[
"sherpa-onnx-microphone-offline"
]
binaries
+=
[
"sherpa-onnx-microphone-offline-speaker-identification"
]
binaries
+=
[
"sherpa-onnx-online-websocket-server"
]
binaries
+=
[
"sherpa-onnx-offline-websocket-server"
]
binaries
+=
[
"sherpa-onnx-online-websocket-client"
]
...
...
setup.py
查看文件 @
0b18ccf
...
...
@@ -48,6 +48,7 @@ def get_binaries_to_install():
binaries
+=
[
"sherpa-onnx-offline"
]
binaries
+=
[
"sherpa-onnx-microphone"
]
binaries
+=
[
"sherpa-onnx-microphone-offline"
]
binaries
+=
[
"sherpa-onnx-microphone-offline-speaker-identification"
]
binaries
+=
[
"sherpa-onnx-online-websocket-server"
]
binaries
+=
[
"sherpa-onnx-offline-websocket-server"
]
binaries
+=
[
"sherpa-onnx-online-websocket-client"
]
...
...
sherpa-onnx/csrc/CMakeLists.txt
查看文件 @
0b18ccf
...
...
@@ -287,6 +287,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
microphone.cc
)
add_executable
(
sherpa-onnx-microphone-offline-speaker-identification
sherpa-onnx-microphone-offline-speaker-identification.cc
microphone.cc
)
if
(
BUILD_SHARED_LIBS
)
set
(
PA_LIB portaudio
)
else
()
...
...
@@ -294,9 +299,10 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
endif
()
set
(
exes
sherpa-onnx-offline-tts-play
sherpa-onnx-microphone
sherpa-onnx-microphone-offline
sherpa-onnx-microphone-offline-speaker-identification
sherpa-onnx-offline-tts-play
sherpa-onnx-vad-microphone
sherpa-onnx-vad-microphone-offline-asr
)
...
...
sherpa-onnx/csrc/sherpa-onnx-microphone-offline-speaker-identification.cc
0 → 100644
查看文件 @
0b18ccf
// sherpa-onnx/csrc/sherpa-onnx-microphone-offline-speaker-identification.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <atomic>
#include <fstream>
#include <mutex>  // NOLINT
#include <sstream>
#include <string>
#include <thread>  // NOLINT
#include <unordered_map>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/csrc/wave-reader.h"
// States of the push-to-talk loop.
//
// DetectKeyPress() advances kIdle -> kRecording -> kComputing on successive
// Enter key presses; main() switches kComputing back to kIdle after it has
// reported the detected speaker.
enum class State {
  kIdle,
  kRecording,
  kComputing,
};

// Current state of the program. It is written by the key-press thread and by
// main(), so it is atomic to keep those concurrent accesses free of data
// races.
std::atomic<State> state{State::kIdle};

// true to stop the program and exit.
// It is set by the SIGINT handler and polled by the key-press thread, the
// main loop and the audio callback, hence atomic.
std::atomic<bool> stop{false};

// Audio captured by RecordCallback(). Every access must hold samples_mutex.
std::vector<float> samples;
std::mutex samples_mutex;
static
void
DetectKeyPress
()
{
SHERPA_ONNX_LOGE
(
"
\n
Press Enter to start"
);
int32_t
key
;
while
(
!
stop
&&
(
key
=
getchar
()))
{
if
(
key
!=
0x0a
)
{
continue
;
}
switch
(
state
)
{
case
State
:
:
kIdle
:
SHERPA_ONNX_LOGE
(
"
\n
Start recording. Press Enter to stop recording"
);
state
=
State
::
kRecording
;
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
samples_mutex
);
samples
.
clear
();
}
break
;
case
State
:
:
kRecording
:
SHERPA_ONNX_LOGE
(
"
\n
Stop recording. Computing ..."
);
state
=
State
::
kComputing
;
break
;
case
State
:
:
kComputing
:
break
;
}
}
}
static
int32_t
RecordCallback
(
const
void
*
input_buffer
,
void
*
/*output_buffer*/
,
unsigned
long
frames_per_buffer
,
// NOLINT
const
PaStreamCallbackTimeInfo
*
/*time_info*/
,
PaStreamCallbackFlags
/*status_flags*/
,
void
*
user_data
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
samples_mutex
);
auto
p
=
reinterpret_cast
<
const
float
*>
(
input_buffer
);
samples
.
insert
(
samples
.
end
(),
p
,
p
+
frames_per_buffer
);
return
stop
?
paComplete
:
paContinue
;
}
// SIGINT handler. Raises the global `stop` flag and tells the user that one
// more Enter press is required: the key-press thread only re-checks `stop`
// after its blocking read from stdin returns.
//
// NOTE(review): stdio functions are not guaranteed to be async-signal-safe;
// confirm this is acceptable for a demo program.
static void Handler(int32_t /*sig*/) {
  stop = true;
  fputs("\nCaught Ctrl + C. Press Enter to exit\n", stderr);
}
// Computes one speaker embedding per wave file.
//
// @param filenames Paths of the wave files to process.
// @param extractor Extractor used to create a stream for each file and to
//                  compute its embedding. Must not be null.
// @return The embeddings, in the same order as `filenames`.
//
// Prints an error and terminates the process with exit(-1) if a file cannot
// be read.
static std::vector<std::vector<float>> ComputeEmbeddings(
    const std::vector<std::string> &filenames,
    sherpa_onnx::SpeakerEmbeddingExtractor *extractor) {
  std::vector<std::vector<float>> embedding_list;
  embedding_list.reserve(filenames.size());

  for (const auto &f : filenames) {
    int32_t sampling_rate = -1;
    bool is_ok = false;

    // Named `wave` rather than `samples` so that it does not shadow the
    // file-scope microphone buffer of that name.
    const std::vector<float> wave =
        sherpa_onnx::ReadWave(f, &sampling_rate, &is_ok);
    if (!is_ok) {
      fprintf(stderr, "Failed to read %s\n", f.c_str());
      exit(-1);
    }

    auto s = extractor->CreateStream();
    s->AcceptWaveform(sampling_rate, wave.data(), wave.size());
    s->InputFinished();

    // Push the result directly instead of going through a named local, which
    // would force an extra copy of the embedding.
    embedding_list.push_back(extractor->Compute(s.get()));
  }

  return embedding_list;
}
// Parses the speaker list file.
//
// Every non-blank line must contain exactly two whitespace-separated columns:
// the speaker name followed by the path of a wave file of that speaker. A
// speaker may appear on several lines. Blank lines, trailing whitespace and
// CRLF line endings are tolerated.
//
// @param filename Path of the text file to read.
// @return A map from speaker name to the wave files listed for that speaker,
//         in the order they appear in the file.
//
// Prints an error and terminates the process with a non-zero exit status if
// the file cannot be opened or if a line does not have exactly two columns.
static std::unordered_map<std::string, std::vector<std::string>>
ReadSpeakerFile(const std::string &filename) {
  std::unordered_map<std::string, std::vector<std::string>> ans;

  std::ifstream is(filename);
  if (!is) {
    // This is a failure, so it must not exit with status 0.
    fprintf(stderr, "Failed to open %s\n", filename.c_str());
    exit(-1);
  }

  std::string line;
  std::string name;
  std::string path;
  std::string extra;

  while (std::getline(is, line)) {
    std::istringstream iss(line);
    name.clear();
    path.clear();
    extra.clear();

    if (!(iss >> name)) {
      // The line is empty or contains only whitespace.
      continue;
    }

    // A second column is required and a third one is an error. Extracting
    // into `extra` skips trailing whitespace (including '\r'), so such lines
    // are still accepted.
    if (!(iss >> path) || (iss >> extra)) {
      fprintf(stderr, "Invalid line: %s\n", line.c_str());
      exit(-1);
    }

    ans[name].push_back(path);
  }

  return ans;
}
int32_t
main
(
int32_t
argc
,
char
*
argv
[])
{
signal
(
SIGINT
,
Handler
);
const
char
*
kUsageMessage
=
R"usage(
This program shows how to use non-streaming speaker identification.
Usage:
(1) Prepare a text file containing speaker related files.
Each line in the text file contains two columns. The first column is the
speaker name, while the second column contains the wave file of the speaker.
If the text file contains multiple wave files for the same speaker, then the
embeddings of these files are averaged.
An example text file is given below:
foo /path/to/a.wav
bar /path/to/b.wav
foo /path/to/c.wav
foobar /path/to/d.wav
Each wave file should contain only a single channel; the sample format
should be int16_t; the sample rate can be arbitrary.
(2) Download a model for computing speaker embeddings
Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
to download a model. An example is given below:
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/wespeaker_zh_cnceleb_resnet34.onnx
Note that `zh` means Chinese, while `en` means English.
(3) Run it !
./bin/sherpa-onnx-microphone-offline-speaker-identification \
--model=/path/to/your-model.onnx \
--speaker-file=/path/to/speaker.txt
)usage"
;
sherpa_onnx
::
ParseOptions
po
(
kUsageMessage
);
float
threshold
=
0.5
;
std
::
string
speaker_file
;
po
.
Register
(
"threshold"
,
&
threshold
,
"Threshold for comparing embedding scores."
);
po
.
Register
(
"speaker-file"
,
&
speaker_file
,
"Path to speaker.txt"
);
sherpa_onnx
::
SpeakerEmbeddingExtractorConfig
config
;
config
.
Register
(
&
po
);
po
.
Read
(
argc
,
argv
);
if
(
po
.
NumArgs
()
!=
0
)
{
fprintf
(
stderr
,
"This program does not support any positional arguments.
\n
"
);
po
.
PrintUsage
();
exit
(
EXIT_FAILURE
);
}
fprintf
(
stderr
,
"%s
\n
"
,
config
.
ToString
().
c_str
());
if
(
!
config
.
Validate
())
{
fprintf
(
stderr
,
"Errors in config! Please use --help to view the usage.
\n
"
);
return
-
1
;
}
SHERPA_ONNX_LOGE
(
"
\n
Creating extractor ..."
);
sherpa_onnx
::
SpeakerEmbeddingExtractor
extractor
(
config
);
SHERPA_ONNX_LOGE
(
"
\n
extractor created!"
);
sherpa_onnx
::
SpeakerEmbeddingManager
manager
(
extractor
.
Dim
());
auto
name2files
=
ReadSpeakerFile
(
speaker_file
);
for
(
const
auto
&
p
:
name2files
)
{
SHERPA_ONNX_LOGE
(
"
\n
Processing speaker %s"
,
p
.
first
.
c_str
());
auto
embedding_list
=
ComputeEmbeddings
(
p
.
second
,
&
extractor
);
manager
.
Add
(
p
.
first
,
embedding_list
);
}
sherpa_onnx
::
Microphone
mic
;
PaDeviceIndex
num_devices
=
Pa_GetDeviceCount
();
fprintf
(
stderr
,
"Num devices: %d
\n
"
,
num_devices
);
PaStreamParameters
param
;
param
.
device
=
Pa_GetDefaultInputDevice
();
if
(
param
.
device
==
paNoDevice
)
{
fprintf
(
stderr
,
"No default input device found
\n
"
);
exit
(
EXIT_FAILURE
);
}
fprintf
(
stderr
,
"Use default device: %d
\n
"
,
param
.
device
);
const
PaDeviceInfo
*
info
=
Pa_GetDeviceInfo
(
param
.
device
);
fprintf
(
stderr
,
" Name: %s
\n
"
,
info
->
name
);
fprintf
(
stderr
,
" Max input channels: %d
\n
"
,
info
->
maxInputChannels
);
param
.
channelCount
=
1
;
param
.
sampleFormat
=
paFloat32
;
param
.
suggestedLatency
=
info
->
defaultLowInputLatency
;
param
.
hostApiSpecificStreamInfo
=
nullptr
;
float
sample_rate
=
16000
;
PaStream
*
stream
;
PaError
err
=
Pa_OpenStream
(
&
stream
,
&
param
,
nullptr
,
/* &outputParameters, */
sample_rate
,
0
,
// frames per buffer
paClipOff
,
// we won't output out of range samples
// so don't bother clipping them
RecordCallback
,
nullptr
);
if
(
err
!=
paNoError
)
{
fprintf
(
stderr
,
"portaudio error: %s
\n
"
,
Pa_GetErrorText
(
err
));
exit
(
EXIT_FAILURE
);
}
err
=
Pa_StartStream
(
stream
);
fprintf
(
stderr
,
"Started
\n
"
);
if
(
err
!=
paNoError
)
{
fprintf
(
stderr
,
"portaudio error: %s
\n
"
,
Pa_GetErrorText
(
err
));
exit
(
EXIT_FAILURE
);
}
std
::
thread
t
(
DetectKeyPress
);
while
(
!
stop
)
{
switch
(
state
)
{
case
State
:
:
kIdle
:
break
;
case
State
:
:
kRecording
:
break
;
case
State
:
:
kComputing
:
{
std
::
vector
<
float
>
buf
;
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
samples_mutex
);
buf
=
std
::
move
(
samples
);
}
auto
s
=
extractor
.
CreateStream
();
s
->
AcceptWaveform
(
sample_rate
,
buf
.
data
(),
buf
.
size
());
s
->
InputFinished
();
auto
embedding
=
extractor
.
Compute
(
s
.
get
());
auto
name
=
manager
.
Search
(
embedding
.
data
(),
threshold
);
if
(
name
.
empty
())
{
name
=
"--Unknown--"
;
}
SHERPA_ONNX_LOGE
(
"
\n
Done!
\n
Detected speaker is: %s"
,
name
.
c_str
());
state
=
State
::
kIdle
;
SHERPA_ONNX_LOGE
(
"
\n
Press Enter to start"
);
break
;
}
}
Pa_Sleep
(
20
);
// sleep for 20ms
}
t
.
join
();
err
=
Pa_CloseStream
(
stream
);
if
(
err
!=
paNoError
)
{
fprintf
(
stderr
,
"portaudio error: %s
\n
"
,
Pa_GetErrorText
(
err
));
exit
(
EXIT_FAILURE
);
}
return
0
;
}
...
...
sherpa-onnx/csrc/speaker-embedding-extractor.cc
查看文件 @
0b18ccf
...
...
@@ -26,7 +26,7 @@ void SpeakerEmbeddingExtractorConfig::Register(ParseOptions *po) {
bool
SpeakerEmbeddingExtractorConfig
::
Validate
()
const
{
if
(
model
.
empty
())
{
SHERPA_ONNX_LOGE
(
"Please provide --
speaker-embedding-
model"
);
SHERPA_ONNX_LOGE
(
"Please provide --model"
);
return
false
;
}
...
...
请
注册
或
登录
后发表评论