Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2023-03-03 16:42:33 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2023-03-03 16:42:33 +0800
Commit
9d8fddef01d30e841d5f731014f71cd6c82311b5
9d8fddef
1 parent
5f31b22c
Support resampling (#77)
隐藏空白字符变更
内嵌
并排对比
正在显示
10 个修改的文件
包含
96 行增加
和
26 行删除
python-api-examples/decode-file.py
python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py
python-api-examples/speech-recognition-from-microphone.py
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/features.cc
sherpa-onnx/csrc/features.h
sherpa-onnx/csrc/online-stream.cc
sherpa-onnx/csrc/online-stream.h
sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc
sherpa-onnx/python/csrc/online-stream.cc
python-api-examples/decode-file.py
查看文件 @
9d8fdde
...
...
@@ -78,8 +78,6 @@ def get_args():
def
main
():
sample_rate
=
16000
args
=
get_args
()
assert_file_exists
(
args
.
encoder
)
assert_file_exists
(
args
.
decoder
)
...
...
@@ -95,12 +93,16 @@ def main():
decoder
=
args
.
decoder
,
joiner
=
args
.
joiner
,
num_threads
=
args
.
num_threads
,
sample_rate
=
sample_rate
,
sample_rate
=
16000
,
feature_dim
=
80
,
decoding_method
=
args
.
decoding_method
,
)
with
wave
.
open
(
args
.
wave_filename
)
as
f
:
assert
f
.
getframerate
()
==
sample_rate
,
f
.
getframerate
()
# If the wave file has a different sampling rate from the one
# expected by the model (16 kHz in our case), we will do
# resampling inside sherpa-onnx
wave_file_sample_rate
=
f
.
getframerate
()
assert
f
.
getnchannels
()
==
1
,
f
.
getnchannels
()
assert
f
.
getsampwidth
()
==
2
,
f
.
getsampwidth
()
# it is in bytes
num_samples
=
f
.
getnframes
()
...
...
@@ -110,17 +112,17 @@ def main():
samples_float32
=
samples_float32
/
32768
duration
=
len
(
samples_float32
)
/
sample_rate
duration
=
len
(
samples_float32
)
/
wave_file_
sample_rate
start_time
=
time
.
time
()
print
(
"Started!"
)
stream
=
recognizer
.
create_stream
()
stream
.
accept_waveform
(
sample_rate
,
samples_float32
)
stream
.
accept_waveform
(
wave_file_
sample_rate
,
samples_float32
)
tail_paddings
=
np
.
zeros
(
int
(
0.2
*
sample_rate
),
dtype
=
np
.
float32
)
stream
.
accept_waveform
(
sample_rate
,
tail_paddings
)
tail_paddings
=
np
.
zeros
(
int
(
0.2
*
wave_file_sample_rate
),
dtype
=
np
.
float32
)
stream
.
accept_waveform
(
wave_file_sample_rate
,
tail_paddings
)
stream
.
input_finished
()
...
...
python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py
查看文件 @
9d8fdde
...
...
@@ -100,7 +100,9 @@ def main():
recognizer
=
create_recognizer
()
print
(
"Started! Please speak"
)
sample_rate
=
16000
# The model is using 16 kHz, we use 48 kHz here to demonstrate that
# sherpa-onnx will do resampling inside.
sample_rate
=
48000
samples_per_read
=
int
(
0.1
*
sample_rate
)
# 0.1 second = 100 ms
last_result
=
""
stream
=
recognizer
.
create_stream
()
...
...
python-api-examples/speech-recognition-from-microphone.py
查看文件 @
9d8fdde
...
...
@@ -92,9 +92,12 @@ def create_recognizer():
def
main
():
print
(
"Started! Please speak"
)
recognizer
=
create_recognizer
()
sample_rate
=
16000
print
(
"Started! Please speak"
)
# The model is using 16 kHz, we use 48 kHz here to demonstrate that
# sherpa-onnx will do resampling inside.
sample_rate
=
48000
samples_per_read
=
int
(
0.1
*
sample_rate
)
# 0.1 second = 100 ms
last_result
=
""
stream
=
recognizer
.
create_stream
()
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
9d8fdde
...
...
@@ -115,8 +115,9 @@ void DestoryOnlineStream(SherpaOnnxOnlineStream *stream);
/// decoding.
///
/// @param stream A pointer returned by CreateOnlineStream().
/// @param sample_rate Sampler rate of the input samples. It has to be 16 kHz
/// for models from icefall.
/// @param sample_rate Sample rate of the input samples. If it is different
/// from config.feat_config.sample_rate, we will do
/// resampling inside sherpa-onnx.
/// @param samples A pointer to a 1-D array containing audio samples.
/// The range of samples has to be normalized to [-1, 1].
/// @param n Number of elements in the samples array.
...
...
sherpa-onnx/csrc/features.cc
查看文件 @
9d8fdde
...
...
@@ -11,6 +11,8 @@
#include <vector>
#include "kaldi-native-fbank/csrc/online-feature.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/resample.h"
namespace
sherpa_onnx
{
...
...
@@ -50,6 +52,46 @@ class FeatureExtractor::Impl {
void
AcceptWaveform
(
int32_t
sampling_rate
,
const
float
*
waveform
,
int32_t
n
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
if
(
resampler_
)
{
if
(
sampling_rate
!=
resampler_
->
GetInputSamplingRate
())
{
SHERPA_ONNX_LOGE
(
"You changed the input sampling rate!! Expected: %d, given: "
"%d"
,
resampler_
->
GetInputSamplingRate
(),
sampling_rate
);
exit
(
-
1
);
}
std
::
vector
<
float
>
samples
;
resampler_
->
Resample
(
waveform
,
n
,
false
,
&
samples
);
fbank_
->
AcceptWaveform
(
opts_
.
frame_opts
.
samp_freq
,
samples
.
data
(),
samples
.
size
());
return
;
}
if
(
sampling_rate
!=
opts_
.
frame_opts
.
samp_freq
)
{
SHERPA_ONNX_LOGE
(
"Creating a resampler:
\n
"
" in_sample_rate: %d
\n
"
" output_sample_rate: %d
\n
"
,
sampling_rate
,
static_cast
<
int32_t
>
(
opts_
.
frame_opts
.
samp_freq
));
float
min_freq
=
std
::
min
<
int32_t
>
(
sampling_rate
,
opts_
.
frame_opts
.
samp_freq
);
float
lowpass_cutoff
=
0.99
*
0.5
*
min_freq
;
int32_t
lowpass_filter_width
=
6
;
resampler_
=
std
::
make_unique
<
LinearResample
>
(
sampling_rate
,
opts_
.
frame_opts
.
samp_freq
,
lowpass_cutoff
,
lowpass_filter_width
);
std
::
vector
<
float
>
samples
;
resampler_
->
Resample
(
waveform
,
n
,
false
,
&
samples
);
fbank_
->
AcceptWaveform
(
opts_
.
frame_opts
.
samp_freq
,
samples
.
data
(),
samples
.
size
());
return
;
}
fbank_
->
AcceptWaveform
(
sampling_rate
,
waveform
,
n
);
}
...
...
@@ -100,6 +142,7 @@ class FeatureExtractor::Impl {
std
::
unique_ptr
<
knf
::
OnlineFbank
>
fbank_
;
knf
::
FbankOptions
opts_
;
mutable
std
::
mutex
mutex_
;
std
::
unique_ptr
<
LinearResample
>
resampler_
;
};
FeatureExtractor
::
FeatureExtractor
(
const
FeatureExtractorConfig
&
config
/*={}*/
)
...
...
sherpa-onnx/csrc/features.h
查看文件 @
9d8fdde
...
...
@@ -29,9 +29,11 @@ class FeatureExtractor {
~
FeatureExtractor
();
/**
@param sampling_rate The sampling_rate of the input waveform. Should match
the one expected by the feature extractor.
@param waveform Pointer to a 1-D array of size n
@param sampling_rate The sampling_rate of the input waveform. If it is
not equal to config.sampling_rate, we will do
resampling inside.
@param waveform Pointer to a 1-D array of size n. It must be normalized to
the range [-1, 1].
@param n Number of entries in waveform
*/
void
AcceptWaveform
(
int32_t
sampling_rate
,
const
float
*
waveform
,
int32_t
n
);
...
...
sherpa-onnx/csrc/online-stream.cc
查看文件 @
9d8fdde
...
...
@@ -16,7 +16,7 @@ class OnlineStream::Impl {
explicit
Impl
(
const
FeatureExtractorConfig
&
config
)
:
feat_extractor_
(
config
)
{}
void
AcceptWaveform
(
floa
t
sampling_rate
,
const
float
*
waveform
,
int32_t
n
)
{
void
AcceptWaveform
(
int32_
t
sampling_rate
,
const
float
*
waveform
,
int32_t
n
)
{
feat_extractor_
.
AcceptWaveform
(
sampling_rate
,
waveform
,
n
);
}
...
...
@@ -67,7 +67,7 @@ OnlineStream::OnlineStream(const FeatureExtractorConfig &config /*= {}*/)
OnlineStream
::~
OnlineStream
()
=
default
;
void
OnlineStream
::
AcceptWaveform
(
floa
t
sampling_rate
,
const
float
*
waveform
,
void
OnlineStream
::
AcceptWaveform
(
int32_
t
sampling_rate
,
const
float
*
waveform
,
int32_t
n
)
{
impl_
->
AcceptWaveform
(
sampling_rate
,
waveform
,
n
);
}
...
...
sherpa-onnx/csrc/online-stream.h
查看文件 @
9d8fdde
...
...
@@ -20,12 +20,14 @@ class OnlineStream {
~
OnlineStream
();
/**
@param sampling_rate The sampling_rate of the input waveform. Should match
the one expected by the feature extractor.
@param waveform Pointer to a 1-D array of size n
@param sampling_rate The sampling_rate of the input waveform. If it is
not equal to config.sampling_rate, we will do
resampling inside.
@param waveform Pointer to a 1-D array of size n. It must be normalized to
the range [-1, 1].
@param n Number of entries in waveform
*/
void
AcceptWaveform
(
floa
t
sampling_rate
,
const
float
*
waveform
,
int32_t
n
);
void
AcceptWaveform
(
int32_
t
sampling_rate
,
const
float
*
waveform
,
int32_t
n
);
/**
* InputFinished() tells the class you won't be providing any
...
...
sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc
查看文件 @
9d8fdde
...
...
@@ -76,6 +76,7 @@ OnlineTransducerModifiedBeamSearchDecoder::GetEmptyResult() const {
std
::
vector
<
int64_t
>
blanks
(
context_size
,
blank_id
);
Hypotheses
blank_hyp
({{
blanks
,
0
}});
r
.
hyps
=
std
::
move
(
blank_hyp
);
r
.
tokens
=
std
::
move
(
blanks
);
return
r
;
}
...
...
sherpa-onnx/python/csrc/online-stream.cc
查看文件 @
9d8fdde
...
...
@@ -8,13 +8,27 @@
namespace
sherpa_onnx
{
// Help text for the Python `accept_waveform` method; it is passed as the
// trailing argument of the "accept_waveform" binding in PybindOnlineStream().
// The raw string is emitted verbatim at runtime, so its content must not be
// reformatted.
constexpr
const
char
*
kAcceptWaveformUsage
=
R"(
Process audio samples.
Args:
sample_rate:
Sample rate of the input samples. If it is different from the one
expected by the model, we will do resampling inside.
waveform:
A 1-D float32 tensor containing audio samples. It must be normalized
to the range [-1, 1].
)"
;
// Registers the Python-visible `OnlineStream` class on module `m`.
//
// Methods exposed to Python:
//  - accept_waveform(sample_rate, waveform): forwards the sample rate, the
//    array's data pointer and its element count to
//    OnlineStream::AcceptWaveform(). Documented by kAcceptWaveformUsage.
//  - input_finished(): bound directly to OnlineStream::InputFinished.
//
// @param m Module that receives the class definition.
void PybindOnlineStream(py::module *m) {
  using PyClass = OnlineStream;
  py::class_<PyClass>(*m, "OnlineStream")
      // "accept_waveform" is bound exactly once. An additional binding with
      // the same C++ signature but without argument names or a docstring
      // would be redundant: this one already accepts positional as well as
      // keyword arguments.
      .def(
          "accept_waveform",
          [](PyClass &self, float sample_rate, py::array_t<float> waveform) {
            self.AcceptWaveform(sample_rate, waveform.data(), waveform.size());
          },
          py::arg("sample_rate"), py::arg("waveform"), kAcceptWaveformUsage)
      .def("input_finished", &PyClass::InputFinished);
}
...
...
请
注册
或
登录
后发表评论