Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2023-07-14 18:07:27 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2023-07-14 18:07:27 +0800
Commit
f3206c49dc8d4d8be3ff75530c96fa3a99195c06
f3206c49
1 parent
0abd7ce8
Reduce model initialization time for offline speech recognition (#213)
隐藏空白字符变更
内嵌
并排对比
正在显示
10 个修改的文件
包含
113 行增加
和
40 行删除
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp
scripts/dotnet/offline.cs
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/offline-model-config.cc
sherpa-onnx/csrc/offline-model-config.h
sherpa-onnx/csrc/offline-recognizer-impl.cc
sherpa-onnx/csrc/offline-transducer-model-config.cc
sherpa-onnx/python/csrc/offline-model-config.cc
sherpa-onnx/python/sherpa_onnx/offline_recognizer.py
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp
查看文件 @
f3206c4
...
...
@@ -387,6 +387,7 @@ void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
config_
.
model_config
.
tokens
=
tokens
.
c_str
();
config_
.
model_config
.
num_threads
=
1
;
config_
.
model_config
.
debug
=
1
;
config_
.
model_config
.
model_type
=
"paraformer"
;
config_
.
decoding_method
=
"greedy_search"
;
config_
.
max_active_paths
=
4
;
...
...
@@ -447,6 +448,7 @@ void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
config_
.
model_config
.
tokens
=
tokens
.
c_str
();
config_
.
model_config
.
num_threads
=
1
;
config_
.
model_config
.
debug
=
0
;
config_
.
model_config
.
model_type
=
"transducer"
;
config_
.
decoding_method
=
"greedy_search"
;
config_
.
max_active_paths
=
4
;
...
...
scripts/dotnet/offline.cs
查看文件 @
f3206c4
...
...
@@ -76,6 +76,8 @@ namespace SherpaOnnx
Tokens
=
""
;
NumThreads
=
1
;
Debug
=
0
;
Provider
=
"cpu"
;
ModelType
=
""
;
}
public
OfflineTransducerModelConfig
Transducer
;
public
OfflineParaformerModelConfig
Paraformer
;
...
...
@@ -87,6 +89,12 @@ namespace SherpaOnnx
public
int
NumThreads
;
public
int
Debug
;
[
MarshalAs
(
UnmanagedType
.
LPStr
)]
public
string
Provider
;
[
MarshalAs
(
UnmanagedType
.
LPStr
)]
public
string
ModelType
;
}
[
StructLayout
(
LayoutKind
.
Sequential
)]
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
f3206c4
...
...
@@ -33,23 +33,33 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
const
SherpaOnnxOnlineRecognizerConfig
*
config
)
{
sherpa_onnx
::
OnlineRecognizerConfig
recognizer_config
;
recognizer_config
.
feat_config
.
sampling_rate
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
sample_rate
,
16000
);
recognizer_config
.
feat_config
.
feature_dim
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
feature_dim
,
80
);
recognizer_config
.
feat_config
.
sampling_rate
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
sample_rate
,
16000
);
recognizer_config
.
feat_config
.
feature_dim
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
feature_dim
,
80
);
recognizer_config
.
model_config
.
encoder_filename
=
SHERPA_ONNX_OR
(
config
->
model_config
.
encoder
,
""
);
recognizer_config
.
model_config
.
decoder_filename
=
SHERPA_ONNX_OR
(
config
->
model_config
.
decoder
,
""
);
recognizer_config
.
model_config
.
joiner_filename
=
SHERPA_ONNX_OR
(
config
->
model_config
.
joiner
,
""
);
recognizer_config
.
model_config
.
tokens
=
SHERPA_ONNX_OR
(
config
->
model_config
.
tokens
,
""
);
recognizer_config
.
model_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
recognizer_config
.
model_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
model_config
.
provider
,
"cpu"
);
recognizer_config
.
model_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
decoding_method
=
SHERPA_ONNX_OR
(
config
->
decoding_method
,
"greedy_search"
);
recognizer_config
.
max_active_paths
=
SHERPA_ONNX_OR
(
config
->
max_active_paths
,
4
);
recognizer_config
.
enable_endpoint
=
SHERPA_ONNX_OR
(
config
->
enable_endpoint
,
0
);
recognizer_config
.
model_config
.
joiner_filename
=
SHERPA_ONNX_OR
(
config
->
model_config
.
joiner
,
""
);
recognizer_config
.
model_config
.
tokens
=
SHERPA_ONNX_OR
(
config
->
model_config
.
tokens
,
""
);
recognizer_config
.
model_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
recognizer_config
.
model_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
model_config
.
provider
,
"cpu"
);
recognizer_config
.
model_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
decoding_method
=
SHERPA_ONNX_OR
(
config
->
decoding_method
,
"greedy_search"
);
recognizer_config
.
max_active_paths
=
SHERPA_ONNX_OR
(
config
->
max_active_paths
,
4
);
recognizer_config
.
enable_endpoint
=
SHERPA_ONNX_OR
(
config
->
enable_endpoint
,
0
);
recognizer_config
.
endpoint_config
.
rule1
.
min_trailing_silence
=
SHERPA_ONNX_OR
(
config
->
rule1_min_trailing_silence
,
2.4
);
...
...
@@ -173,9 +183,11 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
const
SherpaOnnxOfflineRecognizerConfig
*
config
)
{
sherpa_onnx
::
OfflineRecognizerConfig
recognizer_config
;
recognizer_config
.
feat_config
.
sampling_rate
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
sample_rate
,
16000
);
recognizer_config
.
feat_config
.
sampling_rate
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
sample_rate
,
16000
);
recognizer_config
.
feat_config
.
feature_dim
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
feature_dim
,
80
);
recognizer_config
.
feat_config
.
feature_dim
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
feature_dim
,
80
);
recognizer_config
.
model_config
.
transducer
.
encoder_filename
=
SHERPA_ONNX_OR
(
config
->
model_config
.
transducer
.
encoder
,
""
);
...
...
@@ -184,7 +196,7 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
SHERPA_ONNX_OR
(
config
->
model_config
.
transducer
.
decoder
,
""
);
recognizer_config
.
model_config
.
transducer
.
joiner_filename
=
SHERPA_ONNX_OR
(
config
->
model_config
.
transducer
.
joiner
,
""
);
SHERPA_ONNX_OR
(
config
->
model_config
.
transducer
.
joiner
,
""
);
recognizer_config
.
model_config
.
paraformer
.
model
=
SHERPA_ONNX_OR
(
config
->
model_config
.
paraformer
.
model
,
""
);
...
...
@@ -192,15 +204,26 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
recognizer_config
.
model_config
.
nemo_ctc
.
model
=
SHERPA_ONNX_OR
(
config
->
model_config
.
nemo_ctc
.
model
,
""
);
recognizer_config
.
model_config
.
tokens
=
SHERPA_ONNX_OR
(
config
->
model_config
.
tokens
,
""
);
recognizer_config
.
model_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
recognizer_config
.
model_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
lm_config
.
model
=
SHERPA_ONNX_OR
(
config
->
lm_config
.
model
,
""
);
recognizer_config
.
lm_config
.
scale
=
SHERPA_ONNX_OR
(
config
->
lm_config
.
scale
,
1.0
);
recognizer_config
.
decoding_method
=
SHERPA_ONNX_OR
(
config
->
decoding_method
,
"greedy_search"
);
recognizer_config
.
max_active_paths
=
SHERPA_ONNX_OR
(
config
->
max_active_paths
,
4
);
recognizer_config
.
model_config
.
tokens
=
SHERPA_ONNX_OR
(
config
->
model_config
.
tokens
,
""
);
recognizer_config
.
model_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
recognizer_config
.
model_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
model_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
model_config
.
provider
,
"cpu"
);
recognizer_config
.
model_config
.
model_type
=
SHERPA_ONNX_OR
(
config
->
model_config
.
model_type
,
""
);
recognizer_config
.
lm_config
.
model
=
SHERPA_ONNX_OR
(
config
->
lm_config
.
model
,
""
);
recognizer_config
.
lm_config
.
scale
=
SHERPA_ONNX_OR
(
config
->
lm_config
.
scale
,
1.0
);
recognizer_config
.
decoding_method
=
SHERPA_ONNX_OR
(
config
->
decoding_method
,
"greedy_search"
);
recognizer_config
.
max_active_paths
=
SHERPA_ONNX_OR
(
config
->
max_active_paths
,
4
);
if
(
config
->
model_config
.
debug
)
{
fprintf
(
stderr
,
"%s
\n
"
,
recognizer_config
.
ToString
().
c_str
());
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
f3206c4
...
...
@@ -272,6 +272,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
const
char
*
tokens
;
int32_t
num_threads
;
int32_t
debug
;
const
char
*
provider
;
const
char
*
model_type
;
}
SherpaOnnxOfflineModelConfig
;
SHERPA_ONNX_API
typedef
struct
SherpaOnnxOfflineRecognizerConfig
{
...
...
sherpa-onnx/csrc/offline-model-config.cc
查看文件 @
f3206c4
...
...
@@ -25,6 +25,11 @@ void OfflineModelConfig::Register(ParseOptions *po) {
po
->
Register
(
"provider"
,
&
provider
,
"Specify a provider to use: cpu, cuda, coreml"
);
po
->
Register
(
"model-type"
,
&
model_type
,
"Specify it to reduce model initialization time. "
"Valid values are: transducer, paraformer, nemo_ctc. "
"All other values lead to loading the model twice."
);
}
bool
OfflineModelConfig
::
Validate
()
const
{
...
...
@@ -34,7 +39,7 @@ bool OfflineModelConfig::Validate() const {
}
if
(
!
FileExists
(
tokens
))
{
SHERPA_ONNX_LOGE
(
"%s does not exist"
,
tokens
.
c_str
());
SHERPA_ONNX_LOGE
(
"
tokens:
%s does not exist"
,
tokens
.
c_str
());
return
false
;
}
...
...
@@ -59,7 +64,8 @@ std::string OfflineModelConfig::ToString() const {
os
<<
"tokens=
\"
"
<<
tokens
<<
"
\"
, "
;
os
<<
"num_threads="
<<
num_threads
<<
", "
;
os
<<
"debug="
<<
(
debug
?
"True"
:
"False"
)
<<
", "
;
os
<<
"provider=
\"
"
<<
provider
<<
"
\"
)"
;
os
<<
"provider=
\"
"
<<
provider
<<
"
\"
, "
;
os
<<
"model_type=
\"
"
<<
model_type
<<
"
\"
)"
;
return
os
.
str
();
}
...
...
sherpa-onnx/csrc/offline-model-config.h
查看文件 @
f3206c4
...
...
@@ -22,19 +22,31 @@ struct OfflineModelConfig {
bool
debug
=
false
;
std
::
string
provider
=
"cpu"
;
// With the help of this field, we only need to load the model once
// instead of twice, which reduces initialization time.
//
// Valid values:
// - transducer. The given model is from icefall
// - paraformer. It is a paraformer model
// - nemo_ctc. It is a NeMo CTC model.
//
// All other values are invalid and lead to loading the model twice.
std
::
string
model_type
;
OfflineModelConfig
()
=
default
;
OfflineModelConfig
(
const
OfflineTransducerModelConfig
&
transducer
,
const
OfflineParaformerModelConfig
&
paraformer
,
const
OfflineNemoEncDecCtcModelConfig
&
nemo_ctc
,
const
std
::
string
&
tokens
,
int32_t
num_threads
,
bool
debug
,
const
std
::
string
&
provider
)
const
std
::
string
&
provider
,
const
std
::
string
&
model_type
)
:
transducer
(
transducer
),
paraformer
(
paraformer
),
nemo_ctc
(
nemo_ctc
),
tokens
(
tokens
),
num_threads
(
num_threads
),
debug
(
debug
),
provider
(
provider
)
{}
provider
(
provider
),
model_type
(
model_type
)
{}
void
Register
(
ParseOptions
*
po
);
bool
Validate
()
const
;
...
...
sherpa-onnx/csrc/offline-recognizer-impl.cc
查看文件 @
f3206c4
...
...
@@ -18,6 +18,21 @@ namespace sherpa_onnx {
std
::
unique_ptr
<
OfflineRecognizerImpl
>
OfflineRecognizerImpl
::
Create
(
const
OfflineRecognizerConfig
&
config
)
{
if
(
!
config
.
model_config
.
model_type
.
empty
())
{
const
auto
&
model_type
=
config
.
model_config
.
model_type
;
if
(
model_type
==
"transducer"
)
{
return
std
::
make_unique
<
OfflineRecognizerTransducerImpl
>
(
config
);
}
else
if
(
model_type
==
"paraformer"
)
{
return
std
::
make_unique
<
OfflineRecognizerParaformerImpl
>
(
config
);
}
else
if
(
model_type
==
"nemo_ctc"
)
{
return
std
::
make_unique
<
OfflineRecognizerCtcImpl
>
(
config
);
}
else
{
SHERPA_ONNX_LOGE
(
"Invalid model_type: %s. Trying to load the model to get its type"
,
model_type
.
c_str
());
}
}
Ort
::
Env
env
(
ORT_LOGGING_LEVEL_ERROR
);
Ort
::
SessionOptions
sess_opts
;
...
...
sherpa-onnx/csrc/offline-transducer-model-config.cc
查看文件 @
f3206c4
...
...
@@ -18,17 +18,17 @@ void OfflineTransducerModelConfig::Register(ParseOptions *po) {
bool
OfflineTransducerModelConfig
::
Validate
()
const
{
if
(
!
FileExists
(
encoder_filename
))
{
SHERPA_ONNX_LOGE
(
"%s does not exist"
,
encoder_filename
.
c_str
());
SHERPA_ONNX_LOGE
(
"
encoder:
%s does not exist"
,
encoder_filename
.
c_str
());
return
false
;
}
if
(
!
FileExists
(
decoder_filename
))
{
SHERPA_ONNX_LOGE
(
"%s does not exist"
,
decoder_filename
.
c_str
());
SHERPA_ONNX_LOGE
(
"
decoder:
%s does not exist"
,
decoder_filename
.
c_str
());
return
false
;
}
if
(
!
FileExists
(
joiner_filename
))
{
SHERPA_ONNX_LOGE
(
"%s does not exist"
,
joiner_filename
.
c_str
());
SHERPA_ONNX_LOGE
(
"
joiner:
%s does not exist"
,
joiner_filename
.
c_str
());
return
false
;
}
...
...
sherpa-onnx/python/csrc/offline-model-config.cc
查看文件 @
f3206c4
...
...
@@ -21,15 +21,16 @@ void PybindOfflineModelConfig(py::module *m) {
using
PyClass
=
OfflineModelConfig
;
py
::
class_
<
PyClass
>
(
*
m
,
"OfflineModelConfig"
)
.
def
(
py
::
init
<
const
OfflineTransducerModelConfig
&
,
const
OfflineParaformerModelConfig
&
,
const
OfflineNemoEncDecCtcModelConfig
&
,
const
std
::
string
&
,
int32_t
,
bool
,
const
std
::
string
&>
(),
py
::
arg
(
"transducer"
)
=
OfflineTransducerModelConfig
(),
py
::
arg
(
"paraformer"
)
=
OfflineParaformerModelConfig
(),
py
::
arg
(
"nemo_ctc"
)
=
OfflineNemoEncDecCtcModelConfig
(),
py
::
arg
(
"tokens"
),
py
::
arg
(
"num_threads"
),
py
::
arg
(
"debug"
)
=
false
,
py
::
arg
(
"provider"
)
=
"cpu"
)
.
def
(
py
::
init
<
const
OfflineTransducerModelConfig
&
,
const
OfflineParaformerModelConfig
&
,
const
OfflineNemoEncDecCtcModelConfig
&
,
const
std
::
string
&
,
int32_t
,
bool
,
const
std
::
string
&
,
const
std
::
string
&>
(),
py
::
arg
(
"transducer"
)
=
OfflineTransducerModelConfig
(),
py
::
arg
(
"paraformer"
)
=
OfflineParaformerModelConfig
(),
py
::
arg
(
"nemo_ctc"
)
=
OfflineNemoEncDecCtcModelConfig
(),
py
::
arg
(
"tokens"
),
py
::
arg
(
"num_threads"
),
py
::
arg
(
"debug"
)
=
false
,
py
::
arg
(
"provider"
)
=
"cpu"
,
py
::
arg
(
"model_type"
)
=
""
)
.
def_readwrite
(
"transducer"
,
&
PyClass
::
transducer
)
.
def_readwrite
(
"paraformer"
,
&
PyClass
::
paraformer
)
.
def_readwrite
(
"nemo_ctc"
,
&
PyClass
::
nemo_ctc
)
...
...
@@ -37,6 +38,7 @@ void PybindOfflineModelConfig(py::module *m) {
.
def_readwrite
(
"num_threads"
,
&
PyClass
::
num_threads
)
.
def_readwrite
(
"debug"
,
&
PyClass
::
debug
)
.
def_readwrite
(
"provider"
,
&
PyClass
::
provider
)
.
def_readwrite
(
"model_type"
,
&
PyClass
::
model_type
)
.
def
(
"__str__"
,
&
PyClass
::
ToString
);
}
...
...
sherpa-onnx/python/sherpa_onnx/offline_recognizer.py
查看文件 @
f3206c4
...
...
@@ -86,6 +86,7 @@ class OfflineRecognizer(object):
num_threads
=
num_threads
,
debug
=
debug
,
provider
=
provider
,
model_type
=
"transducer"
,
)
feat_config
=
OfflineFeatureExtractorConfig
(
...
...
@@ -149,6 +150,7 @@ class OfflineRecognizer(object):
num_threads
=
num_threads
,
debug
=
debug
,
provider
=
provider
,
model_type
=
"paraformer"
,
)
feat_config
=
OfflineFeatureExtractorConfig
(
...
...
@@ -211,6 +213,7 @@ class OfflineRecognizer(object):
num_threads
=
num_threads
,
debug
=
debug
,
provider
=
provider
,
model_type
=
"nemo_ctc"
,
)
feat_config
=
OfflineFeatureExtractorConfig
(
...
...
请
注册
或
登录
后发表评论