Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2023-12-08 14:33:59 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2023-12-08 14:33:59 +0800
Commit
0e23f82691d3ea3a2fca7e698684e2c0c89eb95c
0e23f826
1 parent
868c339e
Give an informative log for whisper on exceptions. (#473)
显示空白字符变更
内嵌
并排对比
正在显示
7 个修改的文件
包含
76 行增加
和
14 行删除
python-api-examples/generate-subtitles.py
python-api-examples/non_streaming_server.py
python-api-examples/offline-decode-files.py
python-api-examples/two-pass-speech-recognition-from-microphone.py
python-api-examples/vad-with-non-streaming-asr.py
sherpa-onnx/csrc/offline-recognizer-whisper-impl.h
sherpa-onnx/python/sherpa_onnx/offline_recognizer.py
python-api-examples/generate-subtitles.py
查看文件 @
0e23f82
...
...
@@ -181,6 +181,17 @@ def get_args():
)
parser
.
add_argument
(
"--whisper-tail-paddings"
,
default
=-
1
,
type
=
int
,
help
=
"""Number of tail padding frames.
We have removed the 30-second constraint from whisper, so you need to
choose the amount of tail padding frames by yourself.
Use -1 to use a default value for tail padding.
"""
,
)
parser
.
add_argument
(
"--decoding-method"
,
type
=
str
,
default
=
"greedy_search"
,
...
...
@@ -294,6 +305,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
debug
=
args
.
debug
,
language
=
args
.
whisper_language
,
task
=
args
.
whisper_task
,
tail_paddings
=
args
.
whisper_tail_paddings
,
)
else
:
raise
ValueError
(
"Please specify at least one model"
)
...
...
python-api-examples/non_streaming_server.py
查看文件 @
0e23f82
...
...
@@ -277,6 +277,17 @@ def add_whisper_model_args(parser: argparse.ArgumentParser):
"""
,
)
parser
.
add_argument
(
"--whisper-tail-paddings"
,
default
=-
1
,
type
=
int
,
help
=
"""Number of tail padding frames.
We have removed the 30-second constraint from whisper, so you need to
choose the amount of tail padding frames by yourself.
Use -1 to use a default value for tail padding.
"""
,
)
def
add_model_args
(
parser
:
argparse
.
ArgumentParser
):
add_transducer_model_args
(
parser
)
...
...
@@ -913,6 +924,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
decoding_method
=
args
.
decoding_method
,
language
=
args
.
whisper_language
,
task
=
args
.
whisper_task
,
tail_paddings
=
args
.
whisper_tail_paddings
,
)
elif
args
.
tdnn_model
:
assert_file_exists
(
args
.
tdnn_model
)
...
...
python-api-examples/offline-decode-files.py
查看文件 @
0e23f82
...
...
@@ -221,6 +221,17 @@ def get_args():
)
parser
.
add_argument
(
"--whisper-tail-paddings"
,
default
=-
1
,
type
=
int
,
help
=
"""Number of tail padding frames.
We have removed the 30-second constraint from whisper, so you need to
choose the amount of tail padding frames by yourself.
Use -1 to use a default value for tail padding.
"""
,
)
parser
.
add_argument
(
"--decoding-method"
,
type
=
str
,
default
=
"greedy_search"
,
...
...
@@ -391,6 +402,7 @@ def main():
debug
=
args
.
debug
,
language
=
args
.
whisper_language
,
task
=
args
.
whisper_task
,
tail_paddings
=
args
.
whisper_tail_paddings
,
)
elif
args
.
tdnn_model
:
assert_file_exists
(
args
.
tdnn_model
)
...
...
python-api-examples/two-pass-speech-recognition-from-microphone.py
查看文件 @
0e23f82
...
...
@@ -195,6 +195,17 @@ def add_second_pass_whisper_model_args(parser: argparse.ArgumentParser):
"""
,
)
parser
.
add_argument
(
"--second-whisper-tail-paddings"
,
default
=-
1
,
type
=
int
,
help
=
"""Number of tail padding frames.
We have removed the 30-second constraint from whisper, so you need to
choose the amount of tail padding frames by yourself.
Use -1 to use a default value for tail padding.
"""
,
)
def
add_second_pass_non_streaming_model_args
(
parser
:
argparse
.
ArgumentParser
):
add_second_pass_transducer_model_args
(
parser
)
...
...
@@ -314,6 +325,7 @@ def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
decoding_method
=
"greedy_search"
,
language
=
args
.
second_whisper_language
,
task
=
args
.
second_whisper_task
,
tail_paddings
=
args
.
second_whisper_tail_paddings
,
)
else
:
raise
ValueError
(
"Please specify at least one model for the second pass"
)
...
...
python-api-examples/vad-with-non-streaming-asr.py
查看文件 @
0e23f82
...
...
@@ -167,6 +167,17 @@ def get_args():
)
parser
.
add_argument
(
"--whisper-tail-paddings"
,
default
=-
1
,
type
=
int
,
help
=
"""Number of tail padding frames.
We have removed the 30-second constraint from whisper, so you need to
choose the amount of tail padding frames by yourself.
Use -1 to use a default value for tail padding.
"""
,
)
parser
.
add_argument
(
"--decoding-method"
,
type
=
str
,
default
=
"greedy_search"
,
...
...
@@ -256,6 +267,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
debug
=
args
.
debug
,
language
=
args
.
whisper_language
,
task
=
args
.
whisper_task
,
tail_paddings
=
args
.
whisper_tail_paddings
,
)
else
:
raise
ValueError
(
"Please specify at least one model"
)
...
...
sherpa-onnx/csrc/offline-recognizer-whisper-impl.h
查看文件 @
0e23f82
...
...
@@ -116,18 +116,12 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
NormalizeFeatures
(
f
.
data
(),
num_frames
,
feat_dim
);
// note that 50 is an experience value.
// see also ../../scripts/whisper/test.py
//
// You can replace 50 by other values, say, 100.
// note that 1000 is an experience-value.
// You can replace 1000 by other values, say, 100.
//
// Since we have removed the 30 seconds constraint, we need
// tail_padding_frames so that whisper is able to detect the eot token.
int32_t
tail_padding_frames
=
50
;
if
(
model_
->
IsMultiLingual
())
{
// 300 is an experience value. If it throws, please use a larger value.
tail_padding_frames
=
300
;
}
int32_t
tail_padding_frames
=
1000
;
if
(
config_
.
model_config
.
whisper
.
tail_paddings
>
0
)
{
tail_padding_frames
=
config_
.
model_config
.
whisper
.
tail_paddings
;
...
...
@@ -140,11 +134,13 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
Ort
::
Value
mel
=
Ort
::
Value
::
CreateTensor
<
float
>
(
model_
->
Allocator
(),
shape
.
data
(),
shape
.
size
());
float
*
p_mel
=
mel
.
GetTensorMutableData
<
float
>
();
std
::
copy
(
f
.
data
(),
f
.
data
()
+
actual_frames
*
feat_dim
,
p_mel
);
std
::
copy
(
f
.
data
(),
f
.
data
()
+
num_frames
*
feat_dim
,
p_mel
);
std
::
fill_n
(
p_mel
+
num_frames
*
feat_dim
,
(
actual_frames
-
num_frames
)
*
feat_dim
,
0
);
memset
(
p_mel
+
f
.
size
(),
0
,
(
actual_frames
-
num_frames
)
*
feat_dim
*
sizeof
(
float
));
mel
=
Transpose12
(
model_
->
Allocator
(),
&
mel
);
try
{
...
...
@@ -156,8 +152,12 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
auto
r
=
Convert
(
results
[
0
],
symbol_table_
);
s
->
SetResult
(
r
);
}
catch
(
const
Ort
::
Exception
&
ex
)
{
SHERPA_ONNX_LOGE
(
"
\n\n
Caught exception:
\n\n
%s
\n\n
Return an empty result"
,
ex
.
what
());
SHERPA_ONNX_LOGE
(
"
\n\n
Caught exception:
\n\n
%s
\n\n
Return an empty result. Number of "
"input frames: %d, Current tail "
"paddings: %d. If you see a lot of such exceptions, please consider "
"using a larger --whisper-tail-paddings"
,
ex
.
what
(),
num_frames
,
tail_padding_frames
);
return
;
}
}
...
...
sherpa-onnx/python/sherpa_onnx/offline_recognizer.py
查看文件 @
0e23f82
...
...
@@ -261,6 +261,7 @@ class OfflineRecognizer(object):
decoding_method
:
str
=
"greedy_search"
,
debug
:
bool
=
False
,
provider
:
str
=
"cpu"
,
tail_paddings
:
int
=
-
1
,
):
"""
Please refer to
...
...
@@ -305,6 +306,7 @@ class OfflineRecognizer(object):
decoder
=
decoder
,
language
=
language
,
task
=
task
,
tail_paddings
=
tail_paddings
,
),
tokens
=
tokens
,
num_threads
=
num_threads
,
...
...
请
注册
或
登录
后发表评论