Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2023-08-17 13:18:59 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2023-08-17 13:18:59 +0800
Commit
5c5b33523281b8f8797ec0fec418a6597be51371
5c5b3352
1 parent
eb22b484
Remove whisper dependency from the whisper Python example (#283)
隐藏空白字符变更
内嵌
并排对比
正在显示
1 个修改的文件
包含
25 行增加
和
11 行删除
scripts/whisper/test.py
scripts/whisper/test.py
查看文件 @
5c5b335
...
...
@@ -4,15 +4,14 @@
Please first run ./export-onnx.py
before you run this script
"""
import
argparse
import
base64
from
typing
import
Tuple
import
kaldi_native_fbank
as
knf
import
onnxruntime
as
ort
import
torch
import
whisper
import
argparse
import
torchaudio
def
get_args
():
...
...
@@ -225,16 +224,24 @@ def load_tokens(filename):
return
tokens
def
main
():
args
=
get_args
()
encoder
=
args
.
encoder
decoder
=
args
.
decoder
audio
=
whisper
.
load_audio
(
args
.
sound_file
)
def
compute_features
(
filename
:
str
)
->
torch
.
Tensor
:
"""
Args:
filename:
Path to an audio file.
Returns:
Return a 3-D float32 tensor of shape (1, 80, 3000) containing the features.
"""
wave
,
sample_rate
=
torchaudio
.
load
(
filename
)
audio
=
wave
[
0
]
.
contiguous
()
# only use the first channel
if
sample_rate
!=
16000
:
audio
=
torchaudio
.
functional
.
resample
(
audio
,
orig_freq
=
sample_rate
,
new_freq
=
16000
)
features
=
[]
online_whisper_fbank
=
knf
.
OnlineWhisperFbank
(
knf
.
FrameExtractionOptions
())
online_whisper_fbank
.
accept_waveform
(
16000
,
audio
)
online_whisper_fbank
.
accept_waveform
(
16000
,
audio
.
numpy
()
)
online_whisper_fbank
.
input_finished
()
for
i
in
range
(
online_whisper_fbank
.
num_frames_ready
):
f
=
online_whisper_fbank
.
get_frame
(
i
)
...
...
@@ -250,7 +257,14 @@ def main():
mel
=
torch
.
nn
.
functional
.
pad
(
mel
,
(
0
,
0
,
0
,
target
-
mel
.
shape
[
0
]),
"constant"
,
0
)
mel
=
mel
.
t
()
.
unsqueeze
(
0
)
model
=
OnnxModel
(
encoder
,
decoder
)
return
mel
def
main
():
args
=
get_args
()
mel
=
compute_features
(
args
.
sound_file
)
model
=
OnnxModel
(
args
.
encoder
,
args
.
decoder
)
n_layer_cross_k
,
n_layer_cross_v
=
model
.
run_encoder
(
mel
)
...
...
请
注册
或
登录
后发表评论