Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-06-03 16:30:28 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-06-03 16:30:28 +0800
Commit
b31b9f3a2d634f7bd43eace26316ed002647ee9e
b31b9f3a
1 parent
9edb78e2
Add a VAD Python example to remove silences from a file. (#963)
隐藏空白字符变更
内嵌
并排对比
正在显示
1 个修改的文件
包含
116 行增加
和
0 行删除
python-api-examples/vad-remove-non-speech-segments-from-file.py
python-api-examples/vad-remove-non-speech-segments-from-file.py
0 → 100755
查看文件 @
b31b9f3
#!/usr/bin/env python3
"""
This file shows how to remove non-speech segments
and merge all speech segments into a large segment
and save it to a file.
Usage:

python3 ./vad-remove-non-speech-segments-from-file.py \
  --silero-vad-model silero_vad.onnx \
  input.wav \
  output.wav
Please visit
https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
"""
import
argparse
from
pathlib
import
Path
from
typing
import
Tuple
import
numpy
as
np
import
sherpa_onnx
import
soundfile
as
sf
def assert_file_exists(filename: str):
    """Ensure that ``filename`` refers to an existing regular file.

    Args:
      filename:
        Path of the file to check.

    Raises:
      AssertionError:
        If ``filename`` is not an existing file. The exception type is kept
        as ``AssertionError`` so the failure looks the same as it did when
        this check was written as an ``assert`` statement.
    """
    if Path(filename).is_file():
        return

    # Raise explicitly rather than via an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would silently skip this check.
    raise AssertionError(
        f"{filename} does not exist!\n"
        "Please refer to "
        "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html to download it"
    )
def get_args():
    """Parse the command line of this script.

    The command line consists of one required option,
    ``--silero-vad-model``, followed by two positional arguments:
    the input wave file and the output wave file.

    Returns:
      The ``argparse.Namespace`` with the attributes ``silero_vad_model``,
      ``input`` and ``output``.
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    arg_parser.add_argument(
        "--silero-vad-model",
        required=True,
        type=str,
        help="Path to silero_vad.onnx",
    )

    # The two positional arguments differ only in name and help text.
    positionals = (
        ("input", "Path to input.wav"),
        ("output", "Path to output.wav"),
    )
    for dest, description in positionals:
        arg_parser.add_argument(dest, type=str, help=description)

    namespace = arg_parser.parse_args()
    return namespace
def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    """Read an audio file and return the samples of its first channel.

    Args:
      filename:
        Path to the audio file. It is read with ``soundfile.read``.

    Returns:
      A tuple ``(samples, sample_rate)``. ``samples`` is a contiguous 1-D
      float32 array holding the first channel only; ``sample_rate`` is the
      sample rate reported by ``soundfile``.
    """
    # always_2d=True means the channel column can be indexed the same way
    # for mono and multi-channel files.
    audio, sample_rate = sf.read(filename, always_2d=True, dtype="float32")

    # Keep only the first channel and copy it into contiguous memory.
    first_channel = np.ascontiguousarray(audio[:, 0])
    return first_channel, sample_rate
def main():
    """Remove non-speech segments from a wave file and save the speech.

    Steps:

      1. Parse the command line and check that the VAD model and the input
         file exist.
      2. Load the first channel of the input file and resample it to 16 kHz
         if it has a different sample rate (this needs ``librosa``).
      3. Feed the samples to the voice activity detector one window at a
         time and collect every detected speech segment.
      4. Concatenate the segments and write them to the output file.

    If no speech is detected, an empty wave file is written.
    """
    args = get_args()
    assert_file_exists(args.silero_vad_model)
    assert_file_exists(args.input)

    samples, sample_rate = load_audio(args.input)
    if sample_rate != 16000:
        # Imported lazily so that librosa is required only when the input
        # actually has to be resampled.
        import librosa

        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000

    config = sherpa_onnx.VadModelConfig()
    config.silero_vad.model = args.silero_vad_model
    config.sample_rate = sample_rate

    window_size = config.silero_vad.window_size

    vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=30)

    # One float32 array per detected segment. Keeping arrays instead of
    # extending a list with individual Python floats avoids creating one
    # Python object per sample.
    segments = []

    def collect_finished_segments():
        # Move every segment that the detector has completed into `segments`.
        while not vad.empty():
            segments.append(np.asarray(vad.front.samples, dtype=np.float32))
            vad.pop()

    for start in range(0, len(samples), window_size):
        window = samples[start : start + window_size]
        if len(window) < window_size:
            # Zero-pad the last partial window so that the tail of the file
            # is also seen by the detector, which is always fed exactly
            # window_size samples here.
            window = np.pad(window, (0, window_size - len(window)))
        vad.accept_waveform(window)
        collect_finished_segments()

    # Ask the detector to emit a segment that is still open at end-of-file.
    # NOTE(review): flush() is assumed to be provided by recent sherpa_onnx
    # releases only, so it is skipped when missing - confirm against the
    # installed version.
    flush = getattr(vad, "flush", None)
    if flush is not None:
        flush()
        collect_finished_segments()

    if segments:
        speech_samples = np.concatenate(segments)
    else:
        # np.concatenate() rejects an empty list; write an empty file instead.
        speech_samples = np.zeros(0, dtype=np.float32)

    sf.write(args.output, speech_samples, samplerate=sample_rate)

    print(f"Saved to {args.output}")
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
...
...
请
注册
或
登录
后发表评论