Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-07-28 21:54:38 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-07-28 21:54:38 +0800
Commit
d279c8d20e7175323898477c0ad0bb02c26ffeef
d279c8d2
1 parent
9e005f53
Add more Python examples for SenseVoice (#1179)
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
141 行增加
和
4 行删除
.github/scripts/test-python.sh
.github/workflows/run-python-test.yaml
python-api-examples/generate-subtitles.py
python-api-examples/non_streaming_server.py
python-api-examples/offline-sense-voice-ctc-decode-files.py
python-api-examples/vad-with-non-streaming-asr.py
.github/scripts/test-python.sh
查看文件 @
d279c8d
...
...
@@ -20,6 +20,38 @@ tar xvf $name
rm
$name
ls -lh
$repo
python3 ./python-api-examples/offline-sense-voice-ctc-decode-files.py
if
[[
$(
uname
)
==
Linux
]]
;
then
# It needs ffmpeg
log
"generate subtitles (Chinese)"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
python3 ./python-api-examples/generate-subtitles.py
\
--silero-vad-model
=
./silero_vad.onnx
\
--sense-voice
=
$repo
/model.onnx
\
--tokens
=
$repo
/tokens.txt
\
--num-threads
=
2
\
./lei-jun-test.wav
cat lei-jun-test.srt
rm lei-jun-test.wav
log
"generate subtitles (English)"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
python3 ./python-api-examples/generate-subtitles.py
\
--silero-vad-model
=
./silero_vad.onnx
\
--sense-voice
=
$repo
/model.onnx
\
--tokens
=
$repo
/tokens.txt
\
--num-threads
=
2
\
./Obama.wav
cat Obama.srt
rm Obama.wav
rm silero_vad.onnx
fi
rm -rf
$repo
log
"test offline TeleSpeech CTC"
...
...
.github/workflows/run-python-test.yaml
查看文件 @
d279c8d
...
...
@@ -79,6 +79,11 @@ jobs:
python3 -m pip install --upgrade pip numpy pypinyin sentencepiece>=0.1.96 soundfile
python3 -m pip install wheel twine setuptools
-
name
:
Install ffmpeg
shell
:
bash
run
:
|
sudo apt-get install ffmpeg
-
name
:
Install ninja
shell
:
bash
run
:
|
...
...
python-api-examples/generate-subtitles.py
查看文件 @
d279c8d
...
...
@@ -12,12 +12,12 @@ Supported file formats are those supported by ffmpeg; for instance,
Note that you need a non-streaming model for this script.
Please visit
https://github.com/
snakers4/silero-vad/raw/master/src/silero_vad/data
/silero_vad.onnx
https://github.com/
k2-fsa/sherpa-onnx/releases/download/asr-models
/silero_vad.onnx
to download silero_vad.onnx
For instance,
wget https://github.com/
snakers4/silero-vad/raw/master/src/silero_vad/data
/silero_vad.onnx
wget https://github.com/
k2-fsa/sherpa-onnx/releases/download/asr-models
/silero_vad.onnx
(1) For paraformer
...
...
@@ -58,7 +58,17 @@ wget https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/siler
--num-threads=2
\
/path/to/test.mp4
(4) For WeNet CTC models
(4) For SenseVoice CTC models
./python-api-examples/generate-subtitles.py
\
--silero-vad-model=/path/to/silero_vad.onnx
\
--sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx
\
--tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt
\
--num-threads=2
\
/path/to/test.mp4
(5) For WeNet CTC models
./python-api-examples/generate-subtitles.py
\
--silero-vad-model=/path/to/silero_vad.onnx
\
...
...
@@ -131,6 +141,13 @@ def get_args():
)
parser
.
add_argument
(
"--sense-voice"
,
default
=
""
,
type
=
str
,
help
=
"Path to the model.onnx from SenseVoice"
,
)
parser
.
add_argument
(
"--wenet-ctc"
,
default
=
""
,
type
=
str
,
...
...
@@ -242,6 +259,7 @@ def assert_file_exists(filename: str):
def
create_recognizer
(
args
)
->
sherpa_onnx
.
OfflineRecognizer
:
if
args
.
encoder
:
assert
len
(
args
.
paraformer
)
==
0
,
args
.
paraformer
assert
len
(
args
.
sense_voice
)
==
0
,
args
.
sense_voice
assert
len
(
args
.
wenet_ctc
)
==
0
,
args
.
wenet_ctc
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
...
...
@@ -262,6 +280,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
debug
=
args
.
debug
,
)
elif
args
.
paraformer
:
assert
len
(
args
.
sense_voice
)
==
0
,
args
.
sense_voice
assert
len
(
args
.
wenet_ctc
)
==
0
,
args
.
wenet_ctc
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
...
...
@@ -277,6 +296,19 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
decoding_method
=
args
.
decoding_method
,
debug
=
args
.
debug
,
)
elif
args
.
sense_voice
:
assert
len
(
args
.
wenet_ctc
)
==
0
,
args
.
wenet_ctc
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
assert_file_exists
(
args
.
sense_voice
)
recognizer
=
sherpa_onnx
.
OfflineRecognizer
.
from_sense_voice
(
model
=
args
.
sense_voice
,
tokens
=
args
.
tokens
,
num_threads
=
args
.
num_threads
,
use_itn
=
True
,
debug
=
args
.
debug
,
)
elif
args
.
wenet_ctc
:
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
...
...
@@ -406,6 +438,9 @@ def main():
vad
.
accept_waveform
(
buffer
[:
window_size
])
buffer
=
buffer
[
window_size
:]
if
is_silence
:
vad
.
flush
()
streams
=
[]
segments
=
[]
while
not
vad
.
empty
():
...
...
python-api-examples/non_streaming_server.py
查看文件 @
d279c8d
...
...
@@ -92,6 +92,16 @@ python3 ./python-api-examples/non_streaming_server.py \
--tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx
\
--tokens=./sherpa-onnx-tdnn-yesno/tokens.txt
(6) Use a Non-streaming SenseVoice model
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
python3 ./python-api-examples/non_streaming_server.py
\
--sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx
\
--tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt
----
To use a certificate so that you can use https, please use
...
...
@@ -208,6 +218,15 @@ def add_paraformer_model_args(parser: argparse.ArgumentParser):
)
def add_sense_voice_model_args(parser: argparse.ArgumentParser) -> None:
    """Register the SenseVoice model command-line option on ``parser``.

    Adds ``--sense-voice``, a string option whose default is the empty
    string.

    Args:
      parser:
        The argument parser that receives the new option. It is modified
        in place.
    """
    parser.add_argument(
        "--sense-voice",
        default="",
        type=str,
        help="Path to the model.onnx from SenseVoice",
    )
def
add_nemo_ctc_model_args
(
parser
:
argparse
.
ArgumentParser
):
parser
.
add_argument
(
"--nemo-ctc"
,
...
...
@@ -287,6 +306,7 @@ def add_whisper_model_args(parser: argparse.ArgumentParser):
def
add_model_args
(
parser
:
argparse
.
ArgumentParser
):
add_transducer_model_args
(
parser
)
add_paraformer_model_args
(
parser
)
add_sense_voice_model_args
(
parser
)
add_nemo_ctc_model_args
(
parser
)
add_wenet_ctc_model_args
(
parser
)
add_tdnn_ctc_model_args
(
parser
)
...
...
@@ -850,6 +870,7 @@ def assert_file_exists(filename: str):
def
create_recognizer
(
args
)
->
sherpa_onnx
.
OfflineRecognizer
:
if
args
.
encoder
:
assert
len
(
args
.
paraformer
)
==
0
,
args
.
paraformer
assert
len
(
args
.
sense_voice
)
==
0
,
args
.
sense_voice
assert
len
(
args
.
nemo_ctc
)
==
0
,
args
.
nemo_ctc
assert
len
(
args
.
wenet_ctc
)
==
0
,
args
.
wenet_ctc
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
...
...
@@ -876,6 +897,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
provider
=
args
.
provider
,
)
elif
args
.
paraformer
:
assert
len
(
args
.
sense_voice
)
==
0
,
args
.
sense_voice
assert
len
(
args
.
nemo_ctc
)
==
0
,
args
.
nemo_ctc
assert
len
(
args
.
wenet_ctc
)
==
0
,
args
.
wenet_ctc
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
...
...
@@ -893,6 +915,20 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
decoding_method
=
args
.
decoding_method
,
provider
=
args
.
provider
,
)
elif
args
.
sense_voice
:
assert
len
(
args
.
nemo_ctc
)
==
0
,
args
.
nemo_ctc
assert
len
(
args
.
wenet_ctc
)
==
0
,
args
.
wenet_ctc
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
assert
len
(
args
.
tdnn_model
)
==
0
,
args
.
tdnn_model
assert_file_exists
(
args
.
sense_voice
)
recognizer
=
sherpa_onnx
.
OfflineRecognizer
.
from_sense_voice
(
model
=
args
.
sense_voice
,
tokens
=
args
.
tokens
,
num_threads
=
args
.
num_threads
,
use_itn
=
True
,
)
elif
args
.
nemo_ctc
:
assert
len
(
args
.
wenet_ctc
)
==
0
,
args
.
wenet_ctc
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
...
...
python-api-examples/offline-sense-voice-ctc-decode-files.py
查看文件 @
d279c8d
...
...
@@ -22,7 +22,7 @@ import soundfile as sf
def
create_recognizer
():
model
=
"./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.
int8.
onnx"
model
=
"./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx"
tokens
=
"./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
test_wav
=
"./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav"
# test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav"
...
...
python-api-examples/vad-with-non-streaming-asr.py
查看文件 @
d279c8d
...
...
@@ -45,6 +45,14 @@ Note that you need a non-streaming model for this script.
--whisper-task=transcribe
\
--num-threads=2
(4) For SenseVoice CTC models
./python-api-examples/vad-with-non-streaming-asr.py
\
--silero-vad-model=/path/to/silero_vad.onnx
\
--sense-voice=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx
\
--tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt
\
--num-threads=2
Please refer to
https://k2-fsa.github.io/sherpa/onnx/index.html
to install sherpa-onnx and to download non-streaming pre-trained models
...
...
@@ -124,6 +132,13 @@ def get_args():
)
parser
.
add_argument
(
"--sense-voice"
,
default
=
""
,
type
=
str
,
help
=
"Path to the model.onnx from SenseVoice"
,
)
parser
.
add_argument
(
"--num-threads"
,
type
=
int
,
default
=
1
,
...
...
@@ -233,6 +248,7 @@ def assert_file_exists(filename: str):
def
create_recognizer
(
args
)
->
sherpa_onnx
.
OfflineRecognizer
:
if
args
.
encoder
:
assert
len
(
args
.
paraformer
)
==
0
,
args
.
paraformer
assert
len
(
args
.
sense_voice
)
==
0
,
args
.
sense_voice
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
...
...
@@ -253,6 +269,7 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
debug
=
args
.
debug
,
)
elif
args
.
paraformer
:
assert
len
(
args
.
sense_voice
)
==
0
,
args
.
sense_voice
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
...
...
@@ -267,6 +284,18 @@ def create_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
decoding_method
=
args
.
decoding_method
,
debug
=
args
.
debug
,
)
elif
args
.
sense_voice
:
assert
len
(
args
.
whisper_encoder
)
==
0
,
args
.
whisper_encoder
assert
len
(
args
.
whisper_decoder
)
==
0
,
args
.
whisper_decoder
assert_file_exists
(
args
.
sense_voice
)
recognizer
=
sherpa_onnx
.
OfflineRecognizer
.
from_sense_voice
(
model
=
args
.
sense_voice
,
tokens
=
args
.
tokens
,
num_threads
=
args
.
num_threads
,
use_itn
=
True
,
debug
=
args
.
debug
,
)
elif
args
.
whisper_encoder
:
assert_file_exists
(
args
.
whisper_encoder
)
assert_file_exists
(
args
.
whisper_decoder
)
...
...
请
注册
或
登录
后发表评论