Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-05-09 15:32:22 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-05-09 15:32:22 +0800
Commit
5d8c35e44ea967f3a1763c7402099f886a26b1a9
5d8c35e4
1 parent
5ed3ec1c
Add C++ support for non-streaming NeMo fast conformer hybrid transducer ctc (the ctc branch) (#848)
隐藏空白字符变更
内嵌
并排对比
正在显示 8 个修改的文件，包含 155 行增加和 33 行删除
.github/scripts/test-offline-ctc.sh
.github/scripts/test-spoken-language-identification.sh
.github/workflows/linux.yaml
.github/workflows/macos.yaml
sherpa-onnx/csrc/offline-ctc-model.cc
sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h
sherpa-onnx/csrc/offline-recognizer-impl.cc
sherpa-onnx/csrc/symbol-table.cc
.github/scripts/test-offline-ctc.sh
查看文件 @
5d8c35e
...
...
@@ -13,14 +13,111 @@ echo "PATH: $PATH"
which
$EXE
log
"-----------------------------------------------------------------"
log
"Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
log
"-----------------------------------------------------------------"
url
=
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
name
=
$(
basename
$url
)
curl -SL -O
$url
tar xvf
$name
rm
$name
repo
=
$(
basename -s .tar.bz2
$name
)
ls -lh
$repo
log
"test
$repo
"
test_wavs
=(
de-german.wav
es-spanish.wav
hr-croatian.wav
po-polish.wav
uk-ukrainian.wav
en-english.wav
fr-french.wav
it-italian.wav
ru-russian.wav
)
for
w
in
${
test_wavs
[@]
}
;
do
time
$EXE
\
--tokens
=
$repo
/tokens.txt
\
--nemo-ctc-model
=
$repo
/model.onnx
\
--debug
=
1
\
$repo
/test_wavs/
$w
done
rm -rf
$repo
url
=
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2
name
=
$(
basename
$url
)
curl -SL -O
$url
tar xvf
$name
rm
$name
repo
=
$(
basename -s .tar.bz2
$name
)
ls -lh
$repo
log
"Test
$repo
"
time
$EXE
\
--tokens
=
$repo
/tokens.txt
\
--nemo-ctc-model
=
$repo
/model.onnx
\
--debug
=
1
\
$repo
/test_wavs/en-english.wav
rm -rf
$repo
url
=
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2
name
=
$(
basename
$url
)
curl -SL -O
$url
tar xvf
$name
rm
$name
repo
=
$(
basename -s .tar.bz2
$name
)
ls -lh
$repo
log
"test
$repo
"
time
$EXE
\
--tokens
=
$repo
/tokens.txt
\
--nemo-ctc-model
=
$repo
/model.onnx
\
--debug
=
1
\
$repo
/test_wavs/es-spanish.wav
rm -rf
$repo
url
=
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2
name
=
$(
basename
$url
)
curl -SL -O
$url
tar xvf
$name
rm
$name
repo
=
$(
basename -s .tar.bz2
$name
)
ls -lh
$repo
log
"Test
$repo
"
test_wavs
=(
en-english.wav
de-german.wav
fr-french.wav
es-spanish.wav
)
for
w
in
${
test_wavs
[@]
}
;
do
time
$EXE
\
--tokens
=
$repo
/tokens.txt
\
--nemo-ctc-model
=
$repo
/model.onnx
\
--debug
=
1
\
$repo
/test_wavs/
$w
done
rm -rf
$repo
log
"------------------------------------------------------------"
log
"Run Wenet models"
log
"------------------------------------------------------------"
wenet_models
=(
sherpa-onnx-zh-wenet-aishell
sherpa-onnx-zh-wenet-aishell2
#
sherpa-onnx-zh-wenet-aishell2
# sherpa-onnx-zh-wenet-wenetspeech
sherpa-onnx-zh-wenet-multi-cn
#
sherpa-onnx-zh-wenet-multi-cn
sherpa-onnx-en-wenet-librispeech
# sherpa-onnx-en-wenet-gigaspeech
)
...
...
.github/scripts/test-spoken-language-identification.sh
查看文件 @
5d8c35e
...
...
@@ -62,6 +62,11 @@ for wav in ${waves[@]}; do
ls -lh
*
.wav
done
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
data
=
spoken-language-identification-test-wavs
for
name
in
${
names
[@]
}
;
do
log
"------------------------------------------------------------"
log
"Run
$name
"
...
...
@@ -85,14 +90,14 @@ for name in ${names[@]}; do
time
$EXE
\
--whisper-encoder
=
$repo
/
${
name
}
-encoder.onnx
\
--whisper-decoder
=
$repo
/
${
name
}
-decoder.onnx
\
$wav
$
data
/
$
wav
log
"test int8 onnx"
time
$EXE
\
--whisper-encoder
=
$repo
/
${
name
}
-encoder.int8.onnx
\
--whisper-decoder
=
$repo
/
${
name
}
-decoder.int8.onnx
\
$wav
$
data
/
$
wav
done
rm -rf
$repo
done
...
...
.github/workflows/linux.yaml
查看文件 @
5d8c35e
...
...
@@ -128,13 +128,13 @@ jobs:
name
:
release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path
:
install/*
-
name
:
Test
offline punctuation
-
name
:
Test
spoken language identification (C++ API)
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-
punctu
ation
export EXE=sherpa-onnx-offline-
language-identific
ation
.github/scripts/test-
offline-punctu
ation.sh
.github/scripts/test-
spoken-language-identific
ation.sh
-
name
:
Test C API
shell
:
bash
...
...
@@ -147,13 +147,13 @@ jobs:
.github/scripts/test-c-api.sh
-
name
:
Test
Audio tagging
-
name
:
Test
offline CTC
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
-audio-tagging
export EXE=sherpa-onnx-offline
.github/scripts/test-
audio-tagging
.sh
.github/scripts/test-
offline-ctc
.sh
-
name
:
Test online CTC
shell
:
bash
...
...
@@ -163,14 +163,21 @@ jobs:
.github/scripts/test-online-ctc.sh
-
name
:
Test offline punctuation
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
.github/scripts/test-offline-punctuation.sh
-
name
:
Test
spoken language identification (C++ API)
-
name
:
Test
Audio tagging
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-
language-identification
export EXE=sherpa-onnx-offline-
audio-tagging
.github/scripts/test-
spoken-language-identification
.sh
.github/scripts/test-
audio-tagging
.sh
-
name
:
Test transducer kws
shell
:
bash
...
...
@@ -180,7 +187,6 @@ jobs:
.github/scripts/test-kws.sh
-
name
:
Test offline Whisper
if
:
matrix.build_type != 'Debug'
shell
:
bash
...
...
@@ -192,14 +198,6 @@ jobs:
.github/scripts/test-offline-whisper.sh
-
name
:
Test offline CTC
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
-
name
:
Test offline TTS
if
:
matrix.with_tts == 'ON'
shell
:
bash
...
...
.github/workflows/macos.yaml
查看文件 @
5d8c35e
...
...
@@ -107,6 +107,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
-
name
:
Test online CTC
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-ctc.sh
-
name
:
Test offline punctuation
shell
:
bash
run
:
|
...
...
@@ -150,14 +158,6 @@ jobs:
.github/scripts/test-kws.sh
-
name
:
Test online CTC
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-ctc.sh
-
name
:
Test offline TTS
if
:
matrix.with_tts == 'ON'
shell
:
bash
...
...
sherpa-onnx/csrc/offline-ctc-model.cc
查看文件 @
5d8c35e
...
...
@@ -20,6 +20,7 @@ namespace {
enum
class
ModelType
{
kEncDecCTCModelBPE
,
kEncDecHybridRNNTCTCBPEModel
,
kTdnn
,
kZipformerCtc
,
kWenetCtc
,
...
...
@@ -55,7 +56,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
"No model_type in the metadata!
\n
"
"If you are using models from NeMo, please refer to
\n
"
"https://huggingface.co/csukuangfj/"
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py"
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py
\n
"
"or "
"https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/nemo/"
"fast-conformer-hybrid-transducer-ctc
\n
"
"If you are using models from WeNet, please refer to
\n
"
"https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/"
"run.sh
\n
"
...
...
@@ -66,6 +70,8 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
if
(
model_type
.
get
()
==
std
::
string
(
"EncDecCTCModelBPE"
))
{
return
ModelType
::
kEncDecCTCModelBPE
;
}
else
if
(
model_type
.
get
()
==
std
::
string
(
"EncDecHybridRNNTCTCBPEModel"
))
{
return
ModelType
::
kEncDecHybridRNNTCTCBPEModel
;
}
else
if
(
model_type
.
get
()
==
std
::
string
(
"tdnn"
))
{
return
ModelType
::
kTdnn
;
}
else
if
(
model_type
.
get
()
==
std
::
string
(
"zipformer2_ctc"
))
{
...
...
@@ -106,6 +112,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
case
ModelType
:
:
kEncDecCTCModelBPE
:
return
std
::
make_unique
<
OfflineNemoEncDecCtcModel
>
(
config
);
break
;
case
ModelType
:
:
kEncDecHybridRNNTCTCBPEModel
:
return
std
::
make_unique
<
OfflineNemoEncDecHybridRNNTCTCBPEModel
>
(
config
);
break
;
case
ModelType
:
:
kTdnn
:
return
std
::
make_unique
<
OfflineTdnnCtcModel
>
(
config
);
break
;
...
...
@@ -153,6 +162,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
case
ModelType
:
:
kEncDecCTCModelBPE
:
return
std
::
make_unique
<
OfflineNemoEncDecCtcModel
>
(
mgr
,
config
);
break
;
case
ModelType
:
:
kEncDecHybridRNNTCTCBPEModel
:
return
std
::
make_unique
<
OfflineNemoEncDecHybridRNNTCTCBPEModel
>
(
config
);
break
;
case
ModelType
:
:
kTdnn
:
return
std
::
make_unique
<
OfflineTdnnCtcModel
>
(
mgr
,
config
);
break
;
...
...
sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h
查看文件 @
5d8c35e
...
...
@@ -81,6 +81,8 @@ class OfflineNemoEncDecCtcModel : public OfflineCtcModel {
std
::
unique_ptr
<
Impl
>
impl_
;
};
using
OfflineNemoEncDecHybridRNNTCTCBPEModel
=
OfflineNemoEncDecCtcModel
;
}
// namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_
...
...
sherpa-onnx/csrc/offline-recognizer-impl.cc
查看文件 @
5d8c35e
...
...
@@ -122,7 +122,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
return
std
::
make_unique
<
OfflineRecognizerParaformerImpl
>
(
config
);
}
if
(
model_type
==
"EncDecCTCModelBPE"
||
model_type
==
"tdnn"
||
if
(
model_type
==
"EncDecCTCModelBPE"
||
model_type
==
"EncDecHybridRNNTCTCBPEModel"
||
model_type
==
"tdnn"
||
model_type
==
"zipformer2_ctc"
||
model_type
==
"wenet_ctc"
)
{
return
std
::
make_unique
<
OfflineRecognizerCtcImpl
>
(
config
);
}
...
...
@@ -137,6 +138,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
" - Non-streaming transducer models from icefall
\n
"
" - Non-streaming Paraformer models from FunASR
\n
"
" - EncDecCTCModelBPE models from NeMo
\n
"
" - EncDecHybridRNNTCTCBPEModel models from NeMo
\n
"
" - Whisper models
\n
"
" - Tdnn models
\n
"
" - Zipformer CTC models
\n
"
...
...
@@ -252,7 +254,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
return
std
::
make_unique
<
OfflineRecognizerParaformerImpl
>
(
mgr
,
config
);
}
if
(
model_type
==
"EncDecCTCModelBPE"
||
model_type
==
"tdnn"
||
if
(
model_type
==
"EncDecCTCModelBPE"
||
model_type
==
"EncDecHybridRNNTCTCBPEModel"
||
model_type
==
"tdnn"
||
model_type
==
"zipformer2_ctc"
||
model_type
==
"wenet_ctc"
)
{
return
std
::
make_unique
<
OfflineRecognizerCtcImpl
>
(
mgr
,
config
);
}
...
...
@@ -267,6 +270,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
" - Non-streaming transducer models from icefall
\n
"
" - Non-streaming Paraformer models from FunASR
\n
"
" - EncDecCTCModelBPE models from NeMo
\n
"
" - EncDecHybridRNNTCTCBPEModel models from NeMo
\n
"
" - Whisper models
\n
"
" - Tdnn models
\n
"
" - Zipformer CTC models
\n
"
...
...
sherpa-onnx/csrc/symbol-table.cc
查看文件 @
5d8c35e
...
...
@@ -67,9 +67,13 @@ void SymbolTable::Init(std::istream &is) {
// the following check.
//
// Note: Only id2sym_ matters as we use it to convert ID to symbols.
#if 0
// we disable the test here since for some multi-lingual BPE models
// from NeMo, the same symbol can appear multiple times with different IDs.
if (sym != " ") {
assert(sym2id_.count(sym) == 0);
}
#endif
assert
(
id2sym_
.
count
(
id
)
==
0
);
...
...
请注册或登录后发表评论