Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-02-10 15:38:29 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-02-10 15:38:29 +0800
Commit
9559a10bd36fb785d1089e0c7527448f1c1c5d63
9559a10b
1 parent
7d62ccf1
Add C++ support for MatchaTTS models not from icefall. (#1834)
显示空白字符变更
内嵌
并排对比
正在显示
5 个修改的文件
包含
59 行增加
和
5 行删除
.github/scripts/test-offline-tts.sh
scripts/apk/generate-tts-apk-script.py
sherpa-onnx/csrc/offline-tts-matcha-impl.h
sherpa-onnx/csrc/offline-tts-matcha-model-meta-data.h
sherpa-onnx/csrc/offline-tts-matcha-model.cc
.github/scripts/test-offline-tts.sh
查看文件 @
9559a10
...
...
@@ -44,6 +44,28 @@ done
rm -rf kokoro-en-v0_19
log
"------------------------------------------------------------"
log
"matcha-tts-fa_en-male"
log
"------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-tts-fa_en-male.tar.bz2
tar xvf matcha-tts-fa_en-male.tar.bz2
rm matcha-tts-fa_en-male.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
$EXE
\
--matcha-acoustic-model
=
./matcha-tts-fa_en-male/model.onnx
\
--matcha-vocoder
=
./hifigan_v2.onnx
\
--matcha-tokens
=
./matcha-tts-fa_en-male/tokens.txt
\
--matcha-data-dir
=
./matcha-tts-fa_en-male/espeak-ng-data
\
--output-filename
=
./tts/test-matcha-fa-en-male.wav
\
--num-threads
=
2
\
"How are you doing today? این یک نمونه ی تست فارسی است. This is a test."
rm -rf matcha-tts-fa_en-male
rm hifigan_v2.onnx
ls -lh tts/
*
.wav
log
"------------------------------------------------------------"
log
"matcha-icefall-en_US-ljspeech"
log
"------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
...
...
@@ -64,6 +86,7 @@ $EXE \
rm hifigan_v2.onnx
rm -rf matcha-icefall-en_US-ljspeech
ls -lh tts/
*
.wav
log
"------------------------------------------------------------"
log
"matcha-icefall-zh-baker"
...
...
scripts/apk/generate-tts-apk-script.py
查看文件 @
9559a10
...
...
@@ -397,18 +397,28 @@ def get_matcha_models() -> List[TtsModel]:
m
.
dict_dir
=
m
.
model_dir
+
"/dict"
m
.
vocoder
=
"hifigan_v2.onnx"
english_models
=
[
english_
persian_
models
=
[
TtsModel
(
model_dir
=
"matcha-icefall-en_US-ljspeech"
,
acoustic_model_name
=
"model-steps-3.onnx"
,
lang
=
"en"
,
)
),
TtsModel
(
model_dir
=
"matcha-tts-fa_en-male"
,
acoustic_model_name
=
"model.onnx"
,
lang
=
"fa"
,
),
TtsModel
(
model_dir
=
"matcha-tts-fa_en-female"
,
acoustic_model_name
=
"model.onnx"
,
lang
=
"fa"
,
),
]
for
m
in
english_models
:
for
m
in
english_
persian_
models
:
m
.
data_dir
=
f
"{m.model_dir}/espeak-ng-data"
m
.
vocoder
=
"hifigan_v2.onnx"
return
chinese_models
+
english_models
return
chinese_models
+
english_
persian_
models
def
get_kokoro_models
()
->
List
[
TtsModel
]:
...
...
sherpa-onnx/csrc/offline-tts-matcha-impl.h
查看文件 @
9559a10
...
...
@@ -214,7 +214,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
}
std
::
vector
<
TokenIDs
>
token_ids
=
frontend_
->
ConvertTextToTokenIds
(
text
,
"en-US"
);
frontend_
->
ConvertTextToTokenIds
(
text
,
meta_data
.
voice
);
if
(
token_ids
.
empty
()
||
(
token_ids
.
size
()
==
1
&&
token_ids
[
0
].
tokens
.
empty
()))
{
...
...
sherpa-onnx/csrc/offline-tts-matcha-model-meta-data.h
查看文件 @
9559a10
...
...
@@ -21,6 +21,8 @@ struct OfflineTtsMatchaModelMetaData {
int32_t
has_espeak
=
0
;
int32_t
use_eos_bos
=
0
;
int32_t
pad_id
=
0
;
std
::
string
voice
;
};
}
// namespace sherpa_onnx
...
...
sherpa-onnx/csrc/offline-tts-matcha-model.cc
查看文件 @
9559a10
...
...
@@ -83,15 +83,32 @@ class OfflineTtsMatchaModel::Impl {
Ort
::
Value
sid_tensor
=
Ort
::
Value
::
CreateTensor
(
memory_info
,
&
sid
,
1
,
&
scale_shape
,
1
);
std
::
array
<
float
,
2
>
scales
=
{
noise_scale
,
length_scale
};
int64_t
scales_shape
=
2
;
Ort
::
Value
scales_tensor
=
Ort
::
Value
::
CreateTensor
(
memory_info
,
scales
.
data
(),
scales
.
size
(),
&
scales_shape
,
1
);
std
::
vector
<
Ort
::
Value
>
inputs
;
inputs
.
reserve
(
5
);
inputs
.
push_back
(
std
::
move
(
x
));
inputs
.
push_back
(
std
::
move
(
x_length
));
if
(
input_names_
[
2
]
==
"scales"
)
{
// for models from
// https://github.com/shivammehta25/Matcha-TTS
inputs
.
push_back
(
std
::
move
(
scales_tensor
));
}
else
{
// for models from icefall
inputs
.
push_back
(
std
::
move
(
noise_scale_tensor
));
inputs
.
push_back
(
std
::
move
(
length_scale_tensor
));
}
if
(
input_names_
.
size
()
==
5
&&
input_names_
.
back
()
==
"sid"
)
{
// for models from icefall
inputs
.
push_back
(
std
::
move
(
sid_tensor
));
// Note that we have not supported multi-speaker tts models from
// https://github.com/shivammehta25/Matcha-TTS
}
auto
out
=
...
...
@@ -145,6 +162,8 @@ class OfflineTtsMatchaModel::Impl {
SHERPA_ONNX_READ_META_DATA
(
meta_data_
.
has_espeak
,
"has_espeak"
);
SHERPA_ONNX_READ_META_DATA
(
meta_data_
.
use_eos_bos
,
"use_eos_bos"
);
SHERPA_ONNX_READ_META_DATA
(
meta_data_
.
pad_id
,
"pad_id"
);
SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT
(
meta_data_
.
voice
,
"voice"
,
"en-us"
);
}
private
:
...
...
请
注册
或
登录
后发表评论