Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-01-06 06:44:09 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-01-06 06:44:09 +0800
Commit
1fe5fe495fc29cbef663c7d9bd4faddfaf40f024
1fe5fe49
1 parent
3eced3e7
Add Android demo for MatchaTTS models. (#1683)
隐藏空白字符变更
内嵌
并排对比
正在显示
9 个修改的文件
包含
222 行增加
和
38 行删除
.github/workflows/apk-tts-engine.yaml
android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
scripts/apk/build-apk-tts-engine.sh.in
scripts/apk/build-apk-tts.sh.in
scripts/apk/generate-tts-apk-script.py
sherpa-onnx/csrc/offline-tts-vits-impl.h
sherpa-onnx/kotlin-api/Tts.kt
.github/workflows/apk-tts-engine.yaml
查看文件 @
1fe5fe4
...
...
@@ -26,6 +26,7 @@ jobs:
total
:
[
"
40"
]
index
:
[
"
0"
,
"
1"
,
"
2"
,
"
3"
,
"
4"
,
"
5"
,
"
6"
,
"
7"
,
"
8"
,
"
9"
,
"
10"
,
"
11"
,
"
12"
,
"
13"
,
"
14"
,
"
15"
,
"
16"
,
"
17"
,
"
18"
,
"
19"
,
"
20"
,
"
21"
,
"
22"
,
"
23"
,
"
24"
,
"
25"
,
"
26"
,
"
27"
,
"
28"
,
"
29"
,
"
30"
,
"
31"
,
"
32"
,
"
33"
,
"
34"
,
"
35"
,
"
36"
,
"
37"
,
"
38"
,
"
39"
]
steps
:
-
uses
:
actions/checkout@v4
with
:
...
...
android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
查看文件 @
1fe5fe4
...
...
@@ -183,6 +183,8 @@ class MainActivity : AppCompatActivity() {
private fun initTts() {
var modelDir: String?
var modelName: String?
var acousticModelName: String?
var vocoder: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
...
...
@@ -193,8 +195,18 @@ class MainActivity : AppCompatActivity() {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
modelDir = null
// VITS -- begin
modelName = null
// VITS -- end
// Matcha -- begin
acousticModelName = null
vocoder = null
// Matcha -- end
modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
...
...
@@ -217,7 +229,6 @@ class MainActivity : AppCompatActivity() {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
...
...
@@ -233,24 +244,47 @@ class MainActivity : AppCompatActivity() {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// Example 6
// vits-melo-tts-zh_en
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
// modelDir = "vits-melo-tts-zh_en"
// modelName = "model.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-melo-tts-zh_en/dict"
// Example 7
// matcha-icefall-zh-baker
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = "matcha-icefall-zh-baker"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// lexicon = "lexicon.txt"
// dictDir = "matcha-icefall-zh-baker/dict"
// Example 8
// matcha-icefall-en_US-ljspeech
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = "matcha-icefall-en_US-ljspeech"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
if (dataDir != null) {
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dataDir = newDir + "/" + dataDir
assets = null
val newDir = copyDataDir(dataDir!!)
dataDir = "$newDir/$dataDir"
}
if (dictDir != null) {
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dictDir = modelDir + "/" + "dict"
val newDir = copyDataDir(dictDir!!)
dictDir = "$newDir/$dictDir"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}
val config = getOfflineTtsConfig(
modelDir = modelDir!!,
modelName = modelName!!,
modelName = modelName ?: "",
acousticModelName = acousticModelName ?: "",
vocoder = vocoder ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
...
...
android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/MainActivity.kt
查看文件 @
1fe5fe4
...
...
@@ -57,7 +57,7 @@ class MainActivity : ComponentActivity() {
color = MaterialTheme.colorScheme.background
) {
Scaffold(topBar = {
TopAppBar(title = { Text("Next-gen Kaldi: TTS") })
TopAppBar(title = { Text("Next-gen Kaldi: TTS
Engine
") })
}) {
Box(modifier = Modifier.padding(it)) {
Column(modifier = Modifier.padding(16.dp)) {
...
...
@@ -65,8 +65,8 @@ class MainActivity : ComponentActivity() {
Text("Speed " + String.format("%.1f", TtsEngine.speed))
Slider(
value = TtsEngine.speedState.value,
onValueChange = {
TtsEngine.speed = it
onValueChange = {
TtsEngine.speed = it
preferenceHelper.setSpeed(it)
},
valueRange = 0.2F..3.0F,
...
...
@@ -138,7 +138,9 @@ class MainActivity : ComponentActivity() {
val filename =
application.filesDir.absolutePath + "/generated.wav"
val ok =
audio.samples.isNotEmpty() && audio.save(filename)
audio.samples.isNotEmpty() && audio.save(
filename
)
if (ok) {
stopMediaPlayer()
...
...
android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
查看文件 @
1fe5fe4
package com.k2fsa.sherpa.onnx.tts.engine
import PreferenceHelper
import android.content.Context
import android.content.res.AssetManager
import android.util.Log
...
...
@@ -11,7 +12,6 @@ import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
import PreferenceHelper
object TtsEngine {
var tts: OfflineTts? = null
...
...
@@ -41,6 +41,8 @@ object TtsEngine {
private var modelDir: String? = null
private var modelName: String? = null
private var acousticModelName: String? = null
private var vocoder: String? = null
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
...
...
@@ -52,8 +54,17 @@ object TtsEngine {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
modelDir = null
//
// For VITS -- begin
modelName = null
// For VITS -- end
// For Matcha -- begin
acousticModelName = null
vocoder = null
// For Matcha -- end
modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
...
...
@@ -82,7 +93,6 @@ object TtsEngine {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"
...
...
@@ -101,8 +111,35 @@ object TtsEngine {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// lang = "deu"
}
// Example 6
// vits-melo-tts-zh_en
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
// modelDir = "vits-melo-tts-zh_en"
// modelName = "model.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-melo-tts-zh_en/dict"
// lang = "zho"
// Example 7
// matcha-icefall-zh-baker
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = "matcha-icefall-zh-baker"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// lexicon = "lexicon.txt"
// dictDir = "matcha-icefall-zh-baker/dict"
// lang = "zho"
// Example 8
// matcha-icefall-en_US-ljspeech
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = "matcha-icefall-en_US-ljspeech"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
// lang = "eng"
}
fun createTts(context: Context) {
Log.i(TAG, "Init Next-gen Kaldi TTS")
...
...
@@ -115,22 +152,22 @@ object TtsEngine {
assets = context.assets
if (dataDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = "$newDir/$modelDir"
val newDir = copyDataDir(context, dataDir!!)
dataDir = "$newDir/$dataDir"
assets = null
}
if (dictDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = "$newDir/$modelDir"
dictDir = "$modelDir/dict"
val newDir = copyDataDir(context, dictDir!!)
dictDir = "$newDir/$dictDir"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
modelDir = modelDir!!,
modelName = modelName ?: "",
acousticModelName = acousticModelName ?: "",
vocoder = vocoder ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
...
...
scripts/apk/build-apk-tts-engine.sh.in
查看文件 @
1fe5fe4
...
...
@@ -37,6 +37,8 @@ mkdir -p apks
pushd
./android/SherpaOnnxTtsEngine/app/src/main/assets/
model_dir
={{
tts_model.model_dir
}}
model_name
={{
tts_model.model_name
}}
acoustic_model_name
={{
tts_model.acoustic_model_name
}}
vocoder
={{
tts_model.vocoder
}}
lang
={{
tts_model.lang
}}
lang_iso_639_3
={{
tts_model.lang_iso_639_3
}}
...
...
@@ -44,15 +46,30 @@ wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$mod
tar xf
$model_dir
.tar.bz2
rm
$model_dir
.tar.bz2
{%
if
tts_model.vocoder %
}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/
$vocoder
{%
endif %
}
popd
# Now we are at the project root directory
git checkout .
pushd
android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
sed -i.bak s/
"modelDir = null"
/
"modelDir =
\"
$model_dir
\"
"
/ ./TtsEngine.kt
sed -i.bak s/
"modelName = null"
/
"modelName =
\"
$model_name
\"
"
/ ./TtsEngine.kt
sed -i.bak s/
"lang = null"
/
"lang =
\"
$lang_iso_639_3
\"
"
/ ./TtsEngine.kt
{%
if
tts_model.model_name %
}
sed -i.bak s/
"modelName = null"
/
"modelName =
\"
$model_name
\"
"
/ ./TtsEngine.kt
{%
endif %
}
{# Matcha models are described by acoustic_model_name and leave model_name
   empty, so this substitution must be keyed on acoustic_model_name; keying it
   on model_name would skip it for exactly the models that need it. #}
{% if tts_model.acoustic_model_name %}
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./TtsEngine.kt
{% endif %}
{%
if
tts_model.vocoder %
}
sed -i.bak s/
"vocoder = null"
/
"vocoder =
\"
$vocoder
\"
"
/ ./TtsEngine.kt
{%
endif %
}
{%
if
tts_model.rule_fsts %
}
rule_fsts
={{
tts_model.rule_fsts
}}
sed -i.bak s%
"ruleFsts = null"
%
"ruleFsts =
\"
$rule_fsts
\"
"
% ./TtsEngine.kt
...
...
@@ -109,6 +126,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done
rm -rf ./android/SherpaOnnxTtsEngine/app/src/main/assets/
$model_dir
rm -fv ./android/SherpaOnnxTtsEngine/app/src/main/assets/
*
.onnx
{%
endfor %
}
git checkout .
...
...
scripts/apk/build-apk-tts.sh.in
查看文件 @
1fe5fe4
...
...
@@ -37,19 +37,38 @@ mkdir -p apks
pushd
./android/SherpaOnnxTts/app/src/main/assets/
model_dir
={{
tts_model.model_dir
}}
model_name
={{
tts_model.model_name
}}
acoustic_model_name
={{
tts_model.acoustic_model_name
}}
vocoder
={{
tts_model.vocoder
}}
lang
={{
tts_model.lang
}}
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/
$model_dir
.tar.bz2
tar xf
$model_dir
.tar.bz2
rm
$model_dir
.tar.bz2
{%
if
tts_model.vocoder %
}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/
$vocoder
{%
endif %
}
popd
# Now we are at the project root directory
git checkout .
pushd
android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/
"modelDir = null"
/
"modelDir =
\"
$model_dir
\"
"
/ ./MainActivity.kt
sed -i.bak s/
"modelName = null"
/
"modelName =
\"
$model_name
\"
"
/ ./MainActivity.kt
{%
if
tts_model.model_name %
}
sed -i.bak s/
"modelName = null"
/
"modelName =
\"
$model_name
\"
"
/ ./MainActivity.kt
{%
endif %
}
{%
if
tts_model.acoustic_model_name %
}
sed -i.bak s/
"acousticModelName = null"
/
"acousticModelName =
\"
$acoustic_model_name
\"
"
/ ./MainActivity.kt
{%
endif %
}
{%
if
tts_model.vocoder %
}
sed -i.bak s/
"vocoder = null"
/
"vocoder =
\"
$vocoder
\"
"
/ ./MainActivity.kt
{%
endif %
}
{%
if
tts_model.rule_fsts %
}
rule_fsts
={{
tts_model.rule_fsts
}}
...
...
@@ -107,6 +126,8 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done
rm -rf ./android/SherpaOnnxTts/app/src/main/assets/
$model_dir
rm -fv ./android/SherpaOnnxTts/app/src/main/assets/
*
.onnx
{%
endfor %
}
git checkout .
...
...
scripts/apk/generate-tts-apk-script.py
查看文件 @
1fe5fe4
...
...
@@ -30,7 +30,9 @@ def get_args():
@dataclass
class
TtsModel
:
model_dir
:
str
model_name
:
str
=
""
model_name
:
str
=
""
# for vits
acoustic_model_name
:
str
=
""
# for matcha
vocoder
:
str
=
""
# for matcha
lang
:
str
=
""
# en, zh, fr, de, etc.
rule_fsts
:
Optional
[
List
[
str
]]
=
None
rule_fars
:
Optional
[
List
[
str
]]
=
None
...
...
@@ -378,6 +380,35 @@ def get_vits_models() -> List[TtsModel]:
return
all_models
def get_matcha_models() -> List[TtsModel]:
    """Build the list of Matcha TTS model descriptions.

    Returns:
      A list with the Chinese model(s) first, followed by the English
      model(s). Every entry has ``vocoder`` set to ``hifigan_v2.onnx``.
      Chinese entries additionally get ``rule_fsts`` (a comma-separated string
      of FST paths under the model directory) and ``dict_dir``; English
      entries get ``data_dir`` pointing at the ``espeak-ng-data`` directory
      inside the model directory.
    """
    vocoder_name = "hifigan_v2.onnx"

    zh_models = [
        TtsModel(
            model_dir="matcha-icefall-zh-baker",
            acoustic_model_name="model-steps-3.onnx",
            lang="zh",
        )
    ]
    fst_names = ("phone.fst", "date.fst", "number.fst")
    for model in zh_models:
        # Rule FST paths are given relative to the assets directory, hence the
        # model_dir prefix on each file name.
        model.rule_fsts = ",".join(
            f"{model.model_dir}/{name}" for name in fst_names
        )
        model.dict_dir = f"{model.model_dir}/dict"
        model.vocoder = vocoder_name

    en_models = [
        TtsModel(
            model_dir="matcha-icefall-en_US-ljspeech",
            acoustic_model_name="model-steps-3.onnx",
            lang="en",
        )
    ]
    for model in en_models:
        model.data_dir = f"{model.model_dir}/espeak-ng-data"
        model.vocoder = vocoder_name

    return zh_models + en_models
def
main
():
args
=
get_args
()
index
=
args
.
index
...
...
@@ -389,7 +420,10 @@ def main():
all_model_list
+=
get_piper_models
()
all_model_list
+=
get_mimic3_models
()
all_model_list
+=
get_coqui_models
()
all_model_list
+=
get_matcha_models
()
convert_lang_to_iso_639_3
(
all_model_list
)
print
(
all_model_list
)
num_models
=
len
(
all_model_list
)
...
...
sherpa-onnx/csrc/offline-tts-vits-impl.h
查看文件 @
1fe5fe4
...
...
@@ -348,6 +348,10 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
mgr
,
config_
.
model
.
vits
.
lexicon
,
config_
.
model
.
vits
.
tokens
,
config_
.
model
.
vits
.
dict_dir
,
model_
->
GetMetaData
(),
config_
.
model
.
debug
);
}
else
if
(
meta_data
.
jieba
&&
!
config_
.
model
.
vits
.
dict_dir
.
empty
())
{
frontend_
=
std
::
make_unique
<
JiebaLexicon
>
(
mgr
,
config_
.
model
.
vits
.
lexicon
,
config_
.
model
.
vits
.
tokens
,
config_
.
model
.
vits
.
dict_dir
,
config_
.
model
.
debug
);
}
else
if
(
meta_data
.
is_melo_tts
&&
meta_data
.
language
==
"English"
)
{
frontend_
=
std
::
make_unique
<
MeloTtsLexicon
>
(
mgr
,
config_
.
model
.
vits
.
lexicon
,
config_
.
model
.
vits
.
tokens
,
...
...
sherpa-onnx/kotlin-api/Tts.kt
查看文件 @
1fe5fe4
...
...
@@ -173,22 +173,55 @@ class OfflineTts(
// to download models
fun getOfflineTtsConfig(
modelDir: String,
modelName: String,
modelName: String, // for VITS
acousticModelName: String, // for Matcha
vocoder: String, // for Matcha
lexicon: String,
dataDir: String,
dictDir: String,
ruleFsts: String,
ruleFars: String
): OfflineTtsConfig {
if (modelName.isEmpty() && acousticModelName.isEmpty()) {
throw IllegalArgumentException("Please specify a TTS model")
}
if (modelName.isNotEmpty() && acousticModelName.isNotEmpty()) {
throw IllegalArgumentException("Please specify either a VITS or a Matcha model, but not both")
}
if (acousticModelName.isNotEmpty() && vocoder.isEmpty()) {
throw IllegalArgumentException("Please provide vocoder for Matcha TTS")
}
val vits = if (modelName.isNotEmpty()) {
OfflineTtsVitsModelConfig(
model = "$modelDir/$modelName",
lexicon = "$modelDir/$lexicon",
tokens = "$modelDir/tokens.txt",
dataDir = dataDir,
dictDir = dictDir,
)
} else {
OfflineTtsVitsModelConfig()
}
val matcha = if (acousticModelName.isNotEmpty()) {
OfflineTtsMatchaModelConfig(
acousticModel = "$modelDir/$acousticModelName",
vocoder = vocoder,
lexicon = "$modelDir/$lexicon",
tokens = "$modelDir/tokens.txt",
dictDir = dictDir,
dataDir = dataDir,
)
} else {
OfflineTtsMatchaModelConfig()
}
return OfflineTtsConfig(
model = OfflineTtsModelConfig(
vits = OfflineTtsVitsModelConfig(
model = "$modelDir/$modelName",
lexicon = "$modelDir/$lexicon",
tokens = "$modelDir/tokens.txt",
dataDir = dataDir,
dictDir = dictDir,
),
vits = vits,
matcha = matcha,
numThreads = 2,
debug = true,
provider = "cpu",
...
...
请
注册
或
登录
后发表评论