Fangjun Kuang
Committed by GitHub

Update TTS Engine APK to support multi-lang (#2294)

@@ -23,6 +23,9 @@ object TtsEngine { @@ -23,6 +23,9 @@ object TtsEngine {
23 // cmn for Mandarin 23 // cmn for Mandarin
24 var lang: String? = null 24 var lang: String? = null
25 25
  26 + // If a model supports two languages, also set lang2
  27 + var lang2: String? = null
  28 +
26 29
27 val speedState: MutableState<Float> = mutableFloatStateOf(1.0F) 30 val speedState: MutableState<Float> = mutableFloatStateOf(1.0F)
28 val speakerIdState: MutableState<Int> = mutableIntStateOf(0) 31 val speakerIdState: MutableState<Int> = mutableIntStateOf(0)
@@ -76,6 +79,7 @@ object TtsEngine { @@ -76,6 +79,7 @@ object TtsEngine {
76 dataDir = null 79 dataDir = null
77 dictDir = null 80 dictDir = null
78 lang = null 81 lang = null
  82 + lang2 = null
79 83
80 // Please enable one and only one of the examples below 84 // Please enable one and only one of the examples below
81 85
@@ -125,6 +129,7 @@ object TtsEngine { @@ -125,6 +129,7 @@ object TtsEngine {
125 // lexicon = "lexicon.txt" 129 // lexicon = "lexicon.txt"
126 // dictDir = "vits-melo-tts-zh_en/dict" 130 // dictDir = "vits-melo-tts-zh_en/dict"
127 // lang = "zho" 131 // lang = "zho"
  132 + // lang2 = "eng"
128 133
129 // Example 7 134 // Example 7
130 // matcha-icefall-zh-baker 135 // matcha-icefall-zh-baker
@@ -162,6 +167,7 @@ object TtsEngine { @@ -162,6 +167,7 @@ object TtsEngine {
162 // dictDir = "kokoro-multi-lang-v1_0/dict" 167 // dictDir = "kokoro-multi-lang-v1_0/dict"
163 // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" 168 // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
164 // lang = "eng" 169 // lang = "eng"
  170 + // lang2 = "zho"
165 // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" 171 // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
166 // 172 //
167 // This model supports many languages, e.g., English, Chinese, etc. 173 // This model supports many languages, e.g., English, Chinese, etc.
@@ -60,6 +60,9 @@ class TtsService : TextToSpeechService() { @@ -60,6 +60,9 @@ class TtsService : TextToSpeechService() {
60 60
61 // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68 61 // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68
62 onLoadLanguage(TtsEngine.lang, "", "") 62 onLoadLanguage(TtsEngine.lang, "", "")
  63 + if (TtsEngine.lang2 != null) {
  64 + onLoadLanguage(TtsEngine.lang2, "", "")
  65 + }
63 } 66 }
64 67
65 override fun onDestroy() { 68 override fun onDestroy() {
@@ -71,7 +74,7 @@ class TtsService : TextToSpeechService() { @@ -71,7 +74,7 @@ class TtsService : TextToSpeechService() {
71 override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int { 74 override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int {
72 val lang = _lang ?: "" 75 val lang = _lang ?: ""
73 76
74 - if (lang == TtsEngine.lang) { 77 + if (lang == TtsEngine.lang || lang == TtsEngine.lang2) {
75 return TextToSpeech.LANG_AVAILABLE 78 return TextToSpeech.LANG_AVAILABLE
76 } 79 }
77 80
@@ -87,12 +90,12 @@ class TtsService : TextToSpeechService() { @@ -87,12 +90,12 @@ class TtsService : TextToSpeechService() {
87 Log.i(TAG, "onLoadLanguage: $_lang, $_country") 90 Log.i(TAG, "onLoadLanguage: $_lang, $_country")
88 val lang = _lang ?: "" 91 val lang = _lang ?: ""
89 92
90 - return if (lang == TtsEngine.lang) { 93 + return if (lang == TtsEngine.lang || lang == TtsEngine.lang2) {
91 Log.i(TAG, "creating tts, lang :$lang") 94 Log.i(TAG, "creating tts, lang :$lang")
92 TtsEngine.createTts(application) 95 TtsEngine.createTts(application)
93 TextToSpeech.LANG_AVAILABLE 96 TextToSpeech.LANG_AVAILABLE
94 } else { 97 } else {
95 - Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}") 98 + Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}, ${TtsEngine.lang2}")
96 TextToSpeech.LANG_NOT_SUPPORTED 99 TextToSpeech.LANG_NOT_SUPPORTED
97 } 100 }
98 } 101 }
@@ -42,6 +42,7 @@ vocoder={{ tts_model.vocoder }} @@ -42,6 +42,7 @@ vocoder={{ tts_model.vocoder }}
42 voices={{ tts_model.voices }} 42 voices={{ tts_model.voices }}
43 lang={{ tts_model.lang }} 43 lang={{ tts_model.lang }}
44 lang_iso_639_3={{ tts_model.lang_iso_639_3 }} 44 lang_iso_639_3={{ tts_model.lang_iso_639_3 }}
  45 +lang_iso_639_3_2={{ tts_model.lang_iso_639_3_2 }}
45 46
46 wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2 47 wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
47 tar xf $model_dir.tar.bz2 48 tar xf $model_dir.tar.bz2
@@ -59,6 +60,10 @@ pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/en @@ -59,6 +60,10 @@ pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/en
59 sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt 60 sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
60 sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt 61 sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
61 62
  63 +{% if tts_model.lang2 %}
  64 + sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt
  65 +{% endif %}
  66 +
62 {% if tts_model.model_name %} 67 {% if tts_model.model_name %}
63 sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt 68 sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
64 {% endif %} 69 {% endif %}
@@ -109,7 +114,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then @@ -109,7 +114,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
109 lang=zh_en 114 lang=zh_en
110 fi 115 fi
111 116
112 -if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then 117 +if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then
113 lang=zh_en 118 lang=zh_en
114 fi 119 fi
115 120
@@ -108,7 +108,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then @@ -108,7 +108,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
108 lang=zh_en 108 lang=zh_en
109 fi 109 fi
110 110
111 -if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then 111 +if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then
112 lang=zh_en 112 lang=zh_en
113 fi 113 fi
114 114
@@ -35,12 +35,14 @@ class TtsModel: @@ -35,12 +35,14 @@ class TtsModel:
35 vocoder: str = "" # for matcha 35 vocoder: str = "" # for matcha
36 voices: str = "" # for kokoro 36 voices: str = "" # for kokoro
37 lang: str = "" # en, zh, fr, de, etc. 37 lang: str = "" # en, zh, fr, de, etc.
  38 + lang2: str = "" # optional second language for models that support two languages: en, zh, fr, de, etc.
38 rule_fsts: Optional[List[str]] = None 39 rule_fsts: Optional[List[str]] = None
39 rule_fars: Optional[List[str]] = None 40 rule_fars: Optional[List[str]] = None
40 data_dir: Optional[str] = None 41 data_dir: Optional[str] = None
41 dict_dir: Optional[str] = None 42 dict_dir: Optional[str] = None
42 is_char: bool = False 43 is_char: bool = False
43 lang_iso_639_3: str = "" 44 lang_iso_639_3: str = ""
  45 + lang_iso_639_3_2: str = ""
44 lexicon: str = "" 46 lexicon: str = ""
45 47
46 48
@@ -48,6 +50,8 @@ def convert_lang_to_iso_639_3(models: List[TtsModel]): @@ -48,6 +50,8 @@ def convert_lang_to_iso_639_3(models: List[TtsModel]):
48 for m in models: 50 for m in models:
49 if m.lang_iso_639_3 == "": 51 if m.lang_iso_639_3 == "":
50 m.lang_iso_639_3 = Lang(m.lang).pt3 52 m.lang_iso_639_3 = Lang(m.lang).pt3
  53 + if m.lang2 != "":
  54 + m.lang_iso_639_3_2 = Lang(m.lang2).pt3
51 55
52 56
53 def get_coqui_models() -> List[TtsModel]: 57 def get_coqui_models() -> List[TtsModel]:
@@ -322,6 +326,7 @@ def get_vits_models() -> List[TtsModel]: @@ -322,6 +326,7 @@ def get_vits_models() -> List[TtsModel]:
322 model_dir="vits-melo-tts-zh_en", 326 model_dir="vits-melo-tts-zh_en",
323 model_name="model.onnx", 327 model_name="model.onnx",
324 lang="zh", 328 lang="zh",
  329 + lang2="en",
325 ), 330 ),
326 TtsModel( 331 TtsModel(
327 model_dir="vits-zh-hf-fanchen-C", 332 model_dir="vits-zh-hf-fanchen-C",
@@ -438,16 +443,19 @@ def get_kokoro_models() -> List[TtsModel]: @@ -438,16 +443,19 @@ def get_kokoro_models() -> List[TtsModel]:
438 model_dir="kokoro-multi-lang-v1_0", 443 model_dir="kokoro-multi-lang-v1_0",
439 model_name="model.onnx", 444 model_name="model.onnx",
440 lang="en", 445 lang="en",
  446 + lang2="zh",
441 ), 447 ),
442 TtsModel( 448 TtsModel(
443 model_dir="kokoro-multi-lang-v1_1", 449 model_dir="kokoro-multi-lang-v1_1",
444 model_name="model.onnx", 450 model_name="model.onnx",
445 lang="en", 451 lang="en",
  452 + lang2="zh",
446 ), 453 ),
447 TtsModel( 454 TtsModel(
448 model_dir="kokoro-int8-multi-lang-v1_1", 455 model_dir="kokoro-int8-multi-lang-v1_1",
449 model_name="model.int8.onnx", 456 model_name="model.int8.onnx",
450 lang="en", 457 lang="en",
  458 + lang2="zh",
451 ), 459 ),
452 ] 460 ]
453 for m in multi_lingual_models: 461 for m in multi_lingual_models: