Committed by
GitHub
Update TTS Engine APK to support multi-lang (#2294)
正在显示
5 个修改的文件
包含
27 行增加
和
5 行删除
| @@ -23,6 +23,9 @@ object TtsEngine { | @@ -23,6 +23,9 @@ object TtsEngine { | ||
| 23 | // cmn for Mandarin | 23 | // cmn for Mandarin |
| 24 | var lang: String? = null | 24 | var lang: String? = null |
| 25 | 25 | ||
| 26 | + // If a model supports two languages, also set lang2. | ||
| 27 | + var lang2: String? = null | ||
| 28 | + | ||
| 26 | 29 | ||
| 27 | val speedState: MutableState<Float> = mutableFloatStateOf(1.0F) | 30 | val speedState: MutableState<Float> = mutableFloatStateOf(1.0F) |
| 28 | val speakerIdState: MutableState<Int> = mutableIntStateOf(0) | 31 | val speakerIdState: MutableState<Int> = mutableIntStateOf(0) |
| @@ -76,6 +79,7 @@ object TtsEngine { | @@ -76,6 +79,7 @@ object TtsEngine { | ||
| 76 | dataDir = null | 79 | dataDir = null |
| 77 | dictDir = null | 80 | dictDir = null |
| 78 | lang = null | 81 | lang = null |
| 82 | + lang2 = null | ||
| 79 | 83 | ||
| 80 | // Please enable one and only one of the examples below | 84 | // Please enable one and only one of the examples below |
| 81 | 85 | ||
| @@ -125,6 +129,7 @@ object TtsEngine { | @@ -125,6 +129,7 @@ object TtsEngine { | ||
| 125 | // lexicon = "lexicon.txt" | 129 | // lexicon = "lexicon.txt" |
| 126 | // dictDir = "vits-melo-tts-zh_en/dict" | 130 | // dictDir = "vits-melo-tts-zh_en/dict" |
| 127 | // lang = "zho" | 131 | // lang = "zho" |
| 132 | + // lang2 = "eng" | ||
| 128 | 133 | ||
| 129 | // Example 7 | 134 | // Example 7 |
| 130 | // matcha-icefall-zh-baker | 135 | // matcha-icefall-zh-baker |
| @@ -162,6 +167,7 @@ object TtsEngine { | @@ -162,6 +167,7 @@ object TtsEngine { | ||
| 162 | // dictDir = "kokoro-multi-lang-v1_0/dict" | 167 | // dictDir = "kokoro-multi-lang-v1_0/dict" |
| 163 | // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" | 168 | // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" |
| 164 | // lang = "eng" | 169 | // lang = "eng" |
| 170 | + // lang2 = "zho" | ||
| 165 | // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" | 171 | // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" |
| 166 | // | 172 | // |
| 167 | // This model supports many languages, e.g., English, Chinese, etc. | 173 | // This model supports many languages, e.g., English, Chinese, etc. |
| @@ -60,6 +60,9 @@ class TtsService : TextToSpeechService() { | @@ -60,6 +60,9 @@ class TtsService : TextToSpeechService() { | ||
| 60 | 60 | ||
| 61 | // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68 | 61 | // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68 |
| 62 | onLoadLanguage(TtsEngine.lang, "", "") | 62 | onLoadLanguage(TtsEngine.lang, "", "") |
| 63 | + if (TtsEngine.lang2 != null) { | ||
| 64 | + onLoadLanguage(TtsEngine.lang2, "", "") | ||
| 65 | + } | ||
| 63 | } | 66 | } |
| 64 | 67 | ||
| 65 | override fun onDestroy() { | 68 | override fun onDestroy() { |
| @@ -71,7 +74,7 @@ class TtsService : TextToSpeechService() { | @@ -71,7 +74,7 @@ class TtsService : TextToSpeechService() { | ||
| 71 | override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int { | 74 | override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int { |
| 72 | val lang = _lang ?: "" | 75 | val lang = _lang ?: "" |
| 73 | 76 | ||
| 74 | - if (lang == TtsEngine.lang) { | 77 | + if (lang == TtsEngine.lang || lang == TtsEngine.lang2) { |
| 75 | return TextToSpeech.LANG_AVAILABLE | 78 | return TextToSpeech.LANG_AVAILABLE |
| 76 | } | 79 | } |
| 77 | 80 | ||
| @@ -87,12 +90,12 @@ class TtsService : TextToSpeechService() { | @@ -87,12 +90,12 @@ class TtsService : TextToSpeechService() { | ||
| 87 | Log.i(TAG, "onLoadLanguage: $_lang, $_country") | 90 | Log.i(TAG, "onLoadLanguage: $_lang, $_country") |
| 88 | val lang = _lang ?: "" | 91 | val lang = _lang ?: "" |
| 89 | 92 | ||
| 90 | - return if (lang == TtsEngine.lang) { | 93 | + return if (lang == TtsEngine.lang || lang == TtsEngine.lang2) { |
| 91 | Log.i(TAG, "creating tts, lang :$lang") | 94 | Log.i(TAG, "creating tts, lang :$lang") |
| 92 | TtsEngine.createTts(application) | 95 | TtsEngine.createTts(application) |
| 93 | TextToSpeech.LANG_AVAILABLE | 96 | TextToSpeech.LANG_AVAILABLE |
| 94 | } else { | 97 | } else { |
| 95 | - Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}") | 98 | + Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}, ${TtsEngine.lang2}") |
| 96 | TextToSpeech.LANG_NOT_SUPPORTED | 99 | TextToSpeech.LANG_NOT_SUPPORTED |
| 97 | } | 100 | } |
| 98 | } | 101 | } |
| @@ -42,6 +42,7 @@ vocoder={{ tts_model.vocoder }} | @@ -42,6 +42,7 @@ vocoder={{ tts_model.vocoder }} | ||
| 42 | voices={{ tts_model.voices }} | 42 | voices={{ tts_model.voices }} |
| 43 | lang={{ tts_model.lang }} | 43 | lang={{ tts_model.lang }} |
| 44 | lang_iso_639_3={{ tts_model.lang_iso_639_3 }} | 44 | lang_iso_639_3={{ tts_model.lang_iso_639_3 }} |
| 45 | +lang_iso_639_3_2={{ tts_model.lang_iso_639_3_2 }} | ||
| 45 | 46 | ||
| 46 | wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2 | 47 | wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2 |
| 47 | tar xf $model_dir.tar.bz2 | 48 | tar xf $model_dir.tar.bz2 |
| @@ -59,6 +60,10 @@ pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/en | @@ -59,6 +60,10 @@ pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/en | ||
| 59 | sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt | 60 | sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt |
| 60 | sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | 61 | sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt |
| 61 | 62 | ||
| 63 | +{% if tts_model.lang2 %} | ||
| 64 | + sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt | ||
| 65 | +{% endif %} | ||
| 66 | + | ||
| 62 | {% if tts_model.model_name %} | 67 | {% if tts_model.model_name %} |
| 63 | sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt | 68 | sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt |
| 64 | {% endif %} | 69 | {% endif %} |
| @@ -109,7 +114,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | @@ -109,7 +114,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | ||
| 109 | lang=zh_en | 114 | lang=zh_en |
| 110 | fi | 115 | fi |
| 111 | 116 | ||
| 112 | -if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then | 117 | +if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then |
| 113 | lang=zh_en | 118 | lang=zh_en |
| 114 | fi | 119 | fi |
| 115 | 120 |
| @@ -108,7 +108,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | @@ -108,7 +108,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | ||
| 108 | lang=zh_en | 108 | lang=zh_en |
| 109 | fi | 109 | fi |
| 110 | 110 | ||
| 111 | -if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then | 111 | +if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then |
| 112 | lang=zh_en | 112 | lang=zh_en |
| 113 | fi | 113 | fi |
| 114 | 114 |
| @@ -35,12 +35,14 @@ class TtsModel: | @@ -35,12 +35,14 @@ class TtsModel: | ||
| 35 | vocoder: str = "" # for matcha | 35 | vocoder: str = "" # for matcha |
| 36 | voices: str = "" # for kokoro | 36 | voices: str = "" # for kokoro |
| 37 | lang: str = "" # en, zh, fr, de, etc. | 37 | lang: str = "" # en, zh, fr, de, etc. |
| 38 | + lang2: str = "" # en, zh, fr, de, etc. | ||
| 38 | rule_fsts: Optional[List[str]] = None | 39 | rule_fsts: Optional[List[str]] = None |
| 39 | rule_fars: Optional[List[str]] = None | 40 | rule_fars: Optional[List[str]] = None |
| 40 | data_dir: Optional[str] = None | 41 | data_dir: Optional[str] = None |
| 41 | dict_dir: Optional[str] = None | 42 | dict_dir: Optional[str] = None |
| 42 | is_char: bool = False | 43 | is_char: bool = False |
| 43 | lang_iso_639_3: str = "" | 44 | lang_iso_639_3: str = "" |
| 45 | + lang_iso_639_3_2: str = "" | ||
| 44 | lexicon: str = "" | 46 | lexicon: str = "" |
| 45 | 47 | ||
| 46 | 48 | ||
| @@ -48,6 +50,8 @@ def convert_lang_to_iso_639_3(models: List[TtsModel]): | @@ -48,6 +50,8 @@ def convert_lang_to_iso_639_3(models: List[TtsModel]): | ||
| 48 | for m in models: | 50 | for m in models: |
| 49 | if m.lang_iso_639_3 == "": | 51 | if m.lang_iso_639_3 == "": |
| 50 | m.lang_iso_639_3 = Lang(m.lang).pt3 | 52 | m.lang_iso_639_3 = Lang(m.lang).pt3 |
| 53 | + if m.lang2 != "": | ||
| 54 | + m.lang_iso_639_3_2 = Lang(m.lang2).pt3 | ||
| 51 | 55 | ||
| 52 | 56 | ||
| 53 | def get_coqui_models() -> List[TtsModel]: | 57 | def get_coqui_models() -> List[TtsModel]: |
| @@ -322,6 +326,7 @@ def get_vits_models() -> List[TtsModel]: | @@ -322,6 +326,7 @@ def get_vits_models() -> List[TtsModel]: | ||
| 322 | model_dir="vits-melo-tts-zh_en", | 326 | model_dir="vits-melo-tts-zh_en", |
| 323 | model_name="model.onnx", | 327 | model_name="model.onnx", |
| 324 | lang="zh", | 328 | lang="zh", |
| 329 | + lang2="en", | ||
| 325 | ), | 330 | ), |
| 326 | TtsModel( | 331 | TtsModel( |
| 327 | model_dir="vits-zh-hf-fanchen-C", | 332 | model_dir="vits-zh-hf-fanchen-C", |
| @@ -438,16 +443,19 @@ def get_kokoro_models() -> List[TtsModel]: | @@ -438,16 +443,19 @@ def get_kokoro_models() -> List[TtsModel]: | ||
| 438 | model_dir="kokoro-multi-lang-v1_0", | 443 | model_dir="kokoro-multi-lang-v1_0", |
| 439 | model_name="model.onnx", | 444 | model_name="model.onnx", |
| 440 | lang="en", | 445 | lang="en", |
| 446 | + lang2="zh", | ||
| 441 | ), | 447 | ), |
| 442 | TtsModel( | 448 | TtsModel( |
| 443 | model_dir="kokoro-multi-lang-v1_1", | 449 | model_dir="kokoro-multi-lang-v1_1", |
| 444 | model_name="model.onnx", | 450 | model_name="model.onnx", |
| 445 | lang="en", | 451 | lang="en", |
| 452 | + lang2="zh", | ||
| 446 | ), | 453 | ), |
| 447 | TtsModel( | 454 | TtsModel( |
| 448 | model_dir="kokoro-int8-multi-lang-v1_1", | 455 | model_dir="kokoro-int8-multi-lang-v1_1", |
| 449 | model_name="model.int8.onnx", | 456 | model_name="model.int8.onnx", |
| 450 | lang="en", | 457 | lang="en", |
| 458 | + lang2="zh", | ||
| 451 | ), | 459 | ), |
| 452 | ] | 460 | ] |
| 453 | for m in multi_lingual_models: | 461 | for m in multi_lingual_models: |
-
请 注册 或 登录 后发表评论