Committed by
GitHub
Add Android TTS Engine APK for KittenTTS (#2465)
正在显示 6 个修改的文件，包含 58 行增加和 1 行删除
| @@ -192,6 +192,7 @@ class MainActivity : AppCompatActivity() { | @@ -192,6 +192,7 @@ class MainActivity : AppCompatActivity() { | ||
| 192 | var dataDir: String? | 192 | var dataDir: String? |
| 193 | var dictDir: String? | 193 | var dictDir: String? |
| 194 | var assets: AssetManager? = application.assets | 194 | var assets: AssetManager? = application.assets |
| 195 | + var isKitten = false | ||
| 195 | 196 | ||
| 196 | // The purpose of such a design is to make the CI test easier | 197 | // The purpose of such a design is to make the CI test easier |
| 197 | // Please see | 198 | // Please see |
| @@ -291,6 +292,14 @@ class MainActivity : AppCompatActivity() { | @@ -291,6 +292,14 @@ class MainActivity : AppCompatActivity() { | ||
| 291 | // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" | 292 | // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" |
| 292 | // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" | 293 | // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" |
| 293 | 294 | ||
| 295 | + // Example 11 | ||
| 296 | + // kitten-nano-en-v0_1-fp16 | ||
| 297 | + // modelDir = "kitten-nano-en-v0_1-fp16" | ||
| 298 | + // modelName = "model.fp16.onnx" | ||
| 299 | + // voices = "voices.bin" | ||
| 300 | + // dataDir = "kitten-nano-en-v0_1-fp16/espeak-ng-data" | ||
| 301 | + // isKitten = true | ||
| 302 | + | ||
| 294 | if (dataDir != null) { | 303 | if (dataDir != null) { |
| 295 | val newDir = copyDataDir(dataDir!!) | 304 | val newDir = copyDataDir(dataDir!!) |
| 296 | dataDir = "$newDir/$dataDir" | 305 | dataDir = "$newDir/$dataDir" |
| @@ -315,6 +324,7 @@ class MainActivity : AppCompatActivity() { | @@ -315,6 +324,7 @@ class MainActivity : AppCompatActivity() { | ||
| 315 | dictDir = dictDir ?: "", | 324 | dictDir = dictDir ?: "", |
| 316 | ruleFsts = ruleFsts ?: "", | 325 | ruleFsts = ruleFsts ?: "", |
| 317 | ruleFars = ruleFars ?: "", | 326 | ruleFars = ruleFars ?: "", |
| 327 | + isKitten = isKitten, | ||
| 318 | )!! | 328 | )!! |
| 319 | 329 | ||
| 320 | tts = OfflineTts(assetManager = assets, config = config) | 330 | tts = OfflineTts(assetManager = assets, config = config) |
| @@ -53,6 +53,7 @@ object TtsEngine { | @@ -53,6 +53,7 @@ object TtsEngine { | ||
| 53 | private var dataDir: String? = null | 53 | private var dataDir: String? = null |
| 54 | private var dictDir: String? = null | 54 | private var dictDir: String? = null |
| 55 | private var assets: AssetManager? = null | 55 | private var assets: AssetManager? = null |
| 56 | + private var isKitten = false | ||
| 56 | 57 | ||
| 57 | init { | 58 | init { |
| 58 | // The purpose of such a design is to make the CI test easier | 59 | // The purpose of such a design is to make the CI test easier |
| @@ -172,6 +173,15 @@ object TtsEngine { | @@ -172,6 +173,15 @@ object TtsEngine { | ||
| 172 | // | 173 | // |
| 173 | // This model supports many languages, e.g., English, Chinese, etc. | 174 | // This model supports many languages, e.g., English, Chinese, etc. |
| 174 | // We set lang to eng here. | 175 | // We set lang to eng here. |
| 176 | + | ||
| 177 | + // Example 11 | ||
| 178 | + // kitten-nano-en-v0_1-fp16 | ||
| 179 | + // modelDir = "kitten-nano-en-v0_1-fp16" | ||
| 180 | + // modelName = "model.fp16.onnx" | ||
| 181 | + // voices = "voices.bin" | ||
| 182 | + // dataDir = "kitten-nano-en-v0_1-fp16/espeak-ng-data" | ||
| 183 | + // lang = "eng" | ||
| 184 | + // isKitten = true | ||
| 175 | } | 185 | } |
| 176 | 186 | ||
| 177 | fun createTts(context: Context) { | 187 | fun createTts(context: Context) { |
| @@ -207,7 +217,8 @@ object TtsEngine { | @@ -207,7 +217,8 @@ object TtsEngine { | ||
| 207 | dataDir = dataDir ?: "", | 217 | dataDir = dataDir ?: "", |
| 208 | dictDir = dictDir ?: "", | 218 | dictDir = dictDir ?: "", |
| 209 | ruleFsts = ruleFsts ?: "", | 219 | ruleFsts = ruleFsts ?: "", |
| 210 | - ruleFars = ruleFars ?: "" | 220 | + ruleFars = ruleFars ?: "", |
| 221 | + isKitten = isKitten, | ||
| 211 | ) | 222 | ) |
| 212 | 223 | ||
| 213 | speed = PreferenceHelper(context).getSpeed() | 224 | speed = PreferenceHelper(context).getSpeed() |
| @@ -64,6 +64,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | @@ -64,6 +64,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | ||
| 64 | sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt | 64 | sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt |
| 65 | {% endif %} | 65 | {% endif %} |
| 66 | 66 | ||
| 67 | +{% if tts_model.is_kitten %} | ||
| 68 | + sed -i.bak s/"isKitten = false"/"isKitten = true"/ ./TtsEngine.kt | ||
| 69 | +{% endif %} | ||
| 70 | + | ||
| 71 | + | ||
| 67 | {% if tts_model.model_name %} | 72 | {% if tts_model.model_name %} |
| 68 | sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt | 73 | sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt |
| 69 | {% endif %} | 74 | {% endif %} |
| @@ -101,6 +101,10 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt | @@ -101,6 +101,10 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt | ||
| 101 | sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt | 101 | sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt |
| 102 | {% endif %} | 102 | {% endif %} |
| 103 | 103 | ||
| 104 | +{% if tts_model.is_kitten %} | ||
| 105 | + sed -i.bak s/"isKitten = false"/"isKitten = true"/ ./MainActivity.kt | ||
| 106 | +{% endif %} | ||
| 107 | + | ||
| 104 | git diff | 108 | git diff |
| 105 | popd | 109 | popd |
| 106 | 110 |
| @@ -44,6 +44,7 @@ class TtsModel: | @@ -44,6 +44,7 @@ class TtsModel: | ||
| 44 | lang_iso_639_3: str = "" | 44 | lang_iso_639_3: str = "" |
| 45 | lang_iso_639_3_2: str = "" | 45 | lang_iso_639_3_2: str = "" |
| 46 | lexicon: str = "" | 46 | lexicon: str = "" |
| 47 | + is_kitten: bool = False | ||
| 47 | 48 | ||
| 48 | 49 | ||
| 49 | def convert_lang_to_iso_639_3(models: List[TtsModel]): | 50 | def convert_lang_to_iso_639_3(models: List[TtsModel]): |
| @@ -494,6 +495,22 @@ def get_kokoro_models() -> List[TtsModel]: | @@ -494,6 +495,22 @@ def get_kokoro_models() -> List[TtsModel]: | ||
| 494 | return english_models + multi_lingual_models | 495 | return english_models + multi_lingual_models |
| 495 | 496 | ||
| 496 | 497 | ||
| 498 | +def get_kitten_models() -> List[TtsModel]: | ||
| 499 | + english_models = [ | ||
| 500 | + TtsModel( | ||
| 501 | + model_dir="kitten-nano-en-v0_1-fp16", | ||
| 502 | + model_name="model.fp16.onnx", | ||
| 503 | + lang="en", | ||
| 504 | + ) | ||
| 505 | + ] | ||
| 506 | + for m in english_models: | ||
| 507 | + m.data_dir = f"{m.model_dir}/espeak-ng-data" | ||
| 508 | + m.voices = "voices.bin" | ||
| 509 | + m.is_kitten = True | ||
| 510 | + | ||
| 511 | + return english_models | ||
| 512 | + | ||
| 513 | + | ||
| 497 | def main(): | 514 | def main(): |
| 498 | args = get_args() | 515 | args = get_args() |
| 499 | index = args.index | 516 | index = args.index |
| @@ -507,6 +524,7 @@ def main(): | @@ -507,6 +524,7 @@ def main(): | ||
| 507 | all_model_list += get_coqui_models() | 524 | all_model_list += get_coqui_models() |
| 508 | all_model_list += get_matcha_models() | 525 | all_model_list += get_matcha_models() |
| 509 | all_model_list += get_kokoro_models() | 526 | all_model_list += get_kokoro_models() |
| 527 | + all_model_list += get_kitten_models() | ||
| 510 | 528 | ||
| 511 | convert_lang_to_iso_639_3(all_model_list) | 529 | convert_lang_to_iso_639_3(all_model_list) |
| 512 | print(all_model_list) | 530 | print(all_model_list) |
| @@ -535,6 +535,10 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon( | @@ -535,6 +535,10 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon( | ||
| 535 | template PiperPhonemizeLexicon::PiperPhonemizeLexicon( | 535 | template PiperPhonemizeLexicon::PiperPhonemizeLexicon( |
| 536 | AAssetManager *mgr, const std::string &tokens, const std::string &data_dir, | 536 | AAssetManager *mgr, const std::string &tokens, const std::string &data_dir, |
| 537 | const OfflineTtsKokoroModelMetaData &kokoro_meta_data); | 537 | const OfflineTtsKokoroModelMetaData &kokoro_meta_data); |
| 538 | + | ||
| 539 | +template PiperPhonemizeLexicon::PiperPhonemizeLexicon( | ||
| 540 | + AAssetManager *mgr, const std::string &tokens, const std::string &data_dir, | ||
| 541 | + const OfflineTtsKittenModelMetaData &kokoro_meta_data); | ||
| 538 | #endif | 542 | #endif |
| 539 | 543 | ||
| 540 | #if __OHOS__ | 544 | #if __OHOS__ |
| @@ -552,6 +556,11 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon( | @@ -552,6 +556,11 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon( | ||
| 552 | NativeResourceManager *mgr, const std::string &tokens, | 556 | NativeResourceManager *mgr, const std::string &tokens, |
| 553 | const std::string &data_dir, | 557 | const std::string &data_dir, |
| 554 | const OfflineTtsKokoroModelMetaData &kokoro_meta_data); | 558 | const OfflineTtsKokoroModelMetaData &kokoro_meta_data); |
| 559 | + | ||
| 560 | +template PiperPhonemizeLexicon::PiperPhonemizeLexicon( | ||
| 561 | + NativeResourceManager *mgr, const std::string &tokens, | ||
| 562 | + const std::string &data_dir, | ||
| 563 | + const OfflineTtsKittenModelMetaData &kokoro_meta_data); | ||
| 555 | #endif | 564 | #endif |
| 556 | 565 | ||
| 557 | } // namespace sherpa_onnx | 566 | } // namespace sherpa_onnx |
-
请 注册 或 登录 后发表评论