Fangjun Kuang
Committed by GitHub

Add Android TTS Engine APK for KittenTTS (#2465)

@@ -192,6 +192,7 @@ class MainActivity : AppCompatActivity() { @@ -192,6 +192,7 @@ class MainActivity : AppCompatActivity() {
192 var dataDir: String? 192 var dataDir: String?
193 var dictDir: String? 193 var dictDir: String?
194 var assets: AssetManager? = application.assets 194 var assets: AssetManager? = application.assets
  195 + var isKitten = false
195 196
196 // The purpose of such a design is to make the CI test easier 197 // The purpose of such a design is to make the CI test easier
197 // Please see 198 // Please see
@@ -291,6 +292,14 @@ class MainActivity : AppCompatActivity() { @@ -291,6 +292,14 @@ class MainActivity : AppCompatActivity() {
291 // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" 292 // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
292 // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" 293 // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
293 294
  295 + // Example 11
  296 + // kitten-nano-en-v0_1-fp16
  297 + // modelDir = "kitten-nano-en-v0_1-fp16"
  298 + // modelName = "model.fp16.onnx"
  299 + // voices = "voices.bin"
  300 + // dataDir = "kitten-nano-en-v0_1-fp16/espeak-ng-data"
  301 + // isKitten = true
  302 +
294 if (dataDir != null) { 303 if (dataDir != null) {
295 val newDir = copyDataDir(dataDir!!) 304 val newDir = copyDataDir(dataDir!!)
296 dataDir = "$newDir/$dataDir" 305 dataDir = "$newDir/$dataDir"
@@ -315,6 +324,7 @@ class MainActivity : AppCompatActivity() { @@ -315,6 +324,7 @@ class MainActivity : AppCompatActivity() {
315 dictDir = dictDir ?: "", 324 dictDir = dictDir ?: "",
316 ruleFsts = ruleFsts ?: "", 325 ruleFsts = ruleFsts ?: "",
317 ruleFars = ruleFars ?: "", 326 ruleFars = ruleFars ?: "",
  327 + isKitten = isKitten,
318 )!! 328 )!!
319 329
320 tts = OfflineTts(assetManager = assets, config = config) 330 tts = OfflineTts(assetManager = assets, config = config)
@@ -53,6 +53,7 @@ object TtsEngine { @@ -53,6 +53,7 @@ object TtsEngine {
53 private var dataDir: String? = null 53 private var dataDir: String? = null
54 private var dictDir: String? = null 54 private var dictDir: String? = null
55 private var assets: AssetManager? = null 55 private var assets: AssetManager? = null
  56 + private var isKitten = false
56 57
57 init { 58 init {
58 // The purpose of such a design is to make the CI test easier 59 // The purpose of such a design is to make the CI test easier
@@ -172,6 +173,15 @@ object TtsEngine { @@ -172,6 +173,15 @@ object TtsEngine {
172 // 173 //
173 // This model supports many languages, e.g., English, Chinese, etc. 174 // This model supports many languages, e.g., English, Chinese, etc.
174 // We set lang to eng here. 175 // We set lang to eng here.
  176 +
  177 + // Example 11
  178 + // kitten-nano-en-v0_1-fp16
  179 + // modelDir = "kitten-nano-en-v0_1-fp16"
  180 + // modelName = "model.fp16.onnx"
  181 + // voices = "voices.bin"
  182 + // dataDir = "kitten-nano-en-v0_1-fp16/espeak-ng-data"
  183 + // lang = "eng"
  184 + // isKitten = true
175 } 185 }
176 186
177 fun createTts(context: Context) { 187 fun createTts(context: Context) {
@@ -207,7 +217,8 @@ object TtsEngine { @@ -207,7 +217,8 @@ object TtsEngine {
207 dataDir = dataDir ?: "", 217 dataDir = dataDir ?: "",
208 dictDir = dictDir ?: "", 218 dictDir = dictDir ?: "",
209 ruleFsts = ruleFsts ?: "", 219 ruleFsts = ruleFsts ?: "",
210 - ruleFars = ruleFars ?: "" 220 + ruleFars = ruleFars ?: "",
  221 + isKitten = isKitten,
211 ) 222 )
212 223
213 speed = PreferenceHelper(context).getSpeed() 224 speed = PreferenceHelper(context).getSpeed()
@@ -64,6 +64,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt @@ -64,6 +64,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
64 sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt 64 sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt
65 {% endif %} 65 {% endif %}
66 66
  67 +{% if tts_model.is_kitten %}
  68 + sed -i.bak s/"isKitten = false"/"isKitten = true"/ ./TtsEngine.kt
  69 +{% endif %}
  70 +
  71 +
67 {% if tts_model.model_name %} 72 {% if tts_model.model_name %}
68 sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt 73 sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
69 {% endif %} 74 {% endif %}
@@ -101,6 +101,10 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt @@ -101,6 +101,10 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
101 sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt 101 sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt
102 {% endif %} 102 {% endif %}
103 103
  104 +{% if tts_model.is_kitten %}
  105 + sed -i.bak s/"isKitten = false"/"isKitten = true"/ ./MainActivity.kt
  106 +{% endif %}
  107 +
104 git diff 108 git diff
105 popd 109 popd
106 110
@@ -44,6 +44,7 @@ class TtsModel: @@ -44,6 +44,7 @@ class TtsModel:
44 lang_iso_639_3: str = "" 44 lang_iso_639_3: str = ""
45 lang_iso_639_3_2: str = "" 45 lang_iso_639_3_2: str = ""
46 lexicon: str = "" 46 lexicon: str = ""
  47 + is_kitten: bool = False
47 48
48 49
49 def convert_lang_to_iso_639_3(models: List[TtsModel]): 50 def convert_lang_to_iso_639_3(models: List[TtsModel]):
@@ -494,6 +495,22 @@ def get_kokoro_models() -> List[TtsModel]: @@ -494,6 +495,22 @@ def get_kokoro_models() -> List[TtsModel]:
494 return english_models + multi_lingual_models 495 return english_models + multi_lingual_models
495 496
496 497
  498 +def get_kitten_models() -> List[TtsModel]:
  499 + english_models = [
  500 + TtsModel(
  501 + model_dir="kitten-nano-en-v0_1-fp16",
  502 + model_name="model.fp16.onnx",
  503 + lang="en",
  504 + )
  505 + ]
  506 + for m in english_models:
  507 + m.data_dir = f"{m.model_dir}/espeak-ng-data"
  508 + m.voices = "voices.bin"
  509 + m.is_kitten = True
  510 +
  511 + return english_models
  512 +
  513 +
497 def main(): 514 def main():
498 args = get_args() 515 args = get_args()
499 index = args.index 516 index = args.index
@@ -507,6 +524,7 @@ def main(): @@ -507,6 +524,7 @@ def main():
507 all_model_list += get_coqui_models() 524 all_model_list += get_coqui_models()
508 all_model_list += get_matcha_models() 525 all_model_list += get_matcha_models()
509 all_model_list += get_kokoro_models() 526 all_model_list += get_kokoro_models()
  527 + all_model_list += get_kitten_models()
510 528
511 convert_lang_to_iso_639_3(all_model_list) 529 convert_lang_to_iso_639_3(all_model_list)
512 print(all_model_list) 530 print(all_model_list)
@@ -535,6 +535,10 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon( @@ -535,6 +535,10 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
535 template PiperPhonemizeLexicon::PiperPhonemizeLexicon( 535 template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
536 AAssetManager *mgr, const std::string &tokens, const std::string &data_dir, 536 AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
537 const OfflineTtsKokoroModelMetaData &kokoro_meta_data); 537 const OfflineTtsKokoroModelMetaData &kokoro_meta_data);
  538 +
  539 +template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
  540 + AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
  541 + const OfflineTtsKittenModelMetaData &kokoro_meta_data);
538 #endif 542 #endif
539 543
540 #if __OHOS__ 544 #if __OHOS__
@@ -552,6 +556,11 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon( @@ -552,6 +556,11 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
552 NativeResourceManager *mgr, const std::string &tokens, 556 NativeResourceManager *mgr, const std::string &tokens,
553 const std::string &data_dir, 557 const std::string &data_dir,
554 const OfflineTtsKokoroModelMetaData &kokoro_meta_data); 558 const OfflineTtsKokoroModelMetaData &kokoro_meta_data);
  559 +
  560 +template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
  561 + NativeResourceManager *mgr, const std::string &tokens,
  562 + const std::string &data_dir,
  563 + const OfflineTtsKittenModelMetaData &kokoro_meta_data);
555 #endif 564 #endif
556 565
557 } // namespace sherpa_onnx 566 } // namespace sherpa_onnx