Fangjun Kuang
Committed by GitHub

Add Android demo for Kokoro TTS 1.0 (#1799)

@@ -26,7 +26,6 @@ jobs: @@ -26,7 +26,6 @@ jobs:
26 total: ["40"] 26 total: ["40"]
27 index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"] 27 index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
28 28
29 -  
30 steps: 29 steps:
31 - uses: actions/checkout@v4 30 - uses: actions/checkout@v4
32 with: 31 with:
@@ -193,7 +193,7 @@ jobs: @@ -193,7 +193,7 @@ jobs:
193 cp -v ../scripts/kokoro/v1.0/README.md ./README.md 193 cp -v ../scripts/kokoro/v1.0/README.md ./README.md
194 cp -v ../LICENSE ./ 194 cp -v ../LICENSE ./
195 cp -av ../dict ./ 195 cp -av ../dict ./
196 - cp -v ../*.fst $d/ 196 + cp -v ../*.fst ./
197 197
198 git lfs track "*.onnx" 198 git lfs track "*.onnx"
199 git add . 199 git add .
@@ -206,6 +206,7 @@ jobs: @@ -206,6 +206,7 @@ jobs:
206 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true 206 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
207 207
208 - name: Release 208 - name: Release
  209 + if: github.repository_owner == 'csukuangfj'
209 uses: svenstaro/upload-release-action@v2 210 uses: svenstaro/upload-release-action@v2
210 with: 211 with:
211 file_glob: true 212 file_glob: true
@@ -214,3 +215,12 @@ jobs: @@ -214,3 +215,12 @@ jobs:
214 repo_name: k2-fsa/sherpa-onnx 215 repo_name: k2-fsa/sherpa-onnx
215 repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} 216 repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
216 tag: tts-models 217 tag: tts-models
  218 +
  219 + - name: Release
  220 + if: github.repository_owner == 'k2-fsa'
  221 + uses: svenstaro/upload-release-action@v2
  222 + with:
  223 + file_glob: true
  224 + file: ./*.tar.bz2
  225 + overwrite: true
  226 + tag: tts-models
@@ -281,6 +281,16 @@ class MainActivity : AppCompatActivity() { @@ -281,6 +281,16 @@ class MainActivity : AppCompatActivity() {
281 // voices = "voices.bin" 281 // voices = "voices.bin"
282 // dataDir = "kokoro-en-v0_19/espeak-ng-data" 282 // dataDir = "kokoro-en-v0_19/espeak-ng-data"
283 283
  284 + // Example 10
  285 + // kokoro-multi-lang-v1_0
  286 + // modelDir = "kokoro-multi-lang-v1_0"
  287 + // modelName = "model.onnx"
  288 + // voices = "voices.bin"
  289 + // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
  290 + // dictDir = "kokoro-multi-lang-v1_0/dict"
  291 + // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
  292 + // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
  293 +
284 if (dataDir != null) { 294 if (dataDir != null) {
285 val newDir = copyDataDir(dataDir!!) 295 val newDir = copyDataDir(dataDir!!)
286 dataDir = "$newDir/$dataDir" 296 dataDir = "$newDir/$dataDir"
@@ -289,7 +299,9 @@ class MainActivity : AppCompatActivity() { @@ -289,7 +299,9 @@ class MainActivity : AppCompatActivity() {
289 if (dictDir != null) { 299 if (dictDir != null) {
290 val newDir = copyDataDir(dictDir!!) 300 val newDir = copyDataDir(dictDir!!)
291 dictDir = "$newDir/$dictDir" 301 dictDir = "$newDir/$dictDir"
292 - ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" 302 + if (ruleFsts == null) {
  303 + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
  304 + }
293 } 305 }
294 306
295 val config = getOfflineTtsConfig( 307 val config = getOfflineTtsConfig(
@@ -152,6 +152,20 @@ object TtsEngine { @@ -152,6 +152,20 @@ object TtsEngine {
152 // voices = "voices.bin" 152 // voices = "voices.bin"
153 // dataDir = "kokoro-en-v0_19/espeak-ng-data" 153 // dataDir = "kokoro-en-v0_19/espeak-ng-data"
154 // lang = "eng" 154 // lang = "eng"
  155 +
  156 + // Example 10
  157 + // kokoro-multi-lang-v1_0
  158 + // modelDir = "kokoro-multi-lang-v1_0"
  159 + // modelName = "model.onnx"
  160 + // voices = "voices.bin"
  161 + // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
  162 + // dictDir = "kokoro-multi-lang-v1_0/dict"
  163 + // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
  164 + // lang = "eng"
  165 + // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
  166 + //
  167 + // This model supports many languages, e.g., English, Chinese, etc.
  168 + // We set lang to eng here.
155 } 169 }
156 170
157 fun createTts(context: Context) { 171 fun createTts(context: Context) {
@@ -172,7 +186,9 @@ object TtsEngine { @@ -172,7 +186,9 @@ object TtsEngine {
172 if (dictDir != null) { 186 if (dictDir != null) {
173 val newDir = copyDataDir(context, dictDir!!) 187 val newDir = copyDataDir(context, dictDir!!)
174 dictDir = "$newDir/$dictDir" 188 dictDir = "$newDir/$dictDir"
175 - ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" 189 + if (ruleFsts == null) {
  190 + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
  191 + }
176 } 192 }
177 193
178 val config = getOfflineTtsConfig( 194 val config = getOfflineTtsConfig(
@@ -97,6 +97,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt @@ -97,6 +97,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
97 sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt 97 sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt
98 {% endif %} 98 {% endif %}
99 99
  100 +{% if tts_model.lexicon %}
  101 + lexicon={{ tts_model.lexicon }}
  102 + sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./TtsEngine.kt
  103 +{% endif %}
  104 +
100 git diff 105 git diff
101 popd 106 popd
102 107
@@ -104,6 +109,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then @@ -104,6 +109,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
104 lang=zh_en 109 lang=zh_en
105 fi 110 fi
106 111
  112 +if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then
  113 + lang=zh_en
  114 +fi
  115 +
107 for arch in arm64-v8a armeabi-v7a x86_64 x86; do 116 for arch in arm64-v8a armeabi-v7a x86_64 x86; do
108 log "------------------------------------------------------------" 117 log "------------------------------------------------------------"
109 log "build tts apk for $arch" 118 log "build tts apk for $arch"
@@ -96,6 +96,11 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt @@ -96,6 +96,11 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
96 sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt 96 sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt
97 {% endif %} 97 {% endif %}
98 98
  99 +{% if tts_model.lexicon %}
  100 + lexicon={{ tts_model.lexicon }}
  101 + sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt
  102 +{% endif %}
  103 +
99 git diff 104 git diff
100 popd 105 popd
101 106
@@ -103,6 +108,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then @@ -103,6 +108,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
103 lang=zh_en 108 lang=zh_en
104 fi 109 fi
105 110
  111 +if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then
  112 + lang=zh_en
  113 +fi
  114 +
106 for arch in arm64-v8a armeabi-v7a x86_64 x86; do 115 for arch in arm64-v8a armeabi-v7a x86_64 x86; do
107 log "------------------------------------------------------------" 116 log "------------------------------------------------------------"
108 log "build tts apk for $arch" 117 log "build tts apk for $arch"
@@ -41,6 +41,7 @@ class TtsModel: @@ -41,6 +41,7 @@ class TtsModel:
41 dict_dir: Optional[str] = None 41 dict_dir: Optional[str] = None
42 is_char: bool = False 42 is_char: bool = False
43 lang_iso_639_3: str = "" 43 lang_iso_639_3: str = ""
  44 + lexicon: str = ""
44 45
45 46
46 def convert_lang_to_iso_639_3(models: List[TtsModel]): 47 def convert_lang_to_iso_639_3(models: List[TtsModel]):
@@ -422,7 +423,21 @@ def get_kokoro_models() -> List[TtsModel]: @@ -422,7 +423,21 @@ def get_kokoro_models() -> List[TtsModel]:
422 m.data_dir = f"{m.model_dir}/espeak-ng-data" 423 m.data_dir = f"{m.model_dir}/espeak-ng-data"
423 m.voices = "voices.bin" 424 m.voices = "voices.bin"
424 425
425 - return english_models 426 + multi_lingual_models = [
  427 + TtsModel(
  428 + model_dir="kokoro-multi-lang-v1_0",
  429 + model_name="model.onnx",
  430 + lang="en",
  431 + )
  432 + ]
  433 + for m in multi_lingual_models:
  434 + m.data_dir = f"{m.model_dir}/espeak-ng-data"
  435 + m.dict_dir = f"{m.model_dir}/dict"
  436 + m.voices = "voices.bin"
  437 + m.lexicon = f"{m.model_dir}/lexicon-us-en.txt,{m.model_dir}/lexicon-zh.txt"
  438 + m.rule_fsts = f"{m.model_dir}/phone-zh.fst,{m.model_dir}/date-zh.fst,{m.model_dir}/number-zh.fst"
  439 +
  440 + return english_models + multi_lingual_models
426 441
427 442
428 def main(): 443 def main():
@@ -256,7 +256,11 @@ fun getOfflineTtsConfig( @@ -256,7 +256,11 @@ fun getOfflineTtsConfig(
256 voices = "$modelDir/$voices", 256 voices = "$modelDir/$voices",
257 tokens = "$modelDir/tokens.txt", 257 tokens = "$modelDir/tokens.txt",
258 dataDir = dataDir, 258 dataDir = dataDir,
259 - lexicon = if ("," in lexicon) lexicon else "$modelDir/$lexicon", 259 + lexicon = when {
  260 + lexicon == "" -> lexicon
  261 + "," in lexicon -> lexicon
  262 + else -> "$modelDir/$lexicon"
  263 + },
260 dictDir = dictDir, 264 dictDir = dictDir,
261 ) 265 )
262 } else { 266 } else {