Committed by GitHub
Add Android demo for Kokoro TTS 1.0 (#1799)
正在显示 8 个修改的文件，包含 80 行增加和 6 行删除
| @@ -26,7 +26,6 @@ jobs: | @@ -26,7 +26,6 @@ jobs: | ||
| 26 | total: ["40"] | 26 | total: ["40"] |
| 27 | index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"] | 27 | index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"] |
| 28 | 28 | ||
| 29 | - | ||
| 30 | steps: | 29 | steps: |
| 31 | - uses: actions/checkout@v4 | 30 | - uses: actions/checkout@v4 |
| 32 | with: | 31 | with: |
| @@ -193,7 +193,7 @@ jobs: | @@ -193,7 +193,7 @@ jobs: | ||
| 193 | cp -v ../scripts/kokoro/v1.0/README.md ./README.md | 193 | cp -v ../scripts/kokoro/v1.0/README.md ./README.md |
| 194 | cp -v ../LICENSE ./ | 194 | cp -v ../LICENSE ./ |
| 195 | cp -av ../dict ./ | 195 | cp -av ../dict ./ |
| 196 | - cp -v ../*.fst $d/ | 196 | + cp -v ../*.fst ./ |
| 197 | 197 | ||
| 198 | git lfs track "*.onnx" | 198 | git lfs track "*.onnx" |
| 199 | git add . | 199 | git add . |
| @@ -206,6 +206,7 @@ jobs: | @@ -206,6 +206,7 @@ jobs: | ||
| 206 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true | 206 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true |
| 207 | 207 | ||
| 208 | - name: Release | 208 | - name: Release |
| 209 | + if: github.repository_owner == 'csukuangfj' | ||
| 209 | uses: svenstaro/upload-release-action@v2 | 210 | uses: svenstaro/upload-release-action@v2 |
| 210 | with: | 211 | with: |
| 211 | file_glob: true | 212 | file_glob: true |
| @@ -214,3 +215,12 @@ jobs: | @@ -214,3 +215,12 @@ jobs: | ||
| 214 | repo_name: k2-fsa/sherpa-onnx | 215 | repo_name: k2-fsa/sherpa-onnx |
| 215 | repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} | 216 | repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} |
| 216 | tag: tts-models | 217 | tag: tts-models |
| 218 | + | ||
| 219 | + - name: Release | ||
| 220 | + if: github.repository_owner == 'k2-fsa' | ||
| 221 | + uses: svenstaro/upload-release-action@v2 | ||
| 222 | + with: | ||
| 223 | + file_glob: true | ||
| 224 | + file: ./*.tar.bz2 | ||
| 225 | + overwrite: true | ||
| 226 | + tag: tts-models |
| @@ -281,6 +281,16 @@ class MainActivity : AppCompatActivity() { | @@ -281,6 +281,16 @@ class MainActivity : AppCompatActivity() { | ||
| 281 | // voices = "voices.bin" | 281 | // voices = "voices.bin" |
| 282 | // dataDir = "kokoro-en-v0_19/espeak-ng-data" | 282 | // dataDir = "kokoro-en-v0_19/espeak-ng-data" |
| 283 | 283 | ||
| 284 | + // Example 10 | ||
| 285 | + // kokoro-multi-lang-v1_0 | ||
| 286 | + // modelDir = "kokoro-multi-lang-v1_0" | ||
| 287 | + // modelName = "model.onnx" | ||
| 288 | + // voices = "voices.bin" | ||
| 289 | + // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data" | ||
| 290 | + // dictDir = "kokoro-multi-lang-v1_0/dict" | ||
| 291 | + // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" | ||
| 292 | + // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" | ||
| 293 | + | ||
| 284 | if (dataDir != null) { | 294 | if (dataDir != null) { |
| 285 | val newDir = copyDataDir(dataDir!!) | 295 | val newDir = copyDataDir(dataDir!!) |
| 286 | dataDir = "$newDir/$dataDir" | 296 | dataDir = "$newDir/$dataDir" |
| @@ -289,7 +299,9 @@ class MainActivity : AppCompatActivity() { | @@ -289,7 +299,9 @@ class MainActivity : AppCompatActivity() { | ||
| 289 | if (dictDir != null) { | 299 | if (dictDir != null) { |
| 290 | val newDir = copyDataDir(dictDir!!) | 300 | val newDir = copyDataDir(dictDir!!) |
| 291 | dictDir = "$newDir/$dictDir" | 301 | dictDir = "$newDir/$dictDir" |
| 292 | - ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | 302 | + if (ruleFsts == null) { |
| 303 | + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | ||
| 304 | + } | ||
| 293 | } | 305 | } |
| 294 | 306 | ||
| 295 | val config = getOfflineTtsConfig( | 307 | val config = getOfflineTtsConfig( |
| @@ -152,6 +152,20 @@ object TtsEngine { | @@ -152,6 +152,20 @@ object TtsEngine { | ||
| 152 | // voices = "voices.bin" | 152 | // voices = "voices.bin" |
| 153 | // dataDir = "kokoro-en-v0_19/espeak-ng-data" | 153 | // dataDir = "kokoro-en-v0_19/espeak-ng-data" |
| 154 | // lang = "eng" | 154 | // lang = "eng" |
| 155 | + | ||
| 156 | + // Example 10 | ||
| 157 | + // kokoro-multi-lang-v1_0 | ||
| 158 | + // modelDir = "kokoro-multi-lang-v1_0" | ||
| 159 | + // modelName = "model.onnx" | ||
| 160 | + // voices = "voices.bin" | ||
| 161 | + // dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data" | ||
| 162 | + // dictDir = "kokoro-multi-lang-v1_0/dict" | ||
| 163 | + // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" | ||
| 164 | + // lang = "eng" | ||
| 165 | + // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" | ||
| 166 | + // | ||
| 167 | + // This model supports many languages, e.g., English, Chinese, etc. | ||
| 168 | + // We set lang to eng here. | ||
| 155 | } | 169 | } |
| 156 | 170 | ||
| 157 | fun createTts(context: Context) { | 171 | fun createTts(context: Context) { |
| @@ -172,7 +186,9 @@ object TtsEngine { | @@ -172,7 +186,9 @@ object TtsEngine { | ||
| 172 | if (dictDir != null) { | 186 | if (dictDir != null) { |
| 173 | val newDir = copyDataDir(context, dictDir!!) | 187 | val newDir = copyDataDir(context, dictDir!!) |
| 174 | dictDir = "$newDir/$dictDir" | 188 | dictDir = "$newDir/$dictDir" |
| 175 | - ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | 189 | + if (ruleFsts == null) { |
| 190 | + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | ||
| 191 | + } | ||
| 176 | } | 192 | } |
| 177 | 193 | ||
| 178 | val config = getOfflineTtsConfig( | 194 | val config = getOfflineTtsConfig( |
| @@ -97,6 +97,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | @@ -97,6 +97,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | ||
| 97 | sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt | 97 | sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt |
| 98 | {% endif %} | 98 | {% endif %} |
| 99 | 99 | ||
| 100 | +{% if tts_model.lexicon %} | ||
| 101 | + lexicon={{ tts_model.lexicon }} | ||
| 102 | + sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./TtsEngine.kt | ||
| 103 | +{% endif %} | ||
| 104 | + | ||
| 100 | git diff | 105 | git diff |
| 101 | popd | 106 | popd |
| 102 | 107 | ||
| @@ -104,6 +109,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | @@ -104,6 +109,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | ||
| 104 | lang=zh_en | 109 | lang=zh_en |
| 105 | fi | 110 | fi |
| 106 | 111 | ||
| 112 | +if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then | ||
| 113 | + lang=zh_en | ||
| 114 | +fi | ||
| 115 | + | ||
| 107 | for arch in arm64-v8a armeabi-v7a x86_64 x86; do | 116 | for arch in arm64-v8a armeabi-v7a x86_64 x86; do |
| 108 | log "------------------------------------------------------------" | 117 | log "------------------------------------------------------------" |
| 109 | log "build tts apk for $arch" | 118 | log "build tts apk for $arch" |
| @@ -96,6 +96,11 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt | @@ -96,6 +96,11 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt | ||
| 96 | sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt | 96 | sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt |
| 97 | {% endif %} | 97 | {% endif %} |
| 98 | 98 | ||
| 99 | +{% if tts_model.lexicon %} | ||
| 100 | + lexicon={{ tts_model.lexicon }} | ||
| 101 | + sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt | ||
| 102 | +{% endif %} | ||
| 103 | + | ||
| 99 | git diff | 104 | git diff |
| 100 | popd | 105 | popd |
| 101 | 106 | ||
| @@ -103,6 +108,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | @@ -103,6 +108,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then | ||
| 103 | lang=zh_en | 108 | lang=zh_en |
| 104 | fi | 109 | fi |
| 105 | 110 | ||
| 111 | +if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then | ||
| 112 | + lang=zh_en | ||
| 113 | +fi | ||
| 114 | + | ||
| 106 | for arch in arm64-v8a armeabi-v7a x86_64 x86; do | 115 | for arch in arm64-v8a armeabi-v7a x86_64 x86; do |
| 107 | log "------------------------------------------------------------" | 116 | log "------------------------------------------------------------" |
| 108 | log "build tts apk for $arch" | 117 | log "build tts apk for $arch" |
| @@ -41,6 +41,7 @@ class TtsModel: | @@ -41,6 +41,7 @@ class TtsModel: | ||
| 41 | dict_dir: Optional[str] = None | 41 | dict_dir: Optional[str] = None |
| 42 | is_char: bool = False | 42 | is_char: bool = False |
| 43 | lang_iso_639_3: str = "" | 43 | lang_iso_639_3: str = "" |
| 44 | + lexicon: str = "" | ||
| 44 | 45 | ||
| 45 | 46 | ||
| 46 | def convert_lang_to_iso_639_3(models: List[TtsModel]): | 47 | def convert_lang_to_iso_639_3(models: List[TtsModel]): |
| @@ -422,7 +423,21 @@ def get_kokoro_models() -> List[TtsModel]: | @@ -422,7 +423,21 @@ def get_kokoro_models() -> List[TtsModel]: | ||
| 422 | m.data_dir = f"{m.model_dir}/espeak-ng-data" | 423 | m.data_dir = f"{m.model_dir}/espeak-ng-data" |
| 423 | m.voices = "voices.bin" | 424 | m.voices = "voices.bin" |
| 424 | 425 | ||
| 425 | - return english_models | 426 | + multi_lingual_models = [ |
| 427 | + TtsModel( | ||
| 428 | + model_dir="kokoro-multi-lang-v1_0", | ||
| 429 | + model_name="model.onnx", | ||
| 430 | + lang="en", | ||
| 431 | + ) | ||
| 432 | + ] | ||
| 433 | + for m in multi_lingual_models: | ||
| 434 | + m.data_dir = f"{m.model_dir}/espeak-ng-data" | ||
| 435 | + m.dict_dir = f"{m.model_dir}/dict" | ||
| 436 | + m.voices = "voices.bin" | ||
| 437 | + m.lexicon = f"{m.model_dir}/lexicon-us-en.txt,{m.model_dir}/lexicon-zh.txt" | ||
| 438 | + m.rule_fsts = f"{m.model_dir}/phone-zh.fst,{m.model_dir}/date-zh.fst,{m.model_dir}/number-zh.fst" | ||
| 439 | + | ||
| 440 | + return english_models + multi_lingual_models | ||
| 426 | 441 | ||
| 427 | 442 | ||
| 428 | def main(): | 443 | def main(): |
| @@ -256,7 +256,11 @@ fun getOfflineTtsConfig( | @@ -256,7 +256,11 @@ fun getOfflineTtsConfig( | ||
| 256 | voices = "$modelDir/$voices", | 256 | voices = "$modelDir/$voices", |
| 257 | tokens = "$modelDir/tokens.txt", | 257 | tokens = "$modelDir/tokens.txt", |
| 258 | dataDir = dataDir, | 258 | dataDir = dataDir, |
| 259 | - lexicon = if ("," in lexicon) lexicon else "$modelDir/$lexicon", | 259 | + lexicon = when { |
| 260 | + lexicon == "" -> lexicon | ||
| 261 | + "," in lexicon -> lexicon | ||
| 262 | + else -> "$modelDir/$lexicon" | ||
| 263 | + }, | ||
| 260 | dictDir = dictDir, | 264 | dictDir = dictDir, |
| 261 | ) | 265 | ) |
| 262 | } else { | 266 | } else { |
-
请注册或登录后发表评论