Fangjun Kuang
Committed by GitHub

Upload TTS APKs to huggingface (#400)

# Builds the TTS APKs and uploads them to the huggingface repository
# csukuangfj/sherpa-onnx-apk.
#
# The models are split across a job matrix; each job generates its own
# build-apk-tts.sh via scripts/apk/generate-tts-apk-script.py.
name: apk-tts

on:
  push:
    branches:
      - apk-tts
      - apk
    tags:
      - '*'

  workflow_dispatch:

concurrency:
  group: apk-tts-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  apk_tts:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: apk for tts ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # total is the number of shards; index selects the shard of this job.
        # Both are passed to generate-tts-apk-script.py below.
        total: ["12"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh ${ANDROID_NDK_LATEST_HOME}

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-tts-apk-script.py --total $total --index $index

          chmod +x build-apk-tts.sh
          mv -v ./build-apk-tts.sh ../..

      - name: build APK
        shell: bash
        run: |
          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-tts.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # - uses: actions/upload-artifact@v3
      #   with:
      #     name: tts-apk
      #     path: ./apks/*.apk

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            # Start every attempt from a fresh clone so that a retry also
            # picks up what the other matrix jobs have pushed in the meantime.
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1

            git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
            cd huggingface
            git pull

            mkdir -p tts
            cp -v ../apks/*.apk ./tts/
            git status
            git lfs track "*.apk"
            git add .

            # "git commit" exits with an error when nothing is staged. That
            # happens when an earlier attempt already pushed these APKs but
            # was killed by the timeout; without this guard every remaining
            # attempt would fail even though the upload succeeded.
            git diff --cached --quiet || git commit -m "add more apks"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
@@ -38,7 +38,6 @@ jobs: @@ -38,7 +38,6 @@ jobs:
38 shell: bash 38 shell: bash
39 run: | 39 run: |
40 export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME 40 export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
41 - ./build-apk-tts.sh  
42 ./build-apk-vad.sh 41 ./build-apk-vad.sh
43 ./build-apk-two-pass.sh 42 ./build-apk-two-pass.sh
44 ./build-apk.sh 43 ./build-apk.sh
@@ -84,6 +84,8 @@ class MainActivity : AppCompatActivity() { @@ -84,6 +84,8 @@ class MainActivity : AppCompatActivity() {
84 val ok = audio.samples.size > 0 && audio.save(filename) 84 val ok = audio.samples.size > 0 && audio.save(filename)
85 if (ok) { 85 if (ok) {
86 play.isEnabled = true 86 play.isEnabled = true
  87 + // Play automatically after generation
  88 + onClickPlay()
87 } 89 }
88 } 90 }
89 91
@@ -97,10 +99,24 @@ class MainActivity : AppCompatActivity() { @@ -97,10 +99,24 @@ class MainActivity : AppCompatActivity() {
97 } 99 }
98 100
/**
 * Creates [tts] for the model selected by `modelDir` and `modelName`.
 *
 * Both values are placeholders that must be filled in before the app is
 * built, either by hand (see the examples in the body) or by the CI build
 * script, which rewrites the two placeholder assignments with `sed`.
 *
 * @throws IllegalStateException if a placeholder has not been filled in.
 */
fun initTts() {
    var modelDir: String?
    var modelName: String?

    // The purpose of such a design is to make the CI test easier
    // Please see
    // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
    //
    // NOTE: the build script matches the text of the next two assignments
    // literally, so keep them exactly as they are.
    modelDir = null
    modelName = null

    // Example 1:
    // modelDir = "vits-vctk"
    // modelName = "vits-vctk.onnx"

    // Example 2:
    // modelDir = "vits-piper-en_US-lessac-medium"
    // modelName = "en_US-lessac-medium.onnx"

    // Fail with an actionable message instead of a bare NullPointerException
    // when the placeholders above were left untouched.
    val dir: String = modelDir
        ?: error("modelDir is not set. Please select a model in MainActivity.initTts()")
    val name: String = modelName
        ?: error("modelName is not set. Please select a model in MainActivity.initTts()")

    val config = getOfflineTtsConfig(modelDir = dir, modelName = name)!!
    tts = OfflineTts(assetManager = application.assets, config = config)
}
106 } 122 }
@@ -116,45 +116,17 @@ class OfflineTts( @@ -116,45 +116,17 @@ class OfflineTts(
116 // please refer to 116 // please refer to
117 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html 117 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
118 // to download models 118 // to download models
119 -//  
120 -// You can change the type as you wish  
121 -fun getOfflineTtsConfig(type: Int, debug: Boolean = false): OfflineTtsConfig? {  
122 - when (type) {  
123 - 0 -> {  
124 - val modelDir = "vits-vctk"  
125 - return OfflineTtsConfig(  
126 - model = OfflineTtsModelConfig(  
127 - vits = OfflineTtsVitsModelConfig(  
128 - model = "$modelDir/vits-vctk.onnx",  
129 - lexicon = "$modelDir/lexicon.txt",  
130 - tokens = "$modelDir/tokens.txt"  
131 - ),  
132 - numThreads = 2,  
133 - debug = debug,  
134 - provider = "cpu",  
135 - )  
136 - )  
137 - }  
138 -  
139 - 1 -> {  
140 - val modelDir = "vits-zh-aishell3"  
141 - return OfflineTtsConfig(  
142 - model = OfflineTtsModelConfig(  
143 - vits = OfflineTtsVitsModelConfig(  
144 - model = "$modelDir/vits-aishell3.onnx",  
145 - lexicon = "$modelDir/lexicon.txt",  
146 - tokens = "$modelDir/tokens.txt"  
147 - ),  
148 - numThreads = 2,  
149 - debug = debug,  
150 - provider = "cpu",  
151 - )  
152 - )  
153 - }  
154 - }  
155 -  
156 - println("Unsupported type $type")  
157 -  
158 - return null  
/**
 * Builds the configuration of a VITS TTS model.
 *
 * The model file, `lexicon.txt` and `tokens.txt` are all looked up inside
 * [modelDir]. The returned config always uses 2 threads, the "cpu" provider
 * and has debug output turned off.
 *
 * @param modelDir directory that holds the model files, e.g. "vits-vctk"
 * @param modelName file name of the ONNX model inside [modelDir],
 *   e.g. "vits-vctk.onnx"
 * @return the configuration; never null, so callers need no null check
 */
fun getOfflineTtsConfig(modelDir: String, modelName: String): OfflineTtsConfig =
    OfflineTtsConfig(
        model = OfflineTtsModelConfig(
            vits = OfflineTtsVitsModelConfig(
                model = "$modelDir/$modelName",
                lexicon = "$modelDir/lexicon.txt",
                tokens = "$modelDir/tokens.txt",
            ),
            numThreads = 2,
            debug = false,
            provider = "cpu",
        )
    )
  1 +build-apk-tts.sh
  1 +# Introduction
  2 +
  3 +This folder contains scripts for building Android APKs.
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
  2 +#
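  3 +# Auto generated from build-apk-tts.sh.in by generate-tts-apk-script.py! Please DO NOT EDIT the generated build-apk-tts.sh; edit the template instead.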
2 4
3 # Please set the environment variable ANDROID_NDK 5 # Please set the environment variable ANDROID_NDK
4 # before running this script 6 # before running this script
@@ -27,61 +29,31 @@ log "====================x86-64====================" @@ -27,61 +29,31 @@ log "====================x86-64===================="
27 log "====================x86====================" 29 log "====================x86===================="
28 ./build-android-x86.sh 30 ./build-android-x86.sh
29 31
30 -  
31 mkdir -p apks 32 mkdir -p apks
32 33
33 -# Download the model 34 +{% for tts_model in tts_model_list %}
34 pushd ./android/SherpaOnnxTts/app/src/main/assets/ 35 pushd ./android/SherpaOnnxTts/app/src/main/assets/
35 -mkdir vits-vctk  
36 -  
37 -cd vits-vctk  
38 -wget -qq https://huggingface.co/csukuangfj/vits-vctk/resolve/main/vits-vctk.onnx  
39 -wget -qq https://huggingface.co/csukuangfj/vits-vctk/resolve/main/lexicon.txt  
40 -wget -qq https://huggingface.co/csukuangfj/vits-vctk/resolve/main/tokens.txt  
41 -popd  
42 -  
43 -for arch in arm64-v8a armeabi-v7a x86_64 x86; do  
44 - log "------------------------------------------------------------"  
45 - log "build tts apk for $arch"  
46 - log "------------------------------------------------------------"  
47 - src_arch=$arch  
48 - if [ $arch == "armeabi-v7a" ]; then  
49 - src_arch=armv7-eabi  
50 - elif [ $arch == "x86_64" ]; then  
51 - src_arch=x86-64  
52 - fi 36 +model_dir={{ tts_model.model_dir }}
  37 +model_name={{ tts_model.model_name }}
  38 +lang={{ tts_model.lang }}
53 39
54 - ls -lh ./build-android-$src_arch/install/lib/*.so 40 +mkdir $model_dir
  41 +cd $model_dir
  42 +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/$model_name
  43 +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/lexicon.txt
  44 +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/tokens.txt
  45 +wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/MODEL_CARD 2>/dev/null || true
55 46
56 - cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/  
57 -  
58 - pushd ./android/SherpaOnnxTts  
59 - ./gradlew build  
60 - popd  
61 -  
62 - mv android/SherpaOnnxTts/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-en-tts-multi-speaker-vctk.apk  
63 - ls -lh apks  
64 - rm -v ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/*.so  
65 -done  
66 -  
67 -rm -rf ./android/SherpaOnnxTts/app/src/main/assets/vits-vctk 47 +popd
  48 +# Now we are at the project root directory
68 49
69 git checkout . 50 git checkout .
70 pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx 51 pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
71 -sed -i.bak s/"type = 0"/"type = 1"/ ./MainActivity.kt 52 +sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
  53 +sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
72 git diff 54 git diff
73 popd 55 popd
74 56
75 -pushd ./android/SherpaOnnxTts/app/src/main/assets/  
76 -mkdir vits-zh-aishell3  
77 -cd vits-zh-aishell3  
78 -  
79 -wget -qq https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/vits-aishell3.onnx  
80 -wget -qq https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/lexicon.txt  
81 -wget -qq https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/tokens.txt  
82 -  
83 -popd  
84 -  
85 for arch in arm64-v8a armeabi-v7a x86_64 x86; do 57 for arch in arm64-v8a armeabi-v7a x86_64 x86; do
86 log "------------------------------------------------------------" 58 log "------------------------------------------------------------"
87 log "build tts apk for $arch" 59 log "build tts apk for $arch"
@@ -101,12 +73,13 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do @@ -101,12 +73,13 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
101 ./gradlew build 73 ./gradlew build
102 popd 74 popd
103 75
104 - mv android/SherpaOnnxTts/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-zh-tts-multi-speaker-aishell3.apk 76 + mv android/SherpaOnnxTts/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-tts-$model_dir.apk
105 ls -lh apks 77 ls -lh apks
106 rm -v ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/*.so 78 rm -v ./android/SherpaOnnxTts/app/src/main/jniLibs/$arch/*.so
107 done 79 done
108 80
109 -rm -rf ./android/SherpaOnnxTts/app/src/main/assets/vits-vctk 81 +rm -rf ./android/SherpaOnnxTts/app/src/main/assets/$model_dir
  82 +{% endfor %}
110 83
111 git checkout . 84 git checkout .
112 85
  1 +#!/usr/bin/env python3
  2 +
  3 +from dataclasses import dataclass
  4 +
  5 +import jinja2
  6 +from typing import List
  7 +import argparse
  8 +
  9 +
def get_args():
    """Parse the command-line options that select this runner's share of work.

    Returns:
      An ``argparse.Namespace`` with two integer attributes:
      ``total`` (from ``--total``, default 1) and
      ``index`` (from ``--index``, default 0).
    """
    # Each entry is (flag, default, help text); every option is an integer.
    int_options = (
        ("--total", 1, "Number of runners"),
        ("--index", 0, "Index of the current runner"),
    )

    cli = argparse.ArgumentParser()
    for flag, default_value, description in int_options:
        cli.add_argument(flag, type=int, default=default_value, help=description)

    return cli.parse_args()
  25 +
  26 +
@dataclass
class TtsModel:
    """One TTS model for which an APK is built.

    Attributes:
      model_dir: Name of the directory that holds the model files.
      model_name: File name of the ONNX model inside ``model_dir``.
      lang: Language code of the model: en, zh, fr, de, etc.
    """

    model_dir: str
    model_name: str
    lang: str
  32 +
  33 +
def get_all_models() -> List[TtsModel]:
    """Return every supported TTS model in a fixed order.

    The order matters to callers that slice the list by position: three
    stand-alone models come first, followed by the piper voices grouped by
    locale.
    """
    # Models whose directory and file names follow no common scheme.
    models = [
        TtsModel(model_dir="vits-zh-aishell3", model_name="vits-aishell3.onnx", lang="zh"),
        TtsModel(model_dir="vits-vctk", model_name="vits-vctk.onnx", lang="en"),
        TtsModel(model_dir="vits-ljs", model_name="vits-ljs.onnx", lang="en"),
    ]

    # Piper voices. For a voice V the directory is "vits-piper-V" and the
    # model file is "V.onnx".
    # fmt: off
    piper_voices = [
        # English (US)
        ("en", [
            "en_US-amy-low",
            "en_US-amy-medium",
            "en_US-arctic-medium",
            "en_US-danny-low",
            "en_US-hfc_male-medium",
            "en_US-joe-medium",
            "en_US-kathleen-low",
            "en_US-kusal-medium",
            "en_US-l2arctic-medium",
            "en_US-lessac-low",
            "en_US-lessac-medium",
            "en_US-lessac-high",
            "en_US-libritts-high",
            "en_US-libritts_r-medium",
            "en_US-ryan-low",
            "en_US-ryan-medium",
            "en_US-ryan-high",
        ]),
        # English (GB)
        ("en", [
            "en_GB-alan-low",
            "en_GB-alan-medium",
            "en_GB-alba-medium",
            "en_GB-jenny_dioco-medium",
            "en_GB-northern_english_male-medium",
            "en_GB-semaine-medium",
            "en_GB-southern_english_female-low",
            "en_GB-vctk-medium",
        ]),
        # German (DE)
        ("de", [
            "de_DE-eva_k-x_low",
            "de_DE-karlsson-low",
            "de_DE-kerstin-low",
            "de_DE-pavoque-low",
            "de_DE-ramona-low",
            "de_DE-thorsten-low",
            "de_DE-thorsten-medium",
            "de_DE-thorsten-high",
            "de_DE-thorsten_emotional-medium",
        ]),
        # French (FR)
        ("fr", [
            "fr_FR-upmc-medium",
            "fr_FR-siwis-low",
            "fr_FR-siwis-medium",
        ]),
        # Spanish (ES)
        ("es", [
            "es_ES-carlfm-x_low",
            "es_ES-davefx-medium",
            "es_ES-mls_10246-low",
            "es_ES-mls_9972-low",
            "es_ES-sharvard-medium",
        ]),
        # Spanish (MX)
        ("es", [
            "es_MX-ald-medium",
        ]),
    ]
    # fmt: on

    for language, voices in piper_voices:
        for voice in voices:
            models.append(
                TtsModel(
                    model_dir=f"vits-piper-{voice}",
                    model_name=f"{voice}.onnx",
                    lang=language,
                )
            )

    return models
  95 +
  96 +
def main():
    """Generate ``./build-apk-tts.sh`` for one runner's share of the models.

    Reads the Jinja2 template ``./build-apk-tts.sh.in``, selects the slice of
    ``get_all_models()`` that belongs to runner ``--index`` out of ``--total``
    runners and renders the template with that slice as ``tts_model_list``.

    The models are split into ``total`` contiguous shards whose sizes differ
    by at most one, so no single runner ends up with the whole remainder.

    Raises:
      AssertionError: If ``index`` is not in ``[0, total)``.
      ValueError: If there are fewer models than runners.
    """
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    environment = jinja2.Environment()
    with open("./build-apk-tts.sh.in") as f:
        s = f.read()
    template = environment.from_string(s)

    all_model_list = get_all_models()
    num_models = len(all_model_list)

    # Every runner must get at least one model.
    if num_models < total:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    # Floor-based boundaries: shard i covers
    # [i * num_models // total, (i + 1) * num_models // total).
    # Consecutive shards are adjacent and together cover every model.
    start = index * num_models // total
    end = (index + 1) * num_models // total

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    s = template.render(tts_model_list=all_model_list[start:end])
    with open("./build-apk-tts.sh", "w") as f:
        print(s, file=f)
  124 +
  125 +
  126 +if __name__ == "__main__":
  127 + main()