正在显示 7 个修改的文件，包含 214 行增加和 101 行删除。
| @@ -3,7 +3,7 @@ name: export-kokoro-to-onnx | @@ -3,7 +3,7 @@ name: export-kokoro-to-onnx | ||
| 3 | on: | 3 | on: |
| 4 | push: | 4 | push: |
| 5 | branches: | 5 | branches: |
| 6 | - - export-kokoro-2 | 6 | + - fix-export-kokoro-1.0-2 |
| 7 | 7 | ||
| 8 | workflow_dispatch: | 8 | workflow_dispatch: |
| 9 | 9 | ||
| @@ -111,6 +111,26 @@ jobs: | @@ -111,6 +111,26 @@ jobs: | ||
| 111 | 111 | ||
| 112 | ls -lh $d.tar.bz2 | 112 | ls -lh $d.tar.bz2 |
| 113 | 113 | ||
| 114 | + d=kokoro-int8-multi-lang-v1_0 | ||
| 115 | + mkdir $d | ||
| 116 | + cp -v LICENSE $d/LICENSE | ||
| 117 | + cp -a espeak-ng-data $d/ | ||
| 118 | + cp -v $src/kokoro.int8.onnx $d/model.int8.onnx | ||
| 119 | + cp -v $src/voices.bin $d/ | ||
| 120 | + cp -v $src/tokens.txt $d/ | ||
| 121 | + cp -v $src/lexicon*.txt $d/ | ||
| 122 | + cp -v $src/README.md $d/README.md | ||
| 123 | + cp -av dict $d/ | ||
| 124 | + cp -v ./*.fst $d/ | ||
| 125 | + ls -lh $d/ | ||
| 126 | + echo "---" | ||
| 127 | + ls -lh $d/dict | ||
| 128 | + | ||
| 129 | + tar cjfv $d.tar.bz2 $d | ||
| 130 | + rm -rf $d | ||
| 131 | + | ||
| 132 | + ls -lh $d.tar.bz2 | ||
| 133 | + | ||
| 114 | - name: Collect results 1.1-zh | 134 | - name: Collect results 1.1-zh |
| 115 | if: matrix.version == '1.1-zh' | 135 | if: matrix.version == '1.1-zh' |
| 116 | shell: bash | 136 | shell: bash |
| @@ -166,6 +186,25 @@ jobs: | @@ -166,6 +186,25 @@ jobs: | ||
| 166 | echo "---" | 186 | echo "---" |
| 167 | ls -lh *.tar.bz2 | 187 | ls -lh *.tar.bz2 |
| 168 | 188 | ||
| 189 | + - name: Release | ||
| 190 | + if: github.repository_owner == 'csukuangfj' | ||
| 191 | + uses: svenstaro/upload-release-action@v2 | ||
| 192 | + with: | ||
| 193 | + file_glob: true | ||
| 194 | + file: ./*.tar.bz2 | ||
| 195 | + overwrite: true | ||
| 196 | + repo_name: k2-fsa/sherpa-onnx | ||
| 197 | + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} | ||
| 198 | + tag: tts-models | ||
| 199 | + | ||
| 200 | + - name: Release | ||
| 201 | + if: github.repository_owner == 'k2-fsa' | ||
| 202 | + uses: svenstaro/upload-release-action@v2 | ||
| 203 | + with: | ||
| 204 | + file_glob: true | ||
| 205 | + file: ./*.tar.bz2 | ||
| 206 | + overwrite: true | ||
| 207 | + tag: tts-models | ||
| 169 | 208 | ||
| 170 | - name: Publish to huggingface 0.19 | 209 | - name: Publish to huggingface 0.19 |
| 171 | if: matrix.version == '0.19' | 210 | if: matrix.version == '0.19' |
| @@ -216,7 +255,7 @@ jobs: | @@ -216,7 +255,7 @@ jobs: | ||
| 216 | git commit -m "add models" | 255 | git commit -m "add models" |
| 217 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true | 256 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true |
| 218 | 257 | ||
| 219 | - - name: Publish to huggingface 1.0 | 258 | + - name: Publish to huggingface 1.0 float32 |
| 220 | if: matrix.version == '1.0' | 259 | if: matrix.version == '1.0' |
| 221 | env: | 260 | env: |
| 222 | HF_TOKEN: ${{ secrets.HF_TOKEN }} | 261 | HF_TOKEN: ${{ secrets.HF_TOKEN }} |
| @@ -267,6 +306,69 @@ jobs: | @@ -267,6 +306,69 @@ jobs: | ||
| 267 | git commit -m "add models" | 306 | git commit -m "add models" |
| 268 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true | 307 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true |
| 269 | 308 | ||
| 309 | + - name: Publish to huggingface 1.0 int8 | ||
| 310 | + if: matrix.version == '1.0' | ||
| 311 | + env: | ||
| 312 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 313 | + uses: nick-fields/retry@v3 | ||
| 314 | + with: | ||
| 315 | + max_attempts: 20 | ||
| 316 | + timeout_seconds: 200 | ||
| 317 | + shell: bash | ||
| 318 | + command: | | ||
| 319 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 320 | + git config --global user.name "Fangjun Kuang" | ||
| 321 | + | ||
| 322 | + rm -rf huggingface | ||
| 323 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 324 | + export GIT_CLONE_PROTECTION_ACTIVE=false | ||
| 325 | + | ||
| 326 | + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface | ||
| 327 | + cd huggingface | ||
| 328 | + rm -rf ./* | ||
| 329 | + git fetch | ||
| 330 | + git pull | ||
| 331 | + | ||
| 332 | + git lfs track "cmn_dict" | ||
| 333 | + git lfs track "ru_dict" | ||
| 334 | + git lfs track "af_dict" | ||
| 335 | + git lfs track "ar_dict" | ||
| 336 | + git lfs track "da_dict" | ||
| 337 | + git lfs track "en_dict" | ||
| 338 | + git lfs track "fa_dict" | ||
| 339 | + git lfs track "hu_dict" | ||
| 340 | + git lfs track "ia_dict" | ||
| 341 | + git lfs track "it_dict" | ||
| 342 | + git lfs track "lb_dict" | ||
| 343 | + git lfs track "phondata" | ||
| 344 | + git lfs track "ta_dict" | ||
| 345 | + git lfs track "ur_dict" | ||
| 346 | + git lfs track "yue_dict" | ||
| 347 | + git lfs track "*.wav" | ||
| 348 | + git lfs track "lexicon*.txt" | ||
| 349 | + | ||
| 350 | + cp -a ../espeak-ng-data ./ | ||
| 351 | + | ||
| 352 | + cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx | ||
| 353 | + | ||
| 354 | + cp -v ../scripts/kokoro/v1.0/tokens.txt . | ||
| 355 | + cp -v ../scripts/kokoro/v1.0/voices.bin . | ||
| 356 | + cp -v ../scripts/kokoro/v1.0/lexicon*.txt . | ||
| 357 | + cp -v ../scripts/kokoro/v1.0/README.md ./README.md | ||
| 358 | + cp -v ../LICENSE ./ | ||
| 359 | + cp -av ../dict ./ | ||
| 360 | + cp -v ../*.fst ./ | ||
| 361 | + | ||
| 362 | + git lfs track "*.onnx" | ||
| 363 | + git add . | ||
| 364 | + | ||
| 365 | + ls -lh | ||
| 366 | + | ||
| 367 | + git status | ||
| 368 | + | ||
| 369 | + git commit -m "add models" | ||
| 370 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true | ||
| 371 | + | ||
| 270 | - name: Publish to huggingface 1.1-zh | 372 | - name: Publish to huggingface 1.1-zh |
| 271 | if: matrix.version == '1.1-zh' | 373 | if: matrix.version == '1.1-zh' |
| 272 | env: | 374 | env: |
| @@ -299,7 +401,6 @@ jobs: | @@ -299,7 +401,6 @@ jobs: | ||
| 299 | 401 | ||
| 300 | cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx | 402 | cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx |
| 301 | 403 | ||
| 302 | - | ||
| 303 | cp -v ../scripts/kokoro/v1.1-zh/tokens.txt . | 404 | cp -v ../scripts/kokoro/v1.1-zh/tokens.txt . |
| 304 | cp -v ../scripts/kokoro/v1.1-zh/voices.bin . | 405 | cp -v ../scripts/kokoro/v1.1-zh/voices.bin . |
| 305 | cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt . | 406 | cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt . |
| @@ -350,7 +451,6 @@ jobs: | @@ -350,7 +451,6 @@ jobs: | ||
| 350 | 451 | ||
| 351 | cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx | 452 | cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx |
| 352 | 453 | ||
| 353 | - | ||
| 354 | cp -v ../scripts/kokoro/v1.1-zh/tokens.txt . | 454 | cp -v ../scripts/kokoro/v1.1-zh/tokens.txt . |
| 355 | cp -v ../scripts/kokoro/v1.1-zh/voices.bin . | 455 | cp -v ../scripts/kokoro/v1.1-zh/voices.bin . |
| 356 | cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt . | 456 | cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt . |
| @@ -368,23 +468,3 @@ jobs: | @@ -368,23 +468,3 @@ jobs: | ||
| 368 | 468 | ||
| 369 | git commit -m "add models" | 469 | git commit -m "add models" |
| 370 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true | 470 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true |
| 371 | - | ||
| 372 | - - name: Release | ||
| 373 | - if: github.repository_owner == 'csukuangfj' | ||
| 374 | - uses: svenstaro/upload-release-action@v2 | ||
| 375 | - with: | ||
| 376 | - file_glob: true | ||
| 377 | - file: ./*.tar.bz2 | ||
| 378 | - overwrite: true | ||
| 379 | - repo_name: k2-fsa/sherpa-onnx | ||
| 380 | - repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} | ||
| 381 | - tag: tts-models | ||
| 382 | - | ||
| 383 | - - name: Release | ||
| 384 | - if: github.repository_owner == 'k2-fsa' | ||
| 385 | - uses: svenstaro/upload-release-action@v2 | ||
| 386 | - with: | ||
| 387 | - file_glob: true | ||
| 388 | - file: ./*.tar.bz2 | ||
| 389 | - overwrite: true | ||
| 390 | - tag: tts-models |
| @@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id | @@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id | ||
| 10 | 10 | ||
| 11 | def main(): | 11 | def main(): |
| 12 | model = onnx.load("./kokoro.onnx") | 12 | model = onnx.load("./kokoro.onnx") |
| 13 | - style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu") | 13 | + style = torch.load( |
| 14 | + "./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu" | ||
| 15 | + ) | ||
| 14 | 16 | ||
| 15 | id2speaker_str = "" | 17 | id2speaker_str = "" |
| 16 | speaker2id_str = "" | 18 | speaker2id_str = "" |
scripts/kokoro/v1.0/dynamic_quantization.py
0 → 100755
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +import argparse | ||
| 3 | + | ||
| 4 | +import onnxruntime | ||
| 5 | +from onnxruntime.quantization import QuantType, quantize_dynamic | ||
| 6 | + | ||
| 7 | + | ||
| 8 | +def show(filename): | ||
| 9 | + session_opts = onnxruntime.SessionOptions() | ||
| 10 | + session_opts.log_severity_level = 3 | ||
| 11 | + sess = onnxruntime.InferenceSession(filename, session_opts) | ||
| 12 | + for i in sess.get_inputs(): | ||
| 13 | + print(i) | ||
| 14 | + | ||
| 15 | + print("-----") | ||
| 16 | + | ||
| 17 | + for i in sess.get_outputs(): | ||
| 18 | + print(i) | ||
| 19 | + | ||
| 20 | + | ||
| 21 | +""" | ||
| 22 | +NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length']) | ||
| 23 | +NodeArg(name='style', type='tensor(float)', shape=[1, 256]) | ||
| 24 | +NodeArg(name='speed', type='tensor(float)', shape=[1]) | ||
| 25 | +----- | ||
| 26 | +NodeArg(name='audio', type='tensor(float)', shape=['audio_length']) | ||
| 27 | +""" | ||
| 28 | + | ||
| 29 | + | ||
| 30 | +def main(): | ||
| 31 | + show("./kokoro.onnx") | ||
| 32 | + | ||
| 33 | + quantize_dynamic( | ||
| 34 | + model_input="kokoro.onnx", | ||
| 35 | + model_output="kokoro.int8.onnx", | ||
| 36 | + # op_types_to_quantize=["MatMul"], | ||
| 37 | + weight_type=QuantType.QUInt8, | ||
| 38 | + ) | ||
| 39 | + | ||
| 40 | + | ||
| 41 | +if __name__ == "__main__": | ||
| 42 | + main() |
scripts/kokoro/v1.0/export_onnx.py
0 → 100755
| 1 | +#!/usr/bin/env python3 | ||
| 2 | + | ||
| 3 | +import json | ||
| 4 | + | ||
| 5 | +import torch | ||
| 6 | +from kokoro import KModel | ||
| 7 | +from kokoro.model import KModelForONNX | ||
| 8 | + | ||
| 9 | + | ||
| 10 | +@torch.no_grad() | ||
| 11 | +def main(): | ||
| 12 | + with open("Kokoro-82M/config.json") as f: | ||
| 13 | + config = json.load(f) | ||
| 14 | + | ||
| 15 | + model = ( | ||
| 16 | + KModel( | ||
| 17 | + repo_id="not-used-any-value-is-ok", | ||
| 18 | + model="Kokoro-82M/kokoro-v1_0.pth", | ||
| 19 | + config=config, | ||
| 20 | + disable_complex=True, | ||
| 21 | + ) | ||
| 22 | + .to("cpu") | ||
| 23 | + .eval() | ||
| 24 | + ) | ||
| 25 | + | ||
| 26 | + x = torch.randint(1, 100, (48,)).numpy() | ||
| 27 | + x = torch.LongTensor([[0, *x, 0]]) | ||
| 28 | + | ||
| 29 | + style = torch.rand(1, 256, dtype=torch.float32) | ||
| 30 | + speed = torch.rand(1) | ||
| 31 | + | ||
| 32 | + print(x.shape, x.dtype) | ||
| 33 | + print(style.shape, style.dtype) | ||
| 34 | + print(speed, speed.dtype) | ||
| 35 | + | ||
| 36 | + model2 = KModelForONNX(model) | ||
| 37 | + | ||
| 38 | + torch.onnx.export( | ||
| 39 | + model2, | ||
| 40 | + (x, style, speed), | ||
| 41 | + "kokoro.onnx", | ||
| 42 | + input_names=["tokens", "style", "speed"], | ||
| 43 | + output_names=["audio"], | ||
| 44 | + dynamic_axes={ | ||
| 45 | + "tokens": {1: "sequence_length"}, | ||
| 46 | + "audio": {0: "audio_length"}, | ||
| 47 | + }, | ||
| 48 | + opset_version=14, # minimum working version for this kokoro model is 14 | ||
| 49 | + ) | ||
| 50 | + | ||
| 51 | + | ||
| 52 | +if __name__ == "__main__": | ||
| 53 | + main() |
| @@ -71,7 +71,7 @@ def main(): | @@ -71,7 +71,7 @@ def main(): | ||
| 71 | with open("voices.bin", "wb") as f: | 71 | with open("voices.bin", "wb") as f: |
| 72 | for _, speaker in id2speaker.items(): | 72 | for _, speaker in id2speaker.items(): |
| 73 | m = torch.load( | 73 | m = torch.load( |
| 74 | - f"voices/{speaker}.pt", | 74 | + f"Kokoro-82M/voices/{speaker}.pt", |
| 75 | weights_only=True, | 75 | weights_only=True, |
| 76 | map_location="cpu", | 76 | map_location="cpu", |
| 77 | ).numpy() | 77 | ).numpy() |
| @@ -3,93 +3,29 @@ | @@ -3,93 +3,29 @@ | ||
| 3 | 3 | ||
| 4 | set -ex | 4 | set -ex |
| 5 | 5 | ||
| 6 | -if [ ! -f kokoro.onnx ]; then | ||
| 7 | - # see https://github.com/taylorchu/kokoro-onnx/releases | ||
| 8 | - curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx | ||
| 9 | -fi | ||
| 10 | - | ||
| 11 | -if [ ! -f config.json ]; then | ||
| 12 | - # see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json | ||
| 13 | - curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json | ||
| 14 | -fi | 6 | +git clone https://huggingface.co/hexgrad/Kokoro-82M |
| 15 | 7 | ||
| 16 | -# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83 | ||
| 17 | -# and | ||
| 18 | # https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices | 8 | # https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices |
| 19 | # | 9 | # |
| 20 | # af -> American female | 10 | # af -> American female |
| 21 | # am -> American male | 11 | # am -> American male |
| 22 | # bf -> British female | 12 | # bf -> British female |
| 23 | # bm -> British male | 13 | # bm -> British male |
| 24 | -voices=( | ||
| 25 | -af_alloy | ||
| 26 | -af_aoede | ||
| 27 | -af_bella | ||
| 28 | -af_heart | ||
| 29 | -af_jessica | ||
| 30 | -af_kore | ||
| 31 | -af_nicole | ||
| 32 | -af_nova | ||
| 33 | -af_river | ||
| 34 | -af_sarah | ||
| 35 | -af_sky | ||
| 36 | -am_adam | ||
| 37 | -am_echo | ||
| 38 | -am_eric | ||
| 39 | -am_fenrir | ||
| 40 | -am_liam | ||
| 41 | -am_michael | ||
| 42 | -am_onyx | ||
| 43 | -am_puck | ||
| 44 | -am_santa | ||
| 45 | -bf_alice | ||
| 46 | -bf_emma | ||
| 47 | -bf_isabella | ||
| 48 | -bf_lily | ||
| 49 | -bm_daniel | ||
| 50 | -bm_fable | ||
| 51 | -bm_george | ||
| 52 | -bm_lewis | ||
| 53 | -ef_dora | ||
| 54 | -em_alex | ||
| 55 | -ff_siwis | ||
| 56 | -hf_alpha | ||
| 57 | -hf_beta | ||
| 58 | -hm_omega | ||
| 59 | -hm_psi | ||
| 60 | -if_sara | ||
| 61 | -im_nicola | ||
| 62 | -jf_alpha | ||
| 63 | -jf_gongitsune | ||
| 64 | -jf_nezumi | ||
| 65 | -jf_tebukuro | ||
| 66 | -jm_kumo | ||
| 67 | -pf_dora | ||
| 68 | -pm_alex | ||
| 69 | -pm_santa | ||
| 70 | -zf_xiaobei # 东北话 | ||
| 71 | -zf_xiaoni | ||
| 72 | -zf_xiaoxiao | ||
| 73 | -zf_xiaoyi | ||
| 74 | -zm_yunjian | ||
| 75 | -zm_yunxi | ||
| 76 | -zm_yunxia | ||
| 77 | -zm_yunyang | ||
| 78 | -) | ||
| 79 | - | ||
| 80 | -mkdir -p voices | ||
| 81 | - | ||
| 82 | -for v in ${voices[@]}; do | ||
| 83 | - if [ ! -f voices/$v.pt ]; then | ||
| 84 | - curl -SL --output voices/$v.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/$v.pt | ||
| 85 | - fi | ||
| 86 | -done | 14 | + |
| 15 | +if [ ! -f ./kokoro.onnx ]; then | ||
| 16 | + python3 ./export_onnx.py | ||
| 17 | +fi | ||
| 18 | + | ||
| 87 | 19 | ||
| 88 | if [ ! -f ./.add-meta-data.done ]; then | 20 | if [ ! -f ./.add-meta-data.done ]; then |
| 89 | python3 ./add_meta_data.py | 21 | python3 ./add_meta_data.py |
| 90 | touch ./.add-meta-data.done | 22 | touch ./.add-meta-data.done |
| 91 | fi | 23 | fi |
| 92 | 24 | ||
| 25 | +if [ ! -f ./kokoro.int8.onnx ]; then | ||
| 26 | + python3 ./dynamic_quantization.py | ||
| 27 | +fi | ||
| 28 | + | ||
| 93 | if [ ! -f us_gold.json ]; then | 29 | if [ ! -f us_gold.json ]; then |
| 94 | curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json | 30 | curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json |
| 95 | fi | 31 | fi |
-
请注册或登录后发表评论。