Fangjun Kuang
Committed by GitHub

Export kokoro 1.0 int8 models (#2137)

@@ -3,7 +3,7 @@ name: export-kokoro-to-onnx
 on:
   push:
     branches:
-      - export-kokoro-2
+      - fix-export-kokoro-1.0-2

   workflow_dispatch:

@@ -111,6 +111,26 @@ jobs:

           ls -lh $d.tar.bz2

+          d=kokoro-int8-multi-lang-v1_0
+          mkdir $d
+          cp -v LICENSE $d/LICENSE
+          cp -a espeak-ng-data $d/
+          cp -v $src/kokoro.int8.onnx $d/model.int8.onnx
+          cp -v $src/voices.bin $d/
+          cp -v $src/tokens.txt $d/
+          cp -v $src/lexicon*.txt $d/
+          cp -v $src/README.md $d/README.md
+          cp -av dict $d/
+          cp -v ./*.fst $d/
+          ls -lh $d/
+          echo "---"
+          ls -lh $d/dict
+
+          tar cjfv $d.tar.bz2 $d
+          rm -rf $d
+
+          ls -lh $d.tar.bz2
+
       - name: Collect results 1.1-zh
         if: matrix.version == '1.1-zh'
         shell: bash
@@ -166,6 +186,25 @@ jobs:
          echo "---"
          ls -lh *.tar.bz2

+      - name: Release
+        if: github.repository_owner == 'csukuangfj'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          file: ./*.tar.bz2
+          overwrite: true
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: tts-models
+
+      - name: Release
+        if: github.repository_owner == 'k2-fsa'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          file: ./*.tar.bz2
+          overwrite: true
+          tag: tts-models

       - name: Publish to huggingface 0.19
         if: matrix.version == '0.19'
@@ -216,7 +255,7 @@ jobs:
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true

-      - name: Publish to huggingface 1.0
+      - name: Publish to huggingface 1.0 float32
         if: matrix.version == '1.0'
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -267,6 +306,69 @@ jobs:
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true

+      - name: Publish to huggingface 1.0 int8
+        if: matrix.version == '1.0'
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf huggingface
+            export GIT_LFS_SKIP_SMUDGE=1
+            export GIT_CLONE_PROTECTION_ACTIVE=false
+
+            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface
+            cd huggingface
+            rm -rf ./*
+            git fetch
+            git pull
+
+            git lfs track "cmn_dict"
+            git lfs track "ru_dict"
+            git lfs track "af_dict"
+            git lfs track "ar_dict"
+            git lfs track "da_dict"
+            git lfs track "en_dict"
+            git lfs track "fa_dict"
+            git lfs track "hu_dict"
+            git lfs track "ia_dict"
+            git lfs track "it_dict"
+            git lfs track "lb_dict"
+            git lfs track "phondata"
+            git lfs track "ta_dict"
+            git lfs track "ur_dict"
+            git lfs track "yue_dict"
+            git lfs track "*.wav"
+            git lfs track "lexicon*.txt"
+
+            cp -a ../espeak-ng-data ./
+
+            cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx
+
+            cp -v ../scripts/kokoro/v1.0/tokens.txt .
+            cp -v ../scripts/kokoro/v1.0/voices.bin .
+            cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
+            cp -v ../scripts/kokoro/v1.0/README.md ./README.md
+            cp -v ../LICENSE ./
+            cp -av ../dict ./
+            cp -v ../*.fst ./
+
+            git lfs track "*.onnx"
+            git add .
+
+            ls -lh
+
+            git status
+
+            git commit -m "add models"
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true
+
       - name: Publish to huggingface 1.1-zh
         if: matrix.version == '1.1-zh'
         env:
@@ -299,7 +401,6 @@ jobs:

            cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx

-
            cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
            cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
            cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
@@ -350,7 +451,6 @@ jobs:

            cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx

-
            cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
            cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
            cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
@@ -368,23 +468,3 @@ jobs:

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true
-
-      - name: Release
-        if: github.repository_owner == 'csukuangfj'
-        uses: svenstaro/upload-release-action@v2
-        with:
-          file_glob: true
-          file: ./*.tar.bz2
-          overwrite: true
-          repo_name: k2-fsa/sherpa-onnx
-          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
-          tag: tts-models
-
-      - name: Release
-        if: github.repository_owner == 'k2-fsa'
-        uses: svenstaro/upload-release-action@v2
-        with:
-          file_glob: true
-          file: ./*.tar.bz2
-          overwrite: true
-          tag: tts-models
@@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id

 def main():
     model = onnx.load("./kokoro.onnx")
-    style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu")
+    style = torch.load(
+        "./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
+    )

     id2speaker_str = ""
     speaker2id_str = ""
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+import argparse
+
+import onnxruntime
+from onnxruntime.quantization import QuantType, quantize_dynamic
+
+
+def show(filename):
+    session_opts = onnxruntime.SessionOptions()
+    session_opts.log_severity_level = 3
+    sess = onnxruntime.InferenceSession(filename, session_opts)
+    for i in sess.get_inputs():
+        print(i)
+
+    print("-----")
+
+    for i in sess.get_outputs():
+        print(i)
+
+
+"""
+NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
+NodeArg(name='style', type='tensor(float)', shape=[1, 256])
+NodeArg(name='speed', type='tensor(float)', shape=[1])
+-----
+NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
+"""
+
+
+def main():
+    show("./kokoro.onnx")
+
+    quantize_dynamic(
+        model_input="kokoro.onnx",
+        model_output="kokoro.int8.onnx",
+        # op_types_to_quantize=["MatMul"],
+        weight_type=QuantType.QUInt8,
+    )
+
+
+if __name__ == "__main__":
+    main()
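Aside (not part of this diff): once the script above has produced kokoro.int8.onnx, the quantized model can be sanity-checked with onnxruntime using dummy inputs shaped like the NodeArg docstring (tokens int64 [1, sequence_length], style float32 [1, 256], speed float32 [1]). The sketch below mirrors the dummy data used in export_onnx.py; the file name and feed values are illustrative only.

#!/usr/bin/env python3
# Illustrative sanity check (not in the PR); assumes kokoro.int8.onnx is in
# the current directory, as produced by dynamic_quantization.py above.
import numpy as np
import onnxruntime

sess = onnxruntime.InferenceSession("kokoro.int8.onnx")

# Random token ids padded with 0 at both ends, as in export_onnx.py.
# They will not produce meaningful speech; they only exercise the graph.
tokens = np.array([[0] + list(np.random.randint(1, 100, size=48)) + [0]], dtype=np.int64)
style = np.random.rand(1, 256).astype(np.float32)
speed = np.array([1.0], dtype=np.float32)

(audio,) = sess.run(["audio"], {"tokens": tokens, "style": style, "speed": speed})
print(audio.shape, audio.dtype)  # 1-D float32 waveform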
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+
+import json
+
+import torch
+from kokoro import KModel
+from kokoro.model import KModelForONNX
+
+
+@torch.no_grad()
+def main():
+    with open("Kokoro-82M/config.json") as f:
+        config = json.load(f)
+
+    model = (
+        KModel(
+            repo_id="not-used-any-value-is-ok",
+            model="Kokoro-82M/kokoro-v1_0.pth",
+            config=config,
+            disable_complex=True,
+        )
+        .to("cpu")
+        .eval()
+    )
+
+    x = torch.randint(1, 100, (48,)).numpy()
+    x = torch.LongTensor([[0, *x, 0]])
+
+    style = torch.rand(1, 256, dtype=torch.float32)
+    speed = torch.rand(1)
+
+    print(x.shape, x.dtype)
+    print(style.shape, style.dtype)
+    print(speed, speed.dtype)
+
+    model2 = KModelForONNX(model)
+
+    torch.onnx.export(
+        model2,
+        (x, style, speed),
+        "kokoro.onnx",
+        input_names=["tokens", "style", "speed"],
+        output_names=["audio"],
+        dynamic_axes={
+            "tokens": {1: "sequence_length"},
+            "audio": {0: "audio_length"},
+        },
+        opset_version=14,  # minimum working version for this kokoro model is 14
+    )
+
+
+if __name__ == "__main__":
+    main()
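Aside (not part of this diff): after export_onnx.py finishes, the exported graph can be checked structurally before quantization. A minimal sketch, assuming kokoro.onnx exists in the current directory:

#!/usr/bin/env python3
# Illustrative structural check (not in the PR).
import onnx

m = onnx.load("kokoro.onnx")
onnx.checker.check_model(m)  # raises if the exported graph is malformed
print("inputs :", [i.name for i in m.graph.input])
print("outputs:", [o.name for o in m.graph.output])
print("opsets :", {op.domain or "ai.onnx": op.version for op in m.opset_import})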
@@ -6,7 +6,7 @@ import json


 def main():
-    with open("config.json") as f:
+    with open("Kokoro-82M/config.json") as f:
         config = json.load(f)
     vocab = config["vocab"]

@@ -71,7 +71,7 @@ def main():
     with open("voices.bin", "wb") as f:
         for _, speaker in id2speaker.items():
             m = torch.load(
-                f"voices/{speaker}.pt",
+                f"Kokoro-82M/voices/{speaker}.pt",
                 weights_only=True,
                 map_location="cpu",
             ).numpy()
@@ -3,93 +3,29 @@

 set -ex

-if [ ! -f kokoro.onnx ]; then
-  # see https://github.com/taylorchu/kokoro-onnx/releases
-  curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx
-fi
-
-if [ ! -f config.json ]; then
-  # see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json
-  curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json
-fi
+git clone https://huggingface.co/hexgrad/Kokoro-82M

-# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83
-# and
 # https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
 #
 # af -> American female
 # am -> American male
 # bf -> British female
 # bm -> British male
-voices=(
-af_alloy
-af_aoede
-af_bella
-af_heart
-af_jessica
-af_kore
-af_nicole
-af_nova
-af_river
-af_sarah
-af_sky
-am_adam
-am_echo
-am_eric
-am_fenrir
-am_liam
-am_michael
-am_onyx
-am_puck
-am_santa
-bf_alice
-bf_emma
-bf_isabella
-bf_lily
-bm_daniel
-bm_fable
-bm_george
-bm_lewis
-ef_dora
-em_alex
-ff_siwis
-hf_alpha
-hf_beta
-hm_omega
-hm_psi
-if_sara
-im_nicola
-jf_alpha
-jf_gongitsune
-jf_nezumi
-jf_tebukuro
-jm_kumo
-pf_dora
-pm_alex
-pm_santa
-zf_xiaobei # 东北话 (Northeastern Mandarin)
-zf_xiaoni
-zf_xiaoxiao
-zf_xiaoyi
-zm_yunjian
-zm_yunxi
-zm_yunxia
-zm_yunyang
-)
-
-mkdir -p voices
-
-for v in ${voices[@]}; do
-  if [ ! -f voices/$v.pt ]; then
-    curl -SL --output voices/$v.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/$v.pt
-  fi
-done
+
+if [ ! -f ./kokoro.onnx ]; then
+  python3 ./export_onnx.py
+fi
+

 if [ ! -f ./.add-meta-data.done ]; then
   python3 ./add_meta_data.py
   touch ./.add-meta-data.done
 fi

+if [ ! -f ./kokoro.int8.onnx ]; then
+  python3 ./dynamic_quantization.py
+fi
+
 if [ ! -f us_gold.json ]; then
   curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json
 fi