Fangjun Kuang
Committed by GitHub

Export kokoro 1.0 int8 models (#2137)

@@ -3,7 +3,7 @@ name: export-kokoro-to-onnx
 on:
   push:
     branches:
-      - export-kokoro-2
+      - fix-export-kokoro-1.0-2

   workflow_dispatch:

@@ -111,6 +111,26 @@ jobs:

           ls -lh $d.tar.bz2

+          d=kokoro-int8-multi-lang-v1_0
+          mkdir $d
+          cp -v LICENSE $d/LICENSE
+          cp -a espeak-ng-data $d/
+          cp -v $src/kokoro.int8.onnx $d/model.int8.onnx
+          cp -v $src/voices.bin $d/
+          cp -v $src/tokens.txt $d/
+          cp -v $src/lexicon*.txt $d/
+          cp -v $src/README.md $d/README.md
+          cp -av dict $d/
+          cp -v ./*.fst $d/
+          ls -lh $d/
+          echo "---"
+          ls -lh $d/dict
+
+          tar cjfv $d.tar.bz2 $d
+          rm -rf $d
+
+          ls -lh $d.tar.bz2
+
       - name: Collect results 1.1-zh
         if: matrix.version == '1.1-zh'
         shell: bash
@@ -166,6 +186,25 @@ jobs:
          echo "---"
          ls -lh *.tar.bz2

+      - name: Release
+        if: github.repository_owner == 'csukuangfj'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          file: ./*.tar.bz2
+          overwrite: true
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: tts-models
+
+      - name: Release
+        if: github.repository_owner == 'k2-fsa'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          file: ./*.tar.bz2
+          overwrite: true
+          tag: tts-models

       - name: Publish to huggingface 0.19
         if: matrix.version == '0.19'
@@ -216,7 +255,7 @@ jobs:
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true

-      - name: Publish to huggingface 1.0
+      - name: Publish to huggingface 1.0 float32
         if: matrix.version == '1.0'
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -267,6 +306,69 @@ jobs:
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true

+      - name: Publish to huggingface 1.0 int8
+        if: matrix.version == '1.0'
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf huggingface
+            export GIT_LFS_SKIP_SMUDGE=1
+            export GIT_CLONE_PROTECTION_ACTIVE=false
+
+            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface
+            cd huggingface
+            rm -rf ./*
+            git fetch
+            git pull
+
+            git lfs track "cmn_dict"
+            git lfs track "ru_dict"
+            git lfs track "af_dict"
+            git lfs track "ar_dict"
+            git lfs track "da_dict"
+            git lfs track "en_dict"
+            git lfs track "fa_dict"
+            git lfs track "hu_dict"
+            git lfs track "ia_dict"
+            git lfs track "it_dict"
+            git lfs track "lb_dict"
+            git lfs track "phondata"
+            git lfs track "ta_dict"
+            git lfs track "ur_dict"
+            git lfs track "yue_dict"
+            git lfs track "*.wav"
+            git lfs track "lexicon*.txt"
+
+            cp -a ../espeak-ng-data ./
+
+            cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx
+
+            cp -v ../scripts/kokoro/v1.0/tokens.txt .
+            cp -v ../scripts/kokoro/v1.0/voices.bin .
+            cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
+            cp -v ../scripts/kokoro/v1.0/README.md ./README.md
+            cp -v ../LICENSE ./
+            cp -av ../dict ./
+            cp -v ../*.fst ./
+
+            git lfs track "*.onnx"
+            git add .
+
+            ls -lh
+
+            git status
+
+            git commit -m "add models"
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true
+
       - name: Publish to huggingface 1.1-zh
         if: matrix.version == '1.1-zh'
         env:
@@ -299,7 +401,6 @@ jobs:

            cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx

-
            cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
            cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
            cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
@@ -350,7 +451,6 @@ jobs:

            cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx

-
            cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
            cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
            cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
@@ -368,23 +468,3 @@ jobs:

            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true
-
-      - name: Release
-        if: github.repository_owner == 'csukuangfj'
-        uses: svenstaro/upload-release-action@v2
-        with:
-          file_glob: true
-          file: ./*.tar.bz2
-          overwrite: true
-          repo_name: k2-fsa/sherpa-onnx
-          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
-          tag: tts-models
-
-      - name: Release
-        if: github.repository_owner == 'k2-fsa'
-        uses: svenstaro/upload-release-action@v2
-        with:
-          file_glob: true
-          file: ./*.tar.bz2
-          overwrite: true
-          tag: tts-models
@@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id

 def main():
     model = onnx.load("./kokoro.onnx")
-    style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu")
+    style = torch.load(
+        "./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
+    )

     id2speaker_str = ""
     speaker2id_str = ""
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+import argparse
+
+import onnxruntime
+from onnxruntime.quantization import QuantType, quantize_dynamic
+
+
+def show(filename):
+    session_opts = onnxruntime.SessionOptions()
+    session_opts.log_severity_level = 3
+    sess = onnxruntime.InferenceSession(filename, session_opts)
+    for i in sess.get_inputs():
+        print(i)
+
+    print("-----")
+
+    for i in sess.get_outputs():
+        print(i)
+
+
+"""
+NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
+NodeArg(name='style', type='tensor(float)', shape=[1, 256])
+NodeArg(name='speed', type='tensor(float)', shape=[1])
+-----
+NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
+"""
+
+
+def main():
+    show("./kokoro.onnx")
+
+    quantize_dynamic(
+        model_input="kokoro.onnx",
+        model_output="kokoro.int8.onnx",
+        # op_types_to_quantize=["MatMul"],
+        weight_type=QuantType.QUInt8,
+    )
+
+
+if __name__ == "__main__":
+    main()
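Aside (not part of this diff): once the script above has produced kokoro.int8.onnx, the quantized model can be sanity-checked with onnxruntime using dummy inputs shaped like the NodeArg docstring (tokens int64 [1, sequence_length], style float32 [1, 256], speed float32 [1]). The sketch below mirrors the dummy data used in export_onnx.py; the file name and feed values are illustrative only.

#!/usr/bin/env python3
# Illustrative sanity check (not in the PR); assumes kokoro.int8.onnx is in
# the current directory, as produced by dynamic_quantization.py above.
import numpy as np
import onnxruntime

sess = onnxruntime.InferenceSession("kokoro.int8.onnx")

# Random token ids padded with 0 at both ends, as in export_onnx.py.
# They will not produce meaningful speech; they only exercise the graph.
tokens = np.array([[0] + list(np.random.randint(1, 100, size=48)) + [0]], dtype=np.int64)
style = np.random.rand(1, 256).astype(np.float32)
speed = np.array([1.0], dtype=np.float32)

(audio,) = sess.run(["audio"], {"tokens": tokens, "style": style, "speed": speed})
print(audio.shape, audio.dtype)  # 1-D float32 waveform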
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+
+import json
+
+import torch
+from kokoro import KModel
+from kokoro.model import KModelForONNX
+
+
+@torch.no_grad()
+def main():
+    with open("Kokoro-82M/config.json") as f:
+        config = json.load(f)
+
+    model = (
+        KModel(
+            repo_id="not-used-any-value-is-ok",
+            model="Kokoro-82M/kokoro-v1_0.pth",
+            config=config,
+            disable_complex=True,
+        )
+        .to("cpu")
+        .eval()
+    )
+
+    x = torch.randint(1, 100, (48,)).numpy()
+    x = torch.LongTensor([[0, *x, 0]])
+
+    style = torch.rand(1, 256, dtype=torch.float32)
+    speed = torch.rand(1)
+
+    print(x.shape, x.dtype)
+    print(style.shape, style.dtype)
+    print(speed, speed.dtype)
+
+    model2 = KModelForONNX(model)
+
+    torch.onnx.export(
+        model2,
+        (x, style, speed),
+        "kokoro.onnx",
+        input_names=["tokens", "style", "speed"],
+        output_names=["audio"],
+        dynamic_axes={
+            "tokens": {1: "sequence_length"},
+            "audio": {0: "audio_length"},
+        },
+        opset_version=14,  # minimum working version for this kokoro model is 14
+    )
+
+
+if __name__ == "__main__":
+    main()
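Aside (not part of this diff): after export_onnx.py finishes, the exported graph can be checked structurally before quantization. A minimal sketch, assuming kokoro.onnx exists in the current directory:

#!/usr/bin/env python3
# Illustrative structural check (not in the PR).
import onnx

m = onnx.load("kokoro.onnx")
onnx.checker.check_model(m)  # raises if the exported graph is malformed
print("inputs :", [i.name for i in m.graph.input])
print("outputs:", [o.name for o in m.graph.output])
print("opsets :", {op.domain or "ai.onnx": op.version for op in m.opset_import})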
@@ -6,7 +6,7 @@ import json


 def main():
-    with open("config.json") as f:
+    with open("Kokoro-82M/config.json") as f:
         config = json.load(f)
     vocab = config["vocab"]

@@ -71,7 +71,7 @@ def main():
     with open("voices.bin", "wb") as f:
         for _, speaker in id2speaker.items():
             m = torch.load(
-                f"voices/{speaker}.pt",
+                f"Kokoro-82M/voices/{speaker}.pt",
                 weights_only=True,
                 map_location="cpu",
             ).numpy()
@@ -3,93 +3,29 @@

 set -ex

-if [ ! -f kokoro.onnx ]; then
-  # see https://github.com/taylorchu/kokoro-onnx/releases
-  curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx
-fi
-
-if [ ! -f config.json ]; then
-  # see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json
-  curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json
-fi
+git clone https://huggingface.co/hexgrad/Kokoro-82M

-# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83
-# and
 # https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
 #
 # af -> American female
 # am -> American male
 # bf -> British female
 # bm -> British male
-voices=(
-af_alloy
-af_aoede
-af_bella
-af_heart
-af_jessica
-af_kore
-af_nicole
-af_nova
-af_river
-af_sarah
-af_sky
-am_adam
-am_echo
-am_eric
-am_fenrir
-am_liam
-am_michael
-am_onyx
-am_puck
-am_santa
-bf_alice
-bf_emma
-bf_isabella
-bf_lily
-bm_daniel
-bm_fable
-bm_george
-bm_lewis
-ef_dora
-em_alex
-ff_siwis
-hf_alpha
-hf_beta
-hm_omega
-hm_psi
-if_sara
-im_nicola
-jf_alpha
-jf_gongitsune
-jf_nezumi
-jf_tebukuro
-jm_kumo
-pf_dora
-pm_alex
-pm_santa
-zf_xiaobei # 东北话 (Northeastern Mandarin)
-zf_xiaoni
-zf_xiaoxiao
-zf_xiaoyi
-zm_yunjian
-zm_yunxi
-zm_yunxia
-zm_yunyang
-)
-
-mkdir -p voices
-
-for v in ${voices[@]}; do
-  if [ ! -f voices/$v.pt ]; then
-    curl -SL --output voices/$v.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/$v.pt
-  fi
-done
+
+if [ ! -f ./kokoro.onnx ]; then
+  python3 ./export_onnx.py
+fi
+

 if [ ! -f ./.add-meta-data.done ]; then
   python3 ./add_meta_data.py
   touch ./.add-meta-data.done
 fi

+if [ ! -f ./kokoro.int8.onnx ]; then
+  python3 ./dynamic_quantization.py
+fi
+
 if [ ! -f us_gold.json ]; then
   curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json
 fi