Fangjun Kuang
Committed by GitHub

Export kokoro 1.0 int8 models (#2137)

@@ -3,7 +3,7 @@ name: export-kokoro-to-onnx
on:
push:
branches:
- export-kokoro-2
- fix-export-kokoro-1.0-2
workflow_dispatch:
@@ -111,6 +111,26 @@ jobs:
ls -lh $d.tar.bz2
d=kokoro-int8-multi-lang-v1_0
mkdir $d
cp -v LICENSE $d/LICENSE
cp -a espeak-ng-data $d/
cp -v $src/kokoro.int8.onnx $d/model.int8.onnx
cp -v $src/voices.bin $d/
cp -v $src/tokens.txt $d/
cp -v $src/lexicon*.txt $d/
cp -v $src/README.md $d/README.md
cp -av dict $d/
cp -v ./*.fst $d/
ls -lh $d/
echo "---"
ls -lh $d/dict
tar cjfv $d.tar.bz2 $d
rm -rf $d
ls -lh $d.tar.bz2
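The directory packaged above is what ships in kokoro-int8-multi-lang-v1_0.tar.bz2. A rough sketch of consuming it with sherpa-onnx's offline TTS Python API; the config classes and field names follow the upstream Python examples, and the concrete lexicon file names are assumptions, since this step only copies lexicon*.txt:

#!/usr/bin/env python3
# Sketch only: config field names and the lexicon file names are assumptions,
# not taken from this diff.
import sherpa_onnx
import soundfile as sf

d = "kokoro-int8-multi-lang-v1_0"
config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig(
            model=f"{d}/model.int8.onnx",
            voices=f"{d}/voices.bin",
            tokens=f"{d}/tokens.txt",
            data_dir=f"{d}/espeak-ng-data",
            dict_dir=f"{d}/dict",
            lexicon=f"{d}/lexicon-us-en.txt,{d}/lexicon-zh.txt",  # assumed names
        ),
        num_threads=2,
    ),
)
tts = sherpa_onnx.OfflineTts(config)
audio = tts.generate("Hello from the int8 Kokoro model.", sid=0, speed=1.0)
sf.write("test.wav", audio.samples, samplerate=audio.sample_rate)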
- name: Collect results 1.1-zh
if: matrix.version == '1.1-zh'
shell: bash
@@ -166,6 +186,25 @@ jobs:
echo "---"
ls -lh *.tar.bz2
- name: Release
if: github.repository_owner == 'csukuangfj'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: tts-models
- name: Release
if: github.repository_owner == 'k2-fsa'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
tag: tts-models
- name: Publish to huggingface 0.19
if: matrix.version == '0.19'
@@ -216,7 +255,7 @@ jobs:
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true
- name: Publish to huggingface 1.0
- name: Publish to huggingface 1.0 float32
if: matrix.version == '1.0'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -267,6 +306,69 @@ jobs:
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
- name: Publish to huggingface 1.0 int8
if: matrix.version == '1.0'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface
cd huggingface
rm -rf ./*
git fetch
git pull
git lfs track "cmn_dict"
git lfs track "ru_dict"
git lfs track "af_dict"
git lfs track "ar_dict"
git lfs track "da_dict"
git lfs track "en_dict"
git lfs track "fa_dict"
git lfs track "hu_dict"
git lfs track "ia_dict"
git lfs track "it_dict"
git lfs track "lb_dict"
git lfs track "phondata"
git lfs track "ta_dict"
git lfs track "ur_dict"
git lfs track "yue_dict"
git lfs track "*.wav"
git lfs track "lexicon*.txt"
cp -a ../espeak-ng-data ./
cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx
cp -v ../scripts/kokoro/v1.0/tokens.txt .
cp -v ../scripts/kokoro/v1.0/voices.bin .
cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
cp -v ../scripts/kokoro/v1.0/README.md ./README.md
cp -v ../LICENSE ./
cp -av ../dict ./
cp -v ../*.fst ./
git lfs track "*.onnx"
git add .
ls -lh
git status
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true
- name: Publish to huggingface 1.1-zh
if: matrix.version == '1.1-zh'
env:
@@ -299,7 +401,6 @@ jobs:
cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
@@ -350,7 +451,6 @@ jobs:
cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
@@ -368,23 +468,3 @@ jobs:
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true
- name: Release
if: github.repository_owner == 'csukuangfj'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: tts-models
- name: Release
if: github.repository_owner == 'k2-fsa'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
tag: tts-models
@@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id
def main():
model = onnx.load("./kokoro.onnx")
style = torch.load("./voices/af_alloy.pt", weights_only=True, map_location="cpu")
style = torch.load(
"./Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
)
id2speaker_str = ""
speaker2id_str = ""
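Only the construction of the two mapping strings is shown above. A minimal sketch of how such strings are typically attached to the model through ONNX metadata_props; the keys and values here are placeholders, not the ones used by the real script:

#!/usr/bin/env python3
# Sketch: attach string metadata to an ONNX model. Keys/values are placeholders.
import onnx

model = onnx.load("./kokoro.onnx")
entries = {
    "speaker2id": "af_alloy->0,af_aoede->1",  # placeholder
    "id2speaker": "0->af_alloy,1->af_aoede",  # placeholder
}
for key, value in entries.items():
    prop = model.metadata_props.add()  # StringStringEntryProto
    prop.key = key
    prop.value = value

onnx.save(model, "./kokoro.onnx")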
#!/usr/bin/env python3
import argparse
import onnxruntime
from onnxruntime.quantization import QuantType, quantize_dynamic
def show(filename):
session_opts = onnxruntime.SessionOptions()
session_opts.log_severity_level = 3
sess = onnxruntime.InferenceSession(filename, session_opts)
for i in sess.get_inputs():
print(i)
print("-----")
for i in sess.get_outputs():
print(i)
"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
"""
def main():
show("./kokoro.onnx")
quantize_dynamic(
model_input="kokoro.onnx",
model_output="kokoro.int8.onnx",
# op_types_to_quantize=["MatMul"],
weight_type=QuantType.QUInt8,
)
if __name__ == "__main__":
main()
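Dynamic quantization only rewrites the weights, so the two models should stay close on identical inputs. A quick sanity-check sketch using the input signature documented in show() above; the random inputs and the prefix comparison are arbitrary choices:

#!/usr/bin/env python3
# Sketch: run the float32 and int8 models on identical random inputs
# and report how much the generated audio differs.
import numpy as np
import onnxruntime

x = np.random.randint(1, 100, size=(48,))
inputs = {
    "tokens": np.array([[0, *x, 0]], dtype=np.int64),
    "style": np.random.rand(1, 256).astype(np.float32),
    "speed": np.array([1.0], dtype=np.float32),
}

f32 = onnxruntime.InferenceSession("./kokoro.onnx", providers=["CPUExecutionProvider"])
i8 = onnxruntime.InferenceSession("./kokoro.int8.onnx", providers=["CPUExecutionProvider"])

audio_f32 = f32.run(["audio"], inputs)[0]
audio_i8 = i8.run(["audio"], inputs)[0]

# The predicted durations (and hence the audio length) may shift slightly
# after quantization, so compare only the common prefix.
n = min(audio_f32.shape[0], audio_i8.shape[0])
print("lengths:", audio_f32.shape[0], audio_i8.shape[0])
print("max abs diff over common prefix:", float(np.abs(audio_f32[:n] - audio_i8[:n]).max()))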
#!/usr/bin/env python3
import json
import torch
from kokoro import KModel
from kokoro.model import KModelForONNX
@torch.no_grad()
def main():
with open("Kokoro-82M/config.json") as f:
config = json.load(f)
model = (
KModel(
repo_id="not-used-any-value-is-ok",
model="Kokoro-82M/kokoro-v1_0.pth",
config=config,
disable_complex=True,
)
.to("cpu")
.eval()
)
x = torch.randint(1, 100, (48,)).numpy()
x = torch.LongTensor([[0, *x, 0]])
style = torch.rand(1, 256, dtype=torch.float32)
speed = torch.rand(1)
print(x.shape, x.dtype)
print(style.shape, style.dtype)
print(speed, speed.dtype)
model2 = KModelForONNX(model)
torch.onnx.export(
model2,
(x, style, speed),
"kokoro.onnx",
input_names=["tokens", "style", "speed"],
output_names=["audio"],
dynamic_axes={
"tokens": {1: "sequence_length"},
"audio": {0: "audio_length"},
},
opset_version=14, # minimum working version for this kokoro model is 14
)
if __name__ == "__main__":
main()
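Since tokens and audio are exported with dynamic axes, a short check that the exported graph accepts varying sequence lengths can be useful. A sketch; the lengths are arbitrary:

#!/usr/bin/env python3
# Sketch: feed the exported model token sequences of different lengths
# to confirm the dynamic sequence_length / audio_length axes work.
import numpy as np
import onnxruntime

sess = onnxruntime.InferenceSession("./kokoro.onnx", providers=["CPUExecutionProvider"])
style = np.random.rand(1, 256).astype(np.float32)
speed = np.array([1.0], dtype=np.float32)

for n in (16, 48, 100):
    x = np.random.randint(1, 100, size=(n,))
    tokens = np.array([[0, *x, 0]], dtype=np.int64)
    (audio,) = sess.run(["audio"], {"tokens": tokens, "style": style, "speed": speed})
    print(f"{tokens.shape[1]} tokens -> {audio.shape[0]} audio samples")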
@@ -6,7 +6,7 @@ import json
def main():
with open("config.json") as f:
with open("Kokoro-82M/config.json") as f:
config = json.load(f)
vocab = config["vocab"]
@@ -71,7 +71,7 @@ def main():
with open("voices.bin", "wb") as f:
for _, speaker in id2speaker.items():
m = torch.load(
f"voices/{speaker}.pt",
f"Kokoro-82M/voices/{speaker}.pt",
weights_only=True,
map_location="cpu",
).numpy()
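voices.bin is the raw bytes of every speaker's style tensor written one after another in id order. A read-back sketch, assuming float32 storage and that all speaker tensors share the shape of the reference file; both are assumptions, not guaranteed by this diff:

#!/usr/bin/env python3
# Sketch: reconstruct a (num_speakers, ...) style array from voices.bin.
# Assumes float32 storage and a uniform per-speaker tensor shape.
import numpy as np
import torch

ref = torch.load(
    "Kokoro-82M/voices/af_alloy.pt", weights_only=True, map_location="cpu"
).numpy()

data = np.fromfile("voices.bin", dtype=np.float32)
num_speakers = data.size // ref.size
styles = data.reshape(num_speakers, *ref.shape)
print("speakers:", num_speakers, "per-speaker shape:", ref.shape)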
@@ -3,93 +3,29 @@
set -ex
if [ ! -f kokoro.onnx ]; then
# see https://github.com/taylorchu/kokoro-onnx/releases
curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx
fi
if [ ! -f config.json ]; then
# see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json
curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json
fi
git clone https://huggingface.co/hexgrad/Kokoro-82M
# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83
# and
# https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
#
# af -> American female
# am -> American male
# bf -> British female
# bm -> British male
voices=(
af_alloy
af_aoede
af_bella
af_heart
af_jessica
af_kore
af_nicole
af_nova
af_river
af_sarah
af_sky
am_adam
am_echo
am_eric
am_fenrir
am_liam
am_michael
am_onyx
am_puck
am_santa
bf_alice
bf_emma
bf_isabella
bf_lily
bm_daniel
bm_fable
bm_george
bm_lewis
ef_dora
em_alex
ff_siwis
hf_alpha
hf_beta
hm_omega
hm_psi
if_sara
im_nicola
jf_alpha
jf_gongitsune
jf_nezumi
jf_tebukuro
jm_kumo
pf_dora
pm_alex
pm_santa
zf_xiaobei # Northeastern Mandarin (Dongbei) accent
zf_xiaoni
zf_xiaoxiao
zf_xiaoyi
zm_yunjian
zm_yunxi
zm_yunxia
zm_yunyang
)
mkdir -p voices
for v in ${voices[@]}; do
if [ ! -f voices/$v.pt ]; then
curl -SL --output voices/$v.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/$v.pt
fi
done
if [ ! -f ./kokoro.onnx ]; then
python3 ./export_onnx.py
fi
if [ ! -f ./.add-meta-data.done ]; then
python3 ./add_meta_data.py
touch ./.add-meta-data.done
fi
if [ ! -f ./kokoro.int8.onnx ]; then
python3 ./dynamic_quantization.py
fi
if [ ! -f us_gold.json ]; then
curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json
fi
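The script leaves kokoro.onnx (with the metadata added by add_meta_data.py) and kokoro.int8.onnx in the working directory. A small sketch to check whether that metadata also survived dynamic quantization; whether it does is not guaranteed here:

#!/usr/bin/env python3
# Sketch: print the custom metadata of both generated models.
import onnxruntime

for filename in ("./kokoro.onnx", "./kokoro.int8.onnx"):
    sess = onnxruntime.InferenceSession(filename, providers=["CPUExecutionProvider"])
    meta = sess.get_modelmeta().custom_metadata_map
    print(filename)
    for key, value in meta.items():
        print(f"  {key} = {value[:60]}")  # truncate long values
    print("-----")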