Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-04-20 14:35:02 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-04-20 14:35:02 +0800
Commit
6cabaa11bf0365e8813f8fc55aefaea3d0215d94
6cabaa11
1 parent
be0f382a
Export kokoro 1.0 int8 models (#2137)
隐藏空白字符变更
内嵌
并排对比
正在显示
7 个修改的文件
包含
214 行增加
和
101 行删除
.github/workflows/export-kokoro.yaml
scripts/kokoro/v1.0/add_meta_data.py
scripts/kokoro/v1.0/dynamic_quantization.py
scripts/kokoro/v1.0/export_onnx.py
scripts/kokoro/v1.0/generate_tokens.py
scripts/kokoro/v1.0/generate_voices_bin.py
scripts/kokoro/v1.0/run.sh
.github/workflows/export-kokoro.yaml
查看文件 @
6cabaa1
...
...
@@ -3,7 +3,7 @@ name: export-kokoro-to-onnx
on
:
push
:
branches
:
-
export-kokoro
-2
-
fix-export-kokoro-1.0
-2
workflow_dispatch
:
...
...
@@ -111,6 +111,26 @@ jobs:
ls -lh $d.tar.bz2
d=kokoro-int8-multi-lang-v1_0
mkdir $d
cp -v LICENSE $d/LICENSE
cp -a espeak-ng-data $d/
cp -v $src/kokoro.int8.onnx $d/model.int8.onnx
cp -v $src/voices.bin $d/
cp -v $src/tokens.txt $d/
cp -v $src/lexicon*.txt $d/
cp -v $src/README.md $d/README.md
cp -av dict $d/
cp -v ./*.fst $d/
ls -lh $d/
echo "---"
ls -lh $d/dict
tar cjfv $d.tar.bz2 $d
rm -rf $d
ls -lh $d.tar.bz2
-
name
:
Collect results 1.1-zh
if
:
matrix.version == '1.1-zh'
shell
:
bash
...
...
@@ -166,6 +186,25 @@ jobs:
echo "---"
ls -lh *.tar.bz2
-
name
:
Release
if
:
github.repository_owner == 'csukuangfj'
uses
:
svenstaro/upload-release-action@v2
with
:
file_glob
:
true
file
:
./*.tar.bz2
overwrite
:
true
repo_name
:
k2-fsa/sherpa-onnx
repo_token
:
${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag
:
tts-models
-
name
:
Release
if
:
github.repository_owner == 'k2-fsa'
uses
:
svenstaro/upload-release-action@v2
with
:
file_glob
:
true
file
:
./*.tar.bz2
overwrite
:
true
tag
:
tts-models
-
name
:
Publish to huggingface 0.19
if
:
matrix.version == '0.19'
...
...
@@ -216,7 +255,7 @@ jobs:
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true
-
name
:
Publish to huggingface 1.0
-
name
:
Publish to huggingface 1.0
float32
if
:
matrix.version == '1.0'
env
:
HF_TOKEN
:
${{ secrets.HF_TOKEN }}
...
...
@@ -267,6 +306,69 @@ jobs:
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
-
name
:
Publish to huggingface 1.0 int8
if
:
matrix.version == '1.0'
env
:
HF_TOKEN
:
${{ secrets.HF_TOKEN }}
uses
:
nick-fields/retry@v3
with
:
max_attempts
:
20
timeout_seconds
:
200
shell
:
bash
command
:
|
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 huggingface
cd huggingface
rm -rf ./*
git fetch
git pull
git lfs track "cmn_dict"
git lfs track "ru_dict"
git lfs track "af_dict"
git lfs track "ar_dict"
git lfs track "da_dict"
git lfs track "en_dict"
git lfs track "fa_dict"
git lfs track "hu_dict"
git lfs track "ia_dict"
git lfs track "it_dict"
git lfs track "lb_dict"
git lfs track "phondata"
git lfs track "ta_dict"
git lfs track "ur_dict"
git lfs track "yue_dict"
git lfs track "*.wav"
git lfs track "lexicon*.txt"
cp -a ../espeak-ng-data ./
cp -v ../scripts/kokoro/v1.0/kokoro.int8.onnx ./model.int8.onnx
cp -v ../scripts/kokoro/v1.0/tokens.txt .
cp -v ../scripts/kokoro/v1.0/voices.bin .
cp -v ../scripts/kokoro/v1.0/lexicon*.txt .
cp -v ../scripts/kokoro/v1.0/README.md ./README.md
cp -v ../LICENSE ./
cp -av ../dict ./
cp -v ../*.fst ./
git lfs track "*.onnx"
git add .
ls -lh
git status
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_0 main || true
-
name
:
Publish to huggingface 1.1-zh
if
:
matrix.version == '1.1-zh'
env
:
...
...
@@ -299,7 +401,6 @@ jobs:
cp -v ../scripts/kokoro/v1.1-zh/kokoro.onnx ./model.onnx
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
...
...
@@ -350,7 +451,6 @@ jobs:
cp -v ../scripts/kokoro/v1.1-zh/kokoro.int8.onnx ./model.int8.onnx
cp -v ../scripts/kokoro/v1.1-zh/tokens.txt .
cp -v ../scripts/kokoro/v1.1-zh/voices.bin .
cp -v ../scripts/kokoro/v1.1-zh/lexicon*.txt .
...
...
@@ -368,23 +468,3 @@ jobs:
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-int8-multi-lang-v1_1 main || true
-
name
:
Release
if
:
github.repository_owner == 'csukuangfj'
uses
:
svenstaro/upload-release-action@v2
with
:
file_glob
:
true
file
:
./*.tar.bz2
overwrite
:
true
repo_name
:
k2-fsa/sherpa-onnx
repo_token
:
${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag
:
tts-models
-
name
:
Release
if
:
github.repository_owner == 'k2-fsa'
uses
:
svenstaro/upload-release-action@v2
with
:
file_glob
:
true
file
:
./*.tar.bz2
overwrite
:
true
tag
:
tts-models
...
...
scripts/kokoro/v1.0/add_meta_data.py
查看文件 @
6cabaa1
...
...
@@ -10,7 +10,9 @@ from generate_voices_bin import speaker2id
def
main
():
model
=
onnx
.
load
(
"./kokoro.onnx"
)
style
=
torch
.
load
(
"./voices/af_alloy.pt"
,
weights_only
=
True
,
map_location
=
"cpu"
)
style
=
torch
.
load
(
"./Kokoro-82M/voices/af_alloy.pt"
,
weights_only
=
True
,
map_location
=
"cpu"
)
id2speaker_str
=
""
speaker2id_str
=
""
...
...
scripts/kokoro/v1.0/dynamic_quantization.py
0 → 100755
查看文件 @
6cabaa1
#!/usr/bin/env python3
import
argparse
import
onnxruntime
from
onnxruntime.quantization
import
QuantType
,
quantize_dynamic
def show(filename):
    """Print the input and output descriptors of an ONNX model.

    An inference session is created for ``filename``; each input node is
    printed on its own line, followed by a ``-----`` separator line, and
    then each output node.

    Args:
        filename: Path of the ONNX model file to inspect.
    """
    opts = onnxruntime.SessionOptions()
    # Presumably raised to keep onnxruntime's own log messages out of the
    # printed listing -- confirm against the onnxruntime severity levels.
    opts.log_severity_level = 3

    session = onnxruntime.InferenceSession(filename, opts)

    for node in session.get_inputs():
        print(node)

    print("-----")

    for node in session.get_outputs():
        print(node)


"""
NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'sequence_length'])
NodeArg(name='style', type='tensor(float)', shape=[1, 256])
NodeArg(name='speed', type='tensor(float)', shape=[1])
-----
NodeArg(name='audio', type='tensor(float)', shape=['audio_length'])
"""
def main():
    """Show the float model's inputs/outputs, then write a quantized copy.

    Reads ``kokoro.onnx`` from the current working directory and writes
    the dynamically quantized model to ``kokoro.int8.onnx``.
    """
    show("./kokoro.onnx")

    # Restricting quantization to selected operators was left disabled
    # in the original:
    #   op_types_to_quantize=["MatMul"]
    quantization_options = {
        "model_input": "kokoro.onnx",
        "model_output": "kokoro.int8.onnx",
        "weight_type": QuantType.QUInt8,
    }
    quantize_dynamic(**quantization_options)


if __name__ == "__main__":
    main()
...
...
scripts/kokoro/v1.0/export_onnx.py
0 → 100755
查看文件 @
6cabaa1
#!/usr/bin/env python3
import
json
import
torch
from
kokoro
import
KModel
from
kokoro.model
import
KModelForONNX
@torch.no_grad()
def main():
    """Export the Kokoro v1.0 PyTorch checkpoint to ``kokoro.onnx``.

    Paths are relative to the current working directory:

    * reads ``Kokoro-82M/config.json`` and ``Kokoro-82M/kokoro-v1_0.pth``
    * writes ``kokoro.onnx``

    The exported graph has inputs ``tokens``, ``style`` and ``speed`` and a
    single output ``audio``. Axis 1 of ``tokens`` and axis 0 of ``audio``
    are exported as dynamic axes.
    """
    # Be explicit about the encoding instead of relying on the platform
    # default, which is not UTF-8 on every system.
    with open("Kokoro-82M/config.json", encoding="utf-8") as f:
        config = json.load(f)

    model = (
        KModel(
            repo_id="not-used-any-value-is-ok",
            model="Kokoro-82M/kokoro-v1_0.pth",
            config=config,
            disable_complex=True,
        )
        .to("cpu")
        .eval()
    )

    # Dummy inputs for the export: 48 random ids in [1, 100) with a 0 on
    # each side, giving an int64 tensor of shape (1, 50).
    x = torch.randint(1, 100, (48,)).numpy()
    x = torch.LongTensor([[0, *x, 0]])

    style = torch.rand(1, 256, dtype=torch.float32)
    speed = torch.rand(1)

    print(x.shape, x.dtype)
    print(style.shape, style.dtype)
    print(speed, speed.dtype)

    model2 = KModelForONNX(model)

    torch.onnx.export(
        model2,
        (x, style, speed),
        "kokoro.onnx",
        input_names=["tokens", "style", "speed"],
        output_names=["audio"],
        dynamic_axes={
            "tokens": {1: "sequence_length"},
            "audio": {0: "audio_length"},
        },
        opset_version=14,  # minimum working version for this kokoro model is 14
    )


if __name__ == "__main__":
    main()
...
...
scripts/kokoro/v1.0/generate_tokens.py
查看文件 @
6cabaa1
...
...
@@ -6,7 +6,7 @@ import json
def
main
():
with
open
(
"config.json"
)
as
f
:
with
open
(
"
Kokoro-82M/
config.json"
)
as
f
:
config
=
json
.
load
(
f
)
vocab
=
config
[
"vocab"
]
...
...
scripts/kokoro/v1.0/generate_voices_bin.py
查看文件 @
6cabaa1
...
...
@@ -71,7 +71,7 @@ def main():
with
open
(
"voices.bin"
,
"wb"
)
as
f
:
for
_
,
speaker
in
id2speaker
.
items
():
m
=
torch
.
load
(
f
"voices/{speaker}.pt"
,
f
"
Kokoro-82M/
voices/{speaker}.pt"
,
weights_only
=
True
,
map_location
=
"cpu"
,
)
.
numpy
()
...
...
scripts/kokoro/v1.0/run.sh
查看文件 @
6cabaa1
...
...
@@ -3,93 +3,29 @@
set
-ex
if
[
! -f kokoro.onnx
]
;
then
# see https://github.com/taylorchu/kokoro-onnx/releases
curl -SL -O https://github.com/taylorchu/kokoro-onnx/releases/download/v0.2.0/kokoro.onnx
fi
if
[
! -f config.json
]
;
then
# see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json
curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/config.json
fi
git clone https://huggingface.co/hexgrad/Kokoro-82M
# see https://huggingface.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L83
# and
# https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
#
# af -> American female
# am -> American male
# bf -> British female
# bm -> British male
voices
=(
af_alloy
af_aoede
af_bella
af_heart
af_jessica
af_kore
af_nicole
af_nova
af_river
af_sarah
af_sky
am_adam
am_echo
am_eric
am_fenrir
am_liam
am_michael
am_onyx
am_puck
am_santa
bf_alice
bf_emma
bf_isabella
bf_lily
bm_daniel
bm_fable
bm_george
bm_lewis
ef_dora
em_alex
ff_siwis
hf_alpha
hf_beta
hm_omega
hm_psi
if_sara
im_nicola
jf_alpha
jf_gongitsune
jf_nezumi
jf_tebukuro
jm_kumo
pf_dora
pm_alex
pm_santa
zf_xiaobei
# 东北话
zf_xiaoni
zf_xiaoxiao
zf_xiaoyi
zm_yunjian
zm_yunxi
zm_yunxia
zm_yunyang
)
mkdir -p voices
for
v
in
${
voices
[@]
}
;
do
if
[
! -f voices/
$v
.pt
]
;
then
curl -SL --output voices/
$v
.pt https://huggingface.co/hexgrad/Kokoro-82M/resolve/main/voices/
$v
.pt
fi
done
if
[
! -f ./kokoro.onnx
]
;
then
python3 ./export_onnx.py
fi
if
[
! -f ./.add-meta-data.done
]
;
then
python3 ./add_meta_data.py
touch ./.add-meta-data.done
fi
if
[
! -f ./kokoro.int8.onnx
]
;
then
python3 ./dynamic_quantization.py
fi
if
[
! -f us_gold.json
]
;
then
curl -SL -O https://raw.githubusercontent.com/hexgrad/misaki/refs/heads/main/misaki/data/us_gold.json
fi
...
...
请
注册
或
登录
后发表评论