Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-08-20 15:01:59 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-08-20 15:01:59 +0800
Commit
866cbe49c08f91d19fcdc52da5b15934fef58b44
866cbe49
1 parent
6eac1af8
Add kitten tts nano v0.2 (#2512)
隐藏空白字符变更
内嵌
并排对比
正在显示
9 个修改的文件
包含
154 行增加
和
5 行删除
.github/workflows/export-kitten.yaml
scripts/apk/generate-tts-apk-script.py
scripts/kitten-tts/nano_v0_2/add_meta_data.py
scripts/kitten-tts/nano_v0_2/convert_opset.py
scripts/kitten-tts/nano_v0_2/generate_tokens.py
scripts/kitten-tts/nano_v0_2/generate_voices_bin.py
scripts/kitten-tts/nano_v0_2/run.sh
scripts/kitten-tts/nano_v0_2/show.py
scripts/kitten-tts/nano_v0_2/test.py
.github/workflows/export-kitten.yaml
查看文件 @
866cbe4
...
...
@@ -3,7 +3,7 @@ name: export-kitten-to-onnx
on
:
push
:
branches
:
-
kitten-
tts
-
kitten-
0.2
workflow_dispatch
:
...
...
@@ -20,6 +20,7 @@ jobs:
fail-fast
:
false
matrix
:
os
:
[
ubuntu-latest
]
version
:
[
"
nano_v0_1"
,
"
nano_v0_2"
]
python-version
:
[
"
3.10"
]
steps
:
...
...
@@ -40,7 +41,7 @@ jobs:
HF_TOKEN
:
${{ secrets.HF_TOKEN }}
shell
:
bash
run
:
|
cd scripts/kitten-tts/
nano_v0_1
cd scripts/kitten-tts/
${{ matrix.version }}
./run.sh
-
name
:
Collect results
...
...
@@ -50,9 +51,18 @@ jobs:
tar xf espeak-ng-data.tar.bz2
rm espeak-ng-data.tar.bz2
src=scripts/kitten-tts/nano_v0_1
version=${{ matrix.version }}
d=kitten-nano-en-v0_1-fp16
src=scripts/kitten-tts/$version
# Map the matrix version to the name of the output directory.
if [[ $version == "nano_v0_1" ]]; then
  d=kitten-nano-en-v0_1-fp16
elif [[ $version == "nano_v0_2" ]]; then
  d=kitten-nano-en-v0_2-fp16
else
  # Say what is wrong (and say it on stderr) before failing the step.
  echo "Unsupported version: $version" >&2
  exit 1
fi
mkdir $d
cp -a LICENSE $d/LICENSE
...
...
@@ -100,12 +110,16 @@ jobs:
dirs=(
kitten-nano-en-v0_1-fp16
kitten-nano-en-v0_2-fp16
)
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
for d in ${dirs[@]}; do
# Skip entries of dirs whose directory does not exist.
# NOTE: "[" must be closed with a single "]". With "]]" the test command
# fails with "missing ]", the condition is never true, and nothing is skipped.
if [ ! -d "../$d" ]; then
  continue
fi
rm -rf huggingface
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
...
...
scripts/apk/generate-tts-apk-script.py
查看文件 @
866cbe4
...
...
@@ -515,7 +515,12 @@ def get_kitten_models() -> List[TtsModel]:
model_dir
=
"kitten-nano-en-v0_1-fp16"
,
model_name
=
"model.fp16.onnx"
,
lang
=
"en"
,
)
),
TtsModel
(
model_dir
=
"kitten-nano-en-v0_2-fp16"
,
model_name
=
"model.fp16.onnx"
,
lang
=
"en"
,
),
]
for
m
in
english_models
:
m
.
data_dir
=
f
"{m.model_dir}/espeak-ng-data"
...
...
scripts/kitten-tts/nano_v0_2/add_meta_data.py
0 → 100755
查看文件 @
866cbe4
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
import
argparse
import
numpy
as
np
import
onnx
from
generate_voices_bin
import
speaker2id
def get_args():
    """Parse the command-line options of this script.

    Returns:
      The namespace produced by argparse. Its ``model`` attribute holds the
      value passed to the required ``--model`` option.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="input and output onnx model",
    )
    parsed = arg_parser.parse_args()
    return parsed
def main():
    """Replace the metadata of the onnx model given by ``--model``.

    Steps:
      1. Load the model from ``args.model``.
      2. Read ``./voices.npz`` to get the shape of its first array.
      3. Build the metadata dict; speaker mappings come from ``speaker2id``.
      4. Remove every metadata entry already in the model and add the new
         ones, each value converted with ``str()``.
      5. Save the model back to ``args.model``.
    """
    args = get_args()
    print(args.model)

    model = onnx.load(args.model)

    # Only the shape of the first stored array is needed, so close the
    # archive right away instead of leaving its file handle open.
    with np.load("./voices.npz") as style:
        style_shape = style[style.files[0]].shape

    # Comma-separated "name->id" and "id->name" pairs, in dict order.
    speaker2id_str = ",".join(f"{s}->{i}" for s, i in speaker2id.items())
    id2speaker_str = ",".join(f"{i}->{s}" for s, i in speaker2id.items())

    meta_data = {
        "model_type": "kitten-tts",
        "language": "English",
        "has_espeak": 1,
        "sample_rate": 24000,
        "version": 1,
        "voice": "en-us",
        "style_dim": ",".join(map(str, style_shape)),
        "n_speakers": len(speaker2id),
        "speaker2id": speaker2id_str,
        "id2speaker": id2speaker_str,
        "speaker_names": ",".join(map(str, speaker2id)),
        "model_url": "https://huggingface.co/KittenML/kitten-tts-nano-0.2",
        "see_also": "https://github.com/KittenML/KittenTTS",
        "maintainer": "k2-fsa",
        "comment": "This is kitten-tts-nano-0.2 and supports only English",
    }

    print(model.metadata_props)

    # Drop all existing entries so that only the keys above remain.
    del model.metadata_props[:]

    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = str(value)
    print("--------------------")

    print(model.metadata_props)

    onnx.save(model, args.model)

    print(f"Please see {args.model}")


if __name__ == "__main__":
    main()
...
...
scripts/kitten-tts/nano_v0_2/convert_opset.py
0 → 100755
查看文件 @
866cbe4
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
"""
Change the model so that it can be run in onnxruntime 1.17.1
"""
import
onnx
def main():
    """Rewrite the opset versions of kitten_tts_nano_v0_2.onnx.

    Loads ``kitten_tts_nano_v0_2.onnx``, prints every opset import, sets the
    version of the default domain and of ``ai.onnx.ml`` to the values in the
    table below, and saves the result as ``model.fp16.onnx``.
    """
    # Domain -> version to write. "" is the ai.onnx domain.
    target_versions = {
        "": 19,  # change from 20 to 19
        "ai.onnx.ml": 4,  # change from 5 to 4
    }

    onnx_model = onnx.load("kitten_tts_nano_v0_2.onnx")

    # Print current opsets
    for entry in onnx_model.opset_import:
        print(f"Domain: '{entry.domain}', Version: {entry.version}")

    # Modify the opset versions (be careful!)
    for entry in onnx_model.opset_import:
        if entry.domain in target_versions:
            entry.version = target_versions[entry.domain]

    # Save the modified model
    onnx.save(onnx_model, "model.fp16.onnx")


if __name__ == "__main__":
    main()
...
...
scripts/kitten-tts/nano_v0_2/generate_tokens.py
0 → 120000
查看文件 @
866cbe4
../
nano_v0_1
/
generate_tokens
.
py
\ No newline at end of file
...
...
scripts/kitten-tts/nano_v0_2/generate_voices_bin.py
0 → 120000
查看文件 @
866cbe4
../
nano_v0_1
/
generate_voices_bin
.
py
\ No newline at end of file
...
...
scripts/kitten-tts/nano_v0_2/run.sh
0 → 100755
查看文件 @
866cbe4
#!/usr/bin/env bash
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
#
# Download the kitten-tts-nano-0.2 model and voices if they are not already
# present in the current directory, then run the helper scripts next to
# this file in order.

set -ex

base_url=https://huggingface.co/KittenML/kitten-tts-nano-0.2/resolve/main

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page under the target name, where the "-f" check below would treat
# it as an already-downloaded file on the next run.
for f in kitten_tts_nano_v0_2.onnx voices.npz; do
  if [ ! -f $f ]; then
    curl --fail -SL -O $base_url/$f
  fi
done

./generate_voices_bin.py
./generate_tokens.py
./convert_opset.py
./show.py
./add_meta_data.py --model ./model.fp16.onnx
# ./test.py --model ./model.fp16.onnx --tokens ./tokens.txt --voice ./voices.bin
ls -lh
...
...
scripts/kitten-tts/nano_v0_2/show.py
0 → 120000
查看文件 @
866cbe4
../
nano_v0_1
/
show
.
py
\ No newline at end of file
...
...
scripts/kitten-tts/nano_v0_2/test.py
0 → 120000
查看文件 @
866cbe4
../
nano_v0_1
/
test
.
py
\ No newline at end of file
...
...
请
注册
或
登录
后发表评论