Fangjun Kuang
Committed by GitHub

Scripts to generate tts samples (#2513)

... ... @@ -117,9 +117,13 @@ jobs:
export GIT_CLONE_PROTECTION_ACTIVE=false
for d in ${dirs[@]}; do
if [ ! -d ../$d ]]; then
echo "d $d"
if [[ ! -d $d ]]; then
echo "$d does not exist"
continue
fi
echo "$d exists"
rm -rf huggingface
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
... ...
... ... @@ -3,7 +3,7 @@ name: export-matcha-fa-en-to-onnx
on:
push:
branches:
- fix-ci
- tts-matcha-samples
workflow_dispatch:
... ... @@ -33,15 +33,48 @@ jobs:
- name: Install Python dependencies
shell: bash
run: |
pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html sherpa-onnx
- name: Run
if: false
shell: bash
run: |
cd scripts/matcha-tts/fa-en
./run.sh
# Generates one matcha zh sample with generate_samples.py and pushes it to the
# csukuangfj/sherpa-onnx-tts-samples repository on Hugging Face.
- name: Generate samples
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  shell: bash
  run: |
    cd scripts/matcha-tts/zh

    git config --global user.email "csukuangfj@gmail.com"
    git config --global user.name "Fangjun Kuang"

    # Acoustic model (extracted into ./matcha-icefall-zh-baker).
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
    tar xvf matcha-icefall-zh-baker.tar.bz2
    rm matcha-icefall-zh-baker.tar.bz2

    # Vocoder used together with the acoustic model.
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx

    git clone "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/sherpa-onnx-tts-samples" hf
    # generate_samples.py writes into this directory.
    mkdir -p ./hf/matcha/icefall-zh/mp3

    ./generate_samples.py

    pushd hf
    git pull
    git add .
    # `git commit` exits non-zero when nothing is staged, which would fail this
    # step (bash runs with -e) on a re-run that produces identical samples.
    if git diff --cached --quiet; then
      echo "No new samples to commit"
    else
      git commit -m 'add samples for matcha tts zh'
      git push "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/sherpa-onnx-tts-samples" main
    fi
    popd

    rm -rf hf
    ls -lh
- name: Collect results ${{ matrix.version }}
if: false
shell: bash
run: |
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
... ... @@ -73,6 +106,7 @@ jobs:
ls -lh $dst2.tar.bz2
- name: Publish to huggingface male (musa)
if: false
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
... ... @@ -110,6 +144,7 @@ jobs:
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa main || true
- name: Publish to huggingface female (khadijah)
if: false
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
... ... @@ -147,7 +182,8 @@ jobs:
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah main || true
- name: Release
if: github.repository_owner == 'csukuangfj'
# if: github.repository_owner == 'csukuangfj'
if: false
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
... ... @@ -158,7 +194,8 @@ jobs:
tag: tts-models
- name: Release
if: github.repository_owner == 'k2-fsa'
# if: github.repository_owner == 'k2-fsa'
if: false
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
... ...
# Generates audio samples for the kitten TTS models (nano v0.1 and v0.2) and
# pushes them to the csukuangfj/sherpa-onnx-tts-samples repository on
# Hugging Face.
name: generate-tts-samples

on:
  push:
    branches:
      - tts-samples-2

  workflow_dispatch:

concurrency:
  group: generate-tts-samples-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_tts_samples:
    name: ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # NOTE(review): no step below references matrix.python-version.
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Python dependencies
        shell: bash
        run: |
          pip install "numpy<=1.26.4" sherpa-onnx soundfile

      - name: kitten
        if: true
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git config --global user.email "csukuangfj@gmail.com"
          git config --global user.name "Fangjun Kuang"

          cd scripts/kitten-tts

          export GIT_LFS_SKIP_SMUDGE=1
          export GIT_CLONE_PROTECTION_ACTIVE=false

          git clone "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/sherpa-onnx-tts-samples" hf

          # Output directories written by nano_v0_1/ and nano_v0_2/generate_samples.py.
          mkdir -p ./hf/kitten/v0.1/mp3
          mkdir -p ./hf/kitten/v0.2/mp3

          for v in 1 2; do
            pushd "nano_v0_$v"

            curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_$v-fp16.tar.bz2"
            tar xf "kitten-nano-en-v0_$v-fp16.tar.bz2"
            rm "kitten-nano-en-v0_$v-fp16.tar.bz2"

            # The script writes to ./hf/..., so expose the clone inside this directory.
            ln -s ../hf .

            python3 ./generate_samples.py

            rm -rf "kitten-nano-en-v0_$v-fp16"
            popd
          done

          pushd hf
          git pull
          git add .
          # `git commit` exits non-zero when nothing is staged, which would fail
          # this step (bash runs with -e) on a re-run that produces identical
          # samples, so only commit and push when something changed.
          if git diff --cached --quiet; then
            echo "No new samples to commit"
          else
            git commit -m 'add kitten tts samples'
            git push "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/sherpa-onnx-tts-samples" main
          fi
          popd

          rm -rf hf
... ...
... ... @@ -45,6 +45,9 @@ For Rust support, please see [sherpa-rs][sherpa-rs]
It also supports WebAssembly.
[Join our discord](https://discord.gg/fJdxzg2VbG)
## Introduction
This repository supports running the following functions **locally**
... ...
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""
import sherpa_onnx
import soundfile as sf
from generate_voices_bin import speaker2id
# Build the TTS configuration from the inside out. All model paths are
# relative to the current working directory.
kitten_config = sherpa_onnx.OfflineTtsKittenModelConfig(
    model="kitten-nano-en-v0_1-fp16/model.fp16.onnx",
    voices="kitten-nano-en-v0_1-fp16/voices.bin",
    tokens="kitten-nano-en-v0_1-fp16/tokens.txt",
    data_dir="kitten-nano-en-v0_1-fp16/espeak-ng-data",
)
model_config = sherpa_onnx.OfflineTtsModelConfig(
    kitten=kitten_config,
    num_threads=2,
)
config = sherpa_onnx.OfflineTtsConfig(
    model=model_config,
    max_num_sentences=1,
)

# Stop early if the configuration does not validate.
if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)

text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

# Write one sample per entry of speaker2id; each value is passed as `sid`
# and each file is named "<id>-<speaker>.mp3".
num_speakers = len(speaker2id)
for speaker, sid in speaker2id.items():
    print(speaker, sid, num_speakers)

    audio = tts.generate(text, sid=sid, speed=1.0)

    output_path = f"./hf/kitten/v0.1/mp3/{sid}-{speaker}.mp3"
    sf.write(output_path, audio.samples, samplerate=audio.sample_rate)
... ...
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""
import sherpa_onnx
import soundfile as sf
from generate_voices_bin import speaker2id
# Build the TTS configuration from the inside out. All model paths are
# relative to the current working directory.
kitten_config = sherpa_onnx.OfflineTtsKittenModelConfig(
    model="kitten-nano-en-v0_2-fp16/model.fp16.onnx",
    voices="kitten-nano-en-v0_2-fp16/voices.bin",
    tokens="kitten-nano-en-v0_2-fp16/tokens.txt",
    data_dir="kitten-nano-en-v0_2-fp16/espeak-ng-data",
)
model_config = sherpa_onnx.OfflineTtsModelConfig(
    kitten=kitten_config,
    num_threads=2,
)
config = sherpa_onnx.OfflineTtsConfig(
    model=model_config,
    max_num_sentences=1,
)

# Stop early if the configuration does not validate.
if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)

text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

# Write one sample per entry of speaker2id; each value is passed as `sid`
# and each file is named "<id>-<speaker>.mp3".
num_speakers = len(speaker2id)
for speaker, sid in speaker2id.items():
    print(speaker, sid, num_speakers)

    audio = tts.generate(text, sid=sid, speed=1.0)

    output_path = f"./hf/kitten/v0.2/mp3/{sid}-{speaker}.mp3"
    sf.write(output_path, audio.samples, samplerate=audio.sample_rate)
... ...
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""
import sherpa_onnx
import soundfile as sf
# Build the TTS configuration from the inside out. All model paths are
# relative to the current working directory.
matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
    acoustic_model="matcha-icefall-zh-baker/model-steps-3.onnx",
    vocoder="vocos-22khz-univ.onnx",
    lexicon="matcha-icefall-zh-baker/lexicon.txt",
    tokens="matcha-icefall-zh-baker/tokens.txt",
    dict_dir="matcha-icefall-zh-baker/dict",
)
model_config = sherpa_onnx.OfflineTtsModelConfig(
    matcha=matcha_config,
    num_threads=2,
)
config = sherpa_onnx.OfflineTtsConfig(
    model=model_config,
    max_num_sentences=1,
    rule_fsts="./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst",
)

# Stop early if the configuration does not validate.
if not config.validate():
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)

text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔."

# A single sample is generated with sid=0 and written to a fixed path.
audio = tts.generate(text, sid=0, speed=1.0)

output_path = "./hf/matcha/icefall-zh/mp3/0.mp3"
sf.write(output_path, audio.samples, samplerate=audio.sample_rate)
... ...