Fangjun Kuang
Committed by GitHub

Generate tts samples for MatchaTTS (English). (#2527)

... ... @@ -32,7 +32,7 @@ jobs:
pip install "numpy<=1.26.4" sherpa-onnx soundfile
- name: kitten
if: true
if: false
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
... ... @@ -68,3 +68,37 @@ jobs:
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
popd
rm -rf hf
# Workflow step: generate MatchaTTS English (ljspeech) audio samples and push
# them to the csukuangfj/sherpa-onnx-tts-samples repository on Hugging Face.
- name: matcha en (ljspeech)
if: true
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
# Identity used for the commit created at the end of this step.
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
cd scripts/matcha-tts/en/
# NOTE(review): `pwd` is not referenced again within this step.
pwd=$PWD
# Skip downloading LFS objects during clone; only LFS pointer files are checked out.
export GIT_LFS_SKIP_SMUDGE=1
# NOTE(review): presumably disables git's clone-protection check so the clone
# works with git-lfs hooks -- confirm against the git version on the runner.
export GIT_CLONE_PROTECTION_ACTIVE=false
# Clone the samples repository into ./hf, authenticating with HF_TOKEN.
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
# Make sure the output directory for the generated samples exists.
mkdir -p ./hf/matcha/icefall-en-ljspeech/mp3
# Download and unpack the acoustic model archive, then delete the archive.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2
# Download the vocoder model into the current directory.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
# Run the sample generator; it is expected to write into the mp3 directory
# created above.
python3 ./generate_samples.py
# Commit everything under ./hf and push it to the main branch.
pushd hf
git pull
git add .
# NOTE(review): `git commit` exits non-zero when nothing is staged; confirm
# that failing the step in that case is intended.
git commit -m 'add matcha tts en (ljspeech) samples'
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
popd
# Remove the local clone.
rm -rf hf
... ...
#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
"""
Generate samples for
https://k2-fsa.github.io/sherpa/onnx/tts/all/
"""
import sherpa_onnx
import soundfile as sf
# Sentence to synthesize and the file the resulting audio is written to.
text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
output_path = "./hf/matcha/icefall-en-ljspeech/mp3/0.mp3"

# Build the configuration from the innermost object outwards:
# Matcha model files -> model config -> overall TTS config.
matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
    acoustic_model="matcha-icefall-en_US-ljspeech/model-steps-3.onnx",
    vocoder="vocos-22khz-univ.onnx",
    tokens="matcha-icefall-en_US-ljspeech/tokens.txt",
    lexicon="",
    data_dir="matcha-icefall-en_US-ljspeech/espeak-ng-data",
)
model_config = sherpa_onnx.OfflineTtsModelConfig(
    matcha=matcha_config,
    num_threads=2,
)
config = sherpa_onnx.OfflineTtsConfig(
    model=model_config,
    max_num_sentences=1,
)

# Abort before constructing the engine if the configuration is rejected.
config_ok = config.validate()
if not config_ok:
    raise ValueError("Please check your config")

tts = sherpa_onnx.OfflineTts(config)
audio = tts.generate(text, sid=0, speed=1.0)

# Save the generated samples at the sample rate reported by the engine.
sf.write(output_path, audio.samples, samplerate=audio.sample_rate)
... ...
... ... @@ -18,8 +18,8 @@ void PybindOfflineTtsMatchaModelConfig(py::module *m) {
.def(py::init<const std::string &, const std::string &,
const std::string &, const std::string &,
const std::string &, const std::string &, float, float>(),
py::arg("acoustic_model"), py::arg("vocoder"), py::arg("lexicon"),
py::arg("tokens"), py::arg("data_dir") = "",
py::arg("acoustic_model"), py::arg("vocoder"),
py::arg("lexicon") = "", py::arg("tokens"), py::arg("data_dir") = "",
py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0,
py::arg("length_scale") = 1.0)
.def_readwrite("acoustic_model", &PyClass::acoustic_model)
... ...