Generate tts samples for MatchaTTS (English). (#2527)

Fangjun Kuang · GitHub
Commit f1f8149a47797c9ee4bbcf7f3467bf5f5603014d f1f8149a 1 parent 4694d675
.github/workflows/generate-tts-samples.yaml
scripts/matcha-tts/en/generate_samples.py
sherpa-onnx/python/csrc/offline-tts-matcha-model-config.cc
--- a/.github/workflows/generate-tts-samples.yaml
View file @f1f8149
+++ b/.github/workflows/generate-tts-samples.yaml
View file @f1f8149
@@ -32,7 +32,7 @@ jobs:
           pip install "numpy<=1.26.4" sherpa-onnx soundfile
 
       - name: kitten
-         if: true
+         if: false
         shell: bash
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -68,3 +68,37 @@ jobs:
           git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
           popd
           rm -rf hf
+ 
+       - name: matcha en (ljspeech)
+         if: true
+         shell: bash
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: |
+           git config --global user.email "csukuangfj@gmail.com"
+           git config --global user.name "Fangjun Kuang"
+ 
+           cd scripts/matcha-tts/en/
+           pwd=$PWD
+ 
+           export GIT_LFS_SKIP_SMUDGE=1
+           export GIT_CLONE_PROTECTION_ACTIVE=false
+           git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
+ 
+           mkdir -p ./hf/matcha/icefall-en-ljspeech/mp3
+           curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+           tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+           rm matcha-icefall-en_US-ljspeech.tar.bz2
+ 
+           curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
+ 
+           python3 ./generate_samples.py
+ 
+           pushd hf
+           git pull
+           git add .
+           git commit -m 'add matcha tts en (ljspeech) samples'
+           git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
+           popd
+ 
+           rm -rf hf
--- a/scripts/matcha-tts/en/generate_samples.py 0 → 100755
View file @f1f8149
+++ b/scripts/matcha-tts/en/generate_samples.py 0 → 100755
View file @f1f8149
+ #!/usr/bin/env python3
+ # Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
+ """
+ Generate samples for
+ https://k2-fsa.github.io/sherpa/onnx/tts/all/
+ """
+ 
+ 
+ import sherpa_onnx
+ import soundfile as sf
+ 
+ config = sherpa_onnx.OfflineTtsConfig(
+     model=sherpa_onnx.OfflineTtsModelConfig(
+         matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
+             acoustic_model="matcha-icefall-en_US-ljspeech/model-steps-3.onnx",
+             vocoder="vocos-22khz-univ.onnx",
+             tokens="matcha-icefall-en_US-ljspeech/tokens.txt",
+             lexicon="",
+             data_dir="matcha-icefall-en_US-ljspeech/espeak-ng-data",
+         ),
+         num_threads=2,
+     ),
+     max_num_sentences=1,
+ )
+ 
+ if not config.validate():
+     raise ValueError("Please check your config")
+ 
+ tts = sherpa_onnx.OfflineTts(config)
+ text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+ 
+ audio = tts.generate(text, sid=0, speed=1.0)
+ 
+ sf.write(
+     "./hf/matcha/icefall-en-ljspeech/mp3/0.mp3",
+     audio.samples,
+     samplerate=audio.sample_rate,
+ )
--- a/sherpa-onnx/python/csrc/offline-tts-matcha-model-config.cc
View file @f1f8149
+++ b/sherpa-onnx/python/csrc/offline-tts-matcha-model-config.cc
View file @f1f8149
@@ -18,8 +18,8 @@ void PybindOfflineTtsMatchaModelConfig(py::module *m) {
       .def(py::init<const std::string &, const std::string &,
                     const std::string &, const std::string &,
                     const std::string &, const std::string &, float, float>(),
-            py::arg("acoustic_model"), py::arg("vocoder"), py::arg("lexicon"),
-            py::arg("tokens"), py::arg("data_dir") = "",
+            py::arg("acoustic_model"), py::arg("vocoder"),
+            py::arg("lexicon") = "", py::arg("tokens"), py::arg("data_dir") = "",
            py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0,
            py::arg("length_scale") = 1.0)
       .def_readwrite("acoustic_model", &PyClass::acoustic_model)