Fangjun Kuang
Committed by GitHub

Generate TTS samples for MatchaTTS (English). (#2527)

@@ -32,7 +32,7 @@ jobs: @@ -32,7 +32,7 @@ jobs:
32 pip install "numpy<=1.26.4" sherpa-onnx soundfile 32 pip install "numpy<=1.26.4" sherpa-onnx soundfile
33 33
34 - name: kitten 34 - name: kitten
35 - if: true 35 + if: false
36 shell: bash 36 shell: bash
37 env: 37 env:
38 HF_TOKEN: ${{ secrets.HF_TOKEN }} 38 HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -68,3 +68,37 @@ jobs: @@ -68,3 +68,37 @@ jobs:
68 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main 68 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
69 popd 69 popd
70 rm -rf hf 70 rm -rf hf
  71 +
  72 + - name: matcha en (ljspeech)
  73 + if: true
  74 + shell: bash
  75 + env:
  76 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  77 + run: |
  78 + git config --global user.email "csukuangfj@gmail.com"
  79 + git config --global user.name "Fangjun Kuang"
  80 +
  81 + cd scripts/matcha-tts/en/
  82 + pwd=$PWD
  83 +
  84 + export GIT_LFS_SKIP_SMUDGE=1
  85 + export GIT_CLONE_PROTECTION_ACTIVE=false
  86 + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
  87 +
  88 + mkdir -p ./hf/matcha/icefall-en-ljspeech/mp3
  89 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  90 + tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  91 + rm matcha-icefall-en_US-ljspeech.tar.bz2
  92 +
  93 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
  94 +
  95 + python3 ./generate_samples.py
  96 +
  97 + pushd hf
  98 + git pull
  99 + git add .
  100 + git commit -m 'add matcha tts en (ljspeech) samples'
  101 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
  102 + popd
  103 +
  104 + rm -rf hf
  1 +#!/usr/bin/env python3
  2 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
  3 +"""
  4 +Generate samples for
  5 +https://k2-fsa.github.io/sherpa/onnx/tts/all/
  6 +"""
  7 +
  8 +
  9 +import sherpa_onnx
  10 +import soundfile as sf
  11 +
  12 +config = sherpa_onnx.OfflineTtsConfig(
  13 + model=sherpa_onnx.OfflineTtsModelConfig(
  14 + matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
  15 + acoustic_model="matcha-icefall-en_US-ljspeech/model-steps-3.onnx",
  16 + vocoder="vocos-22khz-univ.onnx",
  17 + tokens="matcha-icefall-en_US-ljspeech/tokens.txt",
  18 + lexicon="",
  19 + data_dir="matcha-icefall-en_US-ljspeech/espeak-ng-data",
  20 + ),
  21 + num_threads=2,
  22 + ),
  23 + max_num_sentences=1,
  24 +)
  25 +
  26 +if not config.validate():
  27 + raise ValueError("Please check your config")
  28 +
  29 +tts = sherpa_onnx.OfflineTts(config)
  30 +text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  31 +
  32 +audio = tts.generate(text, sid=0, speed=1.0)
  33 +
  34 +sf.write(
  35 + "./hf/matcha/icefall-en-ljspeech/mp3/0.mp3",
  36 + audio.samples,
  37 + samplerate=audio.sample_rate,
  38 +)
@@ -18,8 +18,8 @@ void PybindOfflineTtsMatchaModelConfig(py::module *m) { @@ -18,8 +18,8 @@ void PybindOfflineTtsMatchaModelConfig(py::module *m) {
18 .def(py::init<const std::string &, const std::string &, 18 .def(py::init<const std::string &, const std::string &,
19 const std::string &, const std::string &, 19 const std::string &, const std::string &,
20 const std::string &, const std::string &, float, float>(), 20 const std::string &, const std::string &, float, float>(),
21 - py::arg("acoustic_model"), py::arg("vocoder"), py::arg("lexicon"),  
22 - py::arg("tokens"), py::arg("data_dir") = "", 21 + py::arg("acoustic_model"), py::arg("vocoder"),
  22 + py::arg("lexicon") = "", py::arg("tokens"), py::arg("data_dir") = "",
23 py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0, 23 py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0,
24 py::arg("length_scale") = 1.0) 24 py::arg("length_scale") = 1.0)
25 .def_readwrite("acoustic_model", &PyClass::acoustic_model) 25 .def_readwrite("acoustic_model", &PyClass::acoustic_model)