Committed by GitHub
Generate tts samples for MatchaTTS (English). (#2527)
正在显示 3 个修改的文件，包含 75 行增加和 3 行删除
| @@ -32,7 +32,7 @@ jobs: | @@ -32,7 +32,7 @@ jobs: | ||
| 32 | pip install "numpy<=1.26.4" sherpa-onnx soundfile | 32 | pip install "numpy<=1.26.4" sherpa-onnx soundfile |
| 33 | 33 | ||
| 34 | - name: kitten | 34 | - name: kitten |
| 35 | - if: true | 35 | + if: false |
| 36 | shell: bash | 36 | shell: bash |
| 37 | env: | 37 | env: |
| 38 | HF_TOKEN: ${{ secrets.HF_TOKEN }} | 38 | HF_TOKEN: ${{ secrets.HF_TOKEN }} |
| @@ -68,3 +68,37 @@ jobs: | @@ -68,3 +68,37 @@ jobs: | ||
| 68 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main | 68 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main |
| 69 | popd | 69 | popd |
| 70 | rm -rf hf | 70 | rm -rf hf |
| 71 | + | ||
| 72 | + - name: matcha en (ljspeech) | ||
| 73 | + if: true | ||
| 74 | + shell: bash | ||
| 75 | + env: | ||
| 76 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 77 | + run: | | ||
| 78 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 79 | + git config --global user.name "Fangjun Kuang" | ||
| 80 | + | ||
| 81 | + cd scripts/matcha-tts/en/ | ||
| 82 | + pwd=$PWD | ||
| 83 | + | ||
| 84 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 85 | + export GIT_CLONE_PROTECTION_ACTIVE=false | ||
| 86 | + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf | ||
| 87 | + | ||
| 88 | + mkdir -p ./hf/matcha/icefall-en-ljspeech/mp3 | ||
| 89 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 90 | + tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 91 | + rm matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 92 | + | ||
| 93 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 94 | + | ||
| 95 | + python3 ./generate_samples.py | ||
| 96 | + | ||
| 97 | + pushd hf | ||
| 98 | + git pull | ||
| 99 | + git add . | ||
| 100 | + git commit -m 'add matcha tts en (ljspeech) samples' | ||
| 101 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main | ||
| 102 | + popd | ||
| 103 | + | ||
| 104 | + rm -rf hf |
scripts/matcha-tts/en/generate_samples.py
0 → 100755
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| 3 | +""" | ||
| 4 | +Generate samples for | ||
| 5 | +https://k2-fsa.github.io/sherpa/onnx/tts/all/ | ||
| 6 | +""" | ||
| 7 | + | ||
| 8 | + | ||
| 9 | +import sherpa_onnx | ||
| 10 | +import soundfile as sf | ||
| 11 | + | ||
| 12 | +config = sherpa_onnx.OfflineTtsConfig( | ||
| 13 | + model=sherpa_onnx.OfflineTtsModelConfig( | ||
| 14 | + matcha=sherpa_onnx.OfflineTtsMatchaModelConfig( | ||
| 15 | + acoustic_model="matcha-icefall-en_US-ljspeech/model-steps-3.onnx", | ||
| 16 | + vocoder="vocos-22khz-univ.onnx", | ||
| 17 | + tokens="matcha-icefall-en_US-ljspeech/tokens.txt", | ||
| 18 | + lexicon="", | ||
| 19 | + data_dir="matcha-icefall-en_US-ljspeech/espeak-ng-data", | ||
| 20 | + ), | ||
| 21 | + num_threads=2, | ||
| 22 | + ), | ||
| 23 | + max_num_sentences=1, | ||
| 24 | +) | ||
| 25 | + | ||
| 26 | +if not config.validate(): | ||
| 27 | + raise ValueError("Please check your config") | ||
| 28 | + | ||
| 29 | +tts = sherpa_onnx.OfflineTts(config) | ||
| 30 | +text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." | ||
| 31 | + | ||
| 32 | +audio = tts.generate(text, sid=0, speed=1.0) | ||
| 33 | + | ||
| 34 | +sf.write( | ||
| 35 | + "./hf/matcha/icefall-en-ljspeech/mp3/0.mp3", | ||
| 36 | + audio.samples, | ||
| 37 | + samplerate=audio.sample_rate, | ||
| 38 | +) |
| @@ -18,8 +18,8 @@ void PybindOfflineTtsMatchaModelConfig(py::module *m) { | @@ -18,8 +18,8 @@ void PybindOfflineTtsMatchaModelConfig(py::module *m) { | ||
| 18 | .def(py::init<const std::string &, const std::string &, | 18 | .def(py::init<const std::string &, const std::string &, |
| 19 | const std::string &, const std::string &, | 19 | const std::string &, const std::string &, |
| 20 | const std::string &, const std::string &, float, float>(), | 20 | const std::string &, const std::string &, float, float>(), |
| 21 | - py::arg("acoustic_model"), py::arg("vocoder"), py::arg("lexicon"), | ||
| 22 | - py::arg("tokens"), py::arg("data_dir") = "", | 21 | + py::arg("acoustic_model"), py::arg("vocoder"), |
| 22 | + py::arg("lexicon") = "", py::arg("tokens"), py::arg("data_dir") = "", | ||
| 23 | py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0, | 23 | py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0, |
| 24 | py::arg("length_scale") = 1.0) | 24 | py::arg("length_scale") = 1.0) |
| 25 | .def_readwrite("acoustic_model", &PyClass::acoustic_model) | 25 | .def_readwrite("acoustic_model", &PyClass::acoustic_model) |
-
请 注册 或 登录 后发表评论