继续操作前请注册或者登录。
Fangjun Kuang
Committed by GitHub

Scripts to generate tts samples (#2513)

@@ -117,9 +117,13 @@ jobs: @@ -117,9 +117,13 @@ jobs:
117 export GIT_CLONE_PROTECTION_ACTIVE=false 117 export GIT_CLONE_PROTECTION_ACTIVE=false
118 118
119 for d in ${dirs[@]}; do 119 for d in ${dirs[@]}; do
120 - if [ ! -d ../$d ]]; then 120 + echo "d $d"
  121 + if [[ ! -d $d ]]; then
  122 + echo "$d does not exist"
121 continue 123 continue
122 fi 124 fi
  125 +
  126 + echo "$d exists"
123 rm -rf huggingface 127 rm -rf huggingface
124 128
125 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface 129 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
@@ -3,7 +3,7 @@ name: export-matcha-fa-en-to-onnx @@ -3,7 +3,7 @@ name: export-matcha-fa-en-to-onnx
3 on: 3 on:
4 push: 4 push:
5 branches: 5 branches:
6 - - fix-ci 6 + - tts-matcha-samples
7 7
8 workflow_dispatch: 8 workflow_dispatch:
9 9
@@ -33,15 +33,48 @@ jobs: @@ -33,15 +33,48 @@ jobs:
33 - name: Install Python dependencies 33 - name: Install Python dependencies
34 shell: bash 34 shell: bash
35 run: | 35 run: |
36 - pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html 36 + pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html sherpa-onnx
37 37
38 - name: Run 38 - name: Run
  39 + if: false
39 shell: bash 40 shell: bash
40 run: | 41 run: |
41 cd scripts/matcha-tts/fa-en 42 cd scripts/matcha-tts/fa-en
42 ./run.sh 43 ./run.sh
43 44
  45 + - name: Generate samples
  46 + env:
  47 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  48 + shell: bash
  49 + run: |
  50 + cd scripts/matcha-tts/zh
  51 +
  52 + git config --global user.email "csukuangfj@gmail.com"
  53 + git config --global user.name "Fangjun Kuang"
  54 +
  55 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  56 + tar xvf matcha-icefall-zh-baker.tar.bz2
  57 + rm matcha-icefall-zh-baker.tar.bz2
  58 +
  59 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
  60 +
  61 + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
  62 + mkdir -p ./hf/matcha/icefall-zh/mp3
  63 +
  64 + ./generate_samples.py
  65 +
  66 + pushd hf
  67 + git pull
  68 + git add .
  69 + git commit -m 'add kokoro samples for matcha tts zh'
  70 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
  71 + popd
  72 + rm -rf hf
  73 +
  74 + ls -lh
  75 +
44 - name: Collect results ${{ matrix.version }} 76 - name: Collect results ${{ matrix.version }}
  77 + if: false
45 shell: bash 78 shell: bash
46 run: | 79 run: |
47 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2 80 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
@@ -73,6 +106,7 @@ jobs: @@ -73,6 +106,7 @@ jobs:
73 ls -lh $dst2.tar.bz2 106 ls -lh $dst2.tar.bz2
74 107
75 - name: Publish to huggingface male (musa) 108 - name: Publish to huggingface male (musa)
  109 + if: false
76 env: 110 env:
77 HF_TOKEN: ${{ secrets.HF_TOKEN }} 111 HF_TOKEN: ${{ secrets.HF_TOKEN }}
78 uses: nick-fields/retry@v3 112 uses: nick-fields/retry@v3
@@ -110,6 +144,7 @@ jobs: @@ -110,6 +144,7 @@ jobs:
110 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa main || true 144 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa main || true
111 145
112 - name: Publish to huggingface female (khadijah) 146 - name: Publish to huggingface female (khadijah)
  147 + if: false
113 env: 148 env:
114 HF_TOKEN: ${{ secrets.HF_TOKEN }} 149 HF_TOKEN: ${{ secrets.HF_TOKEN }}
115 uses: nick-fields/retry@v3 150 uses: nick-fields/retry@v3
@@ -147,7 +182,8 @@ jobs: @@ -147,7 +182,8 @@ jobs:
147 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah main || true 182 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah main || true
148 183
149 - name: Release 184 - name: Release
150 - if: github.repository_owner == 'csukuangfj' 185 + # if: github.repository_owner == 'csukuangfj'
  186 + if: false
151 uses: svenstaro/upload-release-action@v2 187 uses: svenstaro/upload-release-action@v2
152 with: 188 with:
153 file_glob: true 189 file_glob: true
@@ -158,7 +194,8 @@ jobs: @@ -158,7 +194,8 @@ jobs:
158 tag: tts-models 194 tag: tts-models
159 195
160 - name: Release 196 - name: Release
161 - if: github.repository_owner == 'k2-fsa' 197 + # if: github.repository_owner == 'k2-fsa'
  198 + if: false
162 uses: svenstaro/upload-release-action@v2 199 uses: svenstaro/upload-release-action@v2
163 with: 200 with:
164 file_glob: true 201 file_glob: true
  1 +name: generate-tts-samples
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - tts-samples-2
  7 +
  8 + workflow_dispatch:
  9 +
  10 +concurrency:
  11 + group: generate-tts-samples-${{ github.ref }}
  12 + cancel-in-progress: true
  13 +
  14 +jobs:
  15 + generate_tts_samples:
  16 + name: ${{ matrix.os }}
  17 + runs-on: ${{ matrix.os }}
  18 + strategy:
  19 + fail-fast: false
  20 + matrix:
  21 + os: [ubuntu-latest]
  22 + python-version: ["3.10"]
  23 +
  24 + steps:
  25 + - uses: actions/checkout@v4
  26 + with:
  27 + fetch-depth: 0
  28 +
  29 + - name: Install Python dependencies
  30 + shell: bash
  31 + run: |
  32 + pip install "numpy<=1.26.4" sherpa-onnx soundfile
  33 +
  34 + - name: kitten
  35 + if: true
  36 + shell: bash
  37 + env:
  38 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  39 + run: |
  40 + git config --global user.email "csukuangfj@gmail.com"
  41 + git config --global user.name "Fangjun Kuang"
  42 +
  43 + cd scripts/kitten-tts
  44 + pwd=$PWD
  45 +
  46 + export GIT_LFS_SKIP_SMUDGE=1
  47 + export GIT_CLONE_PROTECTION_ACTIVE=false
  48 + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
  49 + mkdir -p ./hf/kitten/v0.1/mp3
  50 + mkdir -p ./hf/kitten/v0.2/mp3
  51 +
  52 + for v in 1 2; do
  53 + pushd nano_v0_$v
  54 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_$v-fp16.tar.bz2
  55 + tar xf kitten-nano-en-v0_$v-fp16.tar.bz2
  56 + rm kitten-nano-en-v0_$v-fp16.tar.bz2
  57 +
  58 + ln -s ../hf .
  59 + python3 ./generate_samples.py
  60 + rm -rf kitten-nano-en-v0_$v-fp16
  61 + popd
  62 + done
  63 +
  64 + pushd hf
  65 + git pull
  66 + git add .
  67 + git commit -m 'add kitten tts samples'
  68 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
  69 + popd
  70 + rm -rf hf
@@ -45,6 +45,9 @@ For Rust support, please see [sherpa-rs][sherpa-rs] @@ -45,6 +45,9 @@ For Rust support, please see [sherpa-rs][sherpa-rs]
45 45
46 It also supports WebAssembly. 46 It also supports WebAssembly.
47 47
  48 +[Join our discord](https://discord.gg/fJdxzg2VbG)
  49 +
  50 +
48 ## Introduction 51 ## Introduction
49 52
50 This repository supports running the following functions **locally** 53 This repository supports running the following functions **locally**
  1 +#!/usr/bin/env python3
  2 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
  3 +"""
  4 +Generate samples for
  5 +https://k2-fsa.github.io/sherpa/onnx/tts/all/
  6 +"""
  7 +
  8 +
  9 +import sherpa_onnx
  10 +import soundfile as sf
  11 +
  12 +from generate_voices_bin import speaker2id
  13 +
  14 +config = sherpa_onnx.OfflineTtsConfig(
  15 + model=sherpa_onnx.OfflineTtsModelConfig(
  16 + kitten=sherpa_onnx.OfflineTtsKittenModelConfig(
  17 + model="kitten-nano-en-v0_1-fp16/model.fp16.onnx",
  18 + voices="kitten-nano-en-v0_1-fp16/voices.bin",
  19 + tokens="kitten-nano-en-v0_1-fp16/tokens.txt",
  20 + data_dir="kitten-nano-en-v0_1-fp16/espeak-ng-data",
  21 + ),
  22 + num_threads=2,
  23 + ),
  24 + max_num_sentences=1,
  25 +)
  26 +
  27 +if not config.validate():
  28 + raise ValueError("Please check your config")
  29 +
  30 +tts = sherpa_onnx.OfflineTts(config)
  31 +text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  32 +
  33 +for s, i in speaker2id.items():
  34 + print(s, i, len(speaker2id))
  35 + audio = tts.generate(text, sid=i, speed=1.0)
  36 +
  37 + sf.write(
  38 + f"./hf/kitten/v0.1/mp3/{i}-{s}.mp3",
  39 + audio.samples,
  40 + samplerate=audio.sample_rate,
  41 + )
  1 +#!/usr/bin/env python3
  2 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
  3 +"""
  4 +Generate samples for
  5 +https://k2-fsa.github.io/sherpa/onnx/tts/all/
  6 +"""
  7 +
  8 +
  9 +import sherpa_onnx
  10 +import soundfile as sf
  11 +
  12 +from generate_voices_bin import speaker2id
  13 +
  14 +config = sherpa_onnx.OfflineTtsConfig(
  15 + model=sherpa_onnx.OfflineTtsModelConfig(
  16 + kitten=sherpa_onnx.OfflineTtsKittenModelConfig(
  17 + model="kitten-nano-en-v0_2-fp16/model.fp16.onnx",
  18 + voices="kitten-nano-en-v0_2-fp16/voices.bin",
  19 + tokens="kitten-nano-en-v0_2-fp16/tokens.txt",
  20 + data_dir="kitten-nano-en-v0_2-fp16/espeak-ng-data",
  21 + ),
  22 + num_threads=2,
  23 + ),
  24 + max_num_sentences=1,
  25 +)
  26 +
  27 +if not config.validate():
  28 + raise ValueError("Please check your config")
  29 +
  30 +tts = sherpa_onnx.OfflineTts(config)
  31 +text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  32 +
  33 +for s, i in speaker2id.items():
  34 + print(s, i, len(speaker2id))
  35 + audio = tts.generate(text, sid=i, speed=1.0)
  36 +
  37 + sf.write(
  38 + f"./hf/kitten/v0.2/mp3/{i}-{s}.mp3",
  39 + audio.samples,
  40 + samplerate=audio.sample_rate,
  41 + )
  1 +#!/usr/bin/env python3
  2 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
  3 +"""
  4 +Generate samples for
  5 +https://k2-fsa.github.io/sherpa/onnx/tts/all/
  6 +"""
  7 +
  8 +
  9 +import sherpa_onnx
  10 +import soundfile as sf
  11 +
  12 +config = sherpa_onnx.OfflineTtsConfig(
  13 + model=sherpa_onnx.OfflineTtsModelConfig(
  14 + matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
  15 + acoustic_model="matcha-icefall-zh-baker/model-steps-3.onnx",
  16 + vocoder="vocos-22khz-univ.onnx",
  17 + lexicon="matcha-icefall-zh-baker/lexicon.txt",
  18 + tokens="matcha-icefall-zh-baker/tokens.txt",
  19 + dict_dir="matcha-icefall-zh-baker/dict",
  20 + ),
  21 + num_threads=2,
  22 + ),
  23 + max_num_sentences=1,
  24 + rule_fsts="./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst",
  25 +)
  26 +
  27 +if not config.validate():
  28 + raise ValueError("Please check your config")
  29 +
  30 +tts = sherpa_onnx.OfflineTts(config)
  31 +text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔."
  32 +
  33 +
  34 +audio = tts.generate(text, sid=0, speed=1.0)
  35 +
  36 +sf.write(
  37 + "./hf/matcha/icefall-zh/mp3/0.mp3",
  38 + audio.samples,
  39 + samplerate=audio.sample_rate,
  40 +)