正在显示
7 个修改的文件
包含
241 行增加
和
5 行删除
| @@ -117,9 +117,13 @@ jobs: | @@ -117,9 +117,13 @@ jobs: | ||
| 117 | export GIT_CLONE_PROTECTION_ACTIVE=false | 117 | export GIT_CLONE_PROTECTION_ACTIVE=false |
| 118 | 118 | ||
| 119 | for d in ${dirs[@]}; do | 119 | for d in ${dirs[@]}; do |
| 120 | - if [ ! -d ../$d ]]; then | 120 | + echo "d $d" |
| 121 | + if [[ ! -d $d ]]; then | ||
| 122 | + echo "$d does not exist" | ||
| 121 | continue | 123 | continue |
| 122 | fi | 124 | fi |
| 125 | + | ||
| 126 | + echo "$d exists" | ||
| 123 | rm -rf huggingface | 127 | rm -rf huggingface |
| 124 | 128 | ||
| 125 | git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface | 129 | git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface |
| @@ -3,7 +3,7 @@ name: export-matcha-fa-en-to-onnx | @@ -3,7 +3,7 @@ name: export-matcha-fa-en-to-onnx | ||
| 3 | on: | 3 | on: |
| 4 | push: | 4 | push: |
| 5 | branches: | 5 | branches: |
| 6 | - - fix-ci | 6 | + - tts-matcha-samples |
| 7 | 7 | ||
| 8 | workflow_dispatch: | 8 | workflow_dispatch: |
| 9 | 9 | ||
| @@ -33,15 +33,48 @@ jobs: | @@ -33,15 +33,48 @@ jobs: | ||
| 33 | - name: Install Python dependencies | 33 | - name: Install Python dependencies |
| 34 | shell: bash | 34 | shell: bash |
| 35 | run: | | 35 | run: | |
| 36 | - pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html | 36 | + pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html sherpa-onnx |
| 37 | 37 | ||
| 38 | - name: Run | 38 | - name: Run |
| 39 | + if: false | ||
| 39 | shell: bash | 40 | shell: bash |
| 40 | run: | | 41 | run: | |
| 41 | cd scripts/matcha-tts/fa-en | 42 | cd scripts/matcha-tts/fa-en |
| 42 | ./run.sh | 43 | ./run.sh |
| 43 | 44 | ||
| 45 | + - name: Generate samples | ||
| 46 | + env: | ||
| 47 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 48 | + shell: bash | ||
| 49 | + run: | | ||
| 50 | + cd scripts/matcha-tts/zh | ||
| 51 | + | ||
| 52 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 53 | + git config --global user.name "Fangjun Kuang" | ||
| 54 | + | ||
| 55 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 | ||
| 56 | + tar xvf matcha-icefall-zh-baker.tar.bz2 | ||
| 57 | + rm matcha-icefall-zh-baker.tar.bz2 | ||
| 58 | + | ||
| 59 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 60 | + | ||
| 61 | + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf | ||
| 62 | + mkdir -p ./hf/matcha/icefall-zh/mp3 | ||
| 63 | + | ||
| 64 | + ./generate_samples.py | ||
| 65 | + | ||
| 66 | + pushd hf | ||
| 67 | + git pull | ||
| 68 | + git add . | ||
| 69 | + git commit -m 'add kokoro samples for matcha tts zh' | ||
| 70 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main | ||
| 71 | + popd | ||
| 72 | + rm -rf hf | ||
| 73 | + | ||
| 74 | + ls -lh | ||
| 75 | + | ||
| 44 | - name: Collect results ${{ matrix.version }} | 76 | - name: Collect results ${{ matrix.version }} |
| 77 | + if: false | ||
| 45 | shell: bash | 78 | shell: bash |
| 46 | run: | | 79 | run: | |
| 47 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2 | 80 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2 |
| @@ -73,6 +106,7 @@ jobs: | @@ -73,6 +106,7 @@ jobs: | ||
| 73 | ls -lh $dst2.tar.bz2 | 106 | ls -lh $dst2.tar.bz2 |
| 74 | 107 | ||
| 75 | - name: Publish to huggingface male (musa) | 108 | - name: Publish to huggingface male (musa) |
| 109 | + if: false | ||
| 76 | env: | 110 | env: |
| 77 | HF_TOKEN: ${{ secrets.HF_TOKEN }} | 111 | HF_TOKEN: ${{ secrets.HF_TOKEN }} |
| 78 | uses: nick-fields/retry@v3 | 112 | uses: nick-fields/retry@v3 |
| @@ -110,6 +144,7 @@ jobs: | @@ -110,6 +144,7 @@ jobs: | ||
| 110 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa main || true | 144 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa main || true |
| 111 | 145 | ||
| 112 | - name: Publish to huggingface female (khadijah) | 146 | - name: Publish to huggingface female (khadijah) |
| 147 | + if: false | ||
| 113 | env: | 148 | env: |
| 114 | HF_TOKEN: ${{ secrets.HF_TOKEN }} | 149 | HF_TOKEN: ${{ secrets.HF_TOKEN }} |
| 115 | uses: nick-fields/retry@v3 | 150 | uses: nick-fields/retry@v3 |
| @@ -147,7 +182,8 @@ jobs: | @@ -147,7 +182,8 @@ jobs: | ||
| 147 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah main || true | 182 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah main || true |
| 148 | 183 | ||
| 149 | - name: Release | 184 | - name: Release |
| 150 | - if: github.repository_owner == 'csukuangfj' | 185 | + # if: github.repository_owner == 'csukuangfj' |
| 186 | + if: false | ||
| 151 | uses: svenstaro/upload-release-action@v2 | 187 | uses: svenstaro/upload-release-action@v2 |
| 152 | with: | 188 | with: |
| 153 | file_glob: true | 189 | file_glob: true |
| @@ -158,7 +194,8 @@ jobs: | @@ -158,7 +194,8 @@ jobs: | ||
| 158 | tag: tts-models | 194 | tag: tts-models |
| 159 | 195 | ||
| 160 | - name: Release | 196 | - name: Release |
| 161 | - if: github.repository_owner == 'k2-fsa' | 197 | + # if: github.repository_owner == 'k2-fsa' |
| 198 | + if: false | ||
| 162 | uses: svenstaro/upload-release-action@v2 | 199 | uses: svenstaro/upload-release-action@v2 |
| 163 | with: | 200 | with: |
| 164 | file_glob: true | 201 | file_glob: true |
.github/workflows/generate-tts-samples.yaml
0 → 100644
| 1 | +name: generate-tts-samples | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - tts-samples-2 | ||
| 7 | + | ||
| 8 | + workflow_dispatch: | ||
| 9 | + | ||
| 10 | +concurrency: | ||
| 11 | + group: generate-tts-samples-${{ github.ref }} | ||
| 12 | + cancel-in-progress: true | ||
| 13 | + | ||
| 14 | +jobs: | ||
| 15 | + generate_tts_samples: | ||
| 16 | + name: ${{ matrix.os }} | ||
| 17 | + runs-on: ${{ matrix.os }} | ||
| 18 | + strategy: | ||
| 19 | + fail-fast: false | ||
| 20 | + matrix: | ||
| 21 | + os: [ubuntu-latest] | ||
| 22 | + python-version: ["3.10"] | ||
| 23 | + | ||
| 24 | + steps: | ||
| 25 | + - uses: actions/checkout@v4 | ||
| 26 | + with: | ||
| 27 | + fetch-depth: 0 | ||
| 28 | + | ||
| 29 | + - name: Install Python dependencies | ||
| 30 | + shell: bash | ||
| 31 | + run: | | ||
| 32 | + pip install "numpy<=1.26.4" sherpa-onnx soundfile | ||
| 33 | + | ||
| 34 | + - name: kitten | ||
| 35 | + if: true | ||
| 36 | + shell: bash | ||
| 37 | + env: | ||
| 38 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 39 | + run: | | ||
| 40 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 41 | + git config --global user.name "Fangjun Kuang" | ||
| 42 | + | ||
| 43 | + cd scripts/kitten-tts | ||
| 44 | + pwd=$PWD | ||
| 45 | + | ||
| 46 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 47 | + export GIT_CLONE_PROTECTION_ACTIVE=false | ||
| 48 | + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf | ||
| 49 | + mkdir -p ./hf/kitten/v0.1/mp3 | ||
| 50 | + mkdir -p ./hf/kitten/v0.2/mp3 | ||
| 51 | + | ||
| 52 | + for v in 1 2; do | ||
| 53 | + pushd nano_v0_$v | ||
| 54 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_$v-fp16.tar.bz2 | ||
| 55 | + tar xf kitten-nano-en-v0_$v-fp16.tar.bz2 | ||
| 56 | + rm kitten-nano-en-v0_$v-fp16.tar.bz2 | ||
| 57 | + | ||
| 58 | + ln -s ../hf . | ||
| 59 | + python3 ./generate_samples.py | ||
| 60 | + rm -rf kitten-nano-en-v0_$v-fp16 | ||
| 61 | + popd | ||
| 62 | + done | ||
| 63 | + | ||
| 64 | + pushd hf | ||
| 65 | + git pull | ||
| 66 | + git add . | ||
| 67 | + git commit -m 'add kitten tts samples' | ||
| 68 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main | ||
| 69 | + popd | ||
| 70 | + rm -rf hf |
| @@ -45,6 +45,9 @@ For Rust support, please see [sherpa-rs][sherpa-rs] | @@ -45,6 +45,9 @@ For Rust support, please see [sherpa-rs][sherpa-rs] | ||
| 45 | 45 | ||
| 46 | It also supports WebAssembly. | 46 | It also supports WebAssembly. |
| 47 | 47 | ||
| 48 | +[Join our Discord](https://discord.gg/fJdxzg2VbG) | ||
| 49 | + | ||
| 50 | + | ||
| 48 | ## Introduction | 51 | ## Introduction |
| 49 | 52 | ||
| 50 | This repository supports running the following functions **locally** | 53 | This repository supports running the following functions **locally** |
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| 3 | +""" | ||
| 4 | +Generate samples for | ||
| 5 | +https://k2-fsa.github.io/sherpa/onnx/tts/all/ | ||
| 6 | +""" | ||
| 7 | + | ||
| 8 | + | ||
| 9 | +import sherpa_onnx | ||
| 10 | +import soundfile as sf | ||
| 11 | + | ||
| 12 | +from generate_voices_bin import speaker2id | ||
| 13 | + | ||
| 14 | +config = sherpa_onnx.OfflineTtsConfig( | ||
| 15 | + model=sherpa_onnx.OfflineTtsModelConfig( | ||
| 16 | + kitten=sherpa_onnx.OfflineTtsKittenModelConfig( | ||
| 17 | + model="kitten-nano-en-v0_1-fp16/model.fp16.onnx", | ||
| 18 | + voices="kitten-nano-en-v0_1-fp16/voices.bin", | ||
| 19 | + tokens="kitten-nano-en-v0_1-fp16/tokens.txt", | ||
| 20 | + data_dir="kitten-nano-en-v0_1-fp16/espeak-ng-data", | ||
| 21 | + ), | ||
| 22 | + num_threads=2, | ||
| 23 | + ), | ||
| 24 | + max_num_sentences=1, | ||
| 25 | +) | ||
| 26 | + | ||
| 27 | +if not config.validate(): | ||
| 28 | + raise ValueError("Please check your config") | ||
| 29 | + | ||
| 30 | +tts = sherpa_onnx.OfflineTts(config) | ||
| 31 | +text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." | ||
| 32 | + | ||
| 33 | +for s, i in speaker2id.items(): | ||
| 34 | + print(s, i, len(speaker2id)) | ||
| 35 | + audio = tts.generate(text, sid=i, speed=1.0) | ||
| 36 | + | ||
| 37 | + sf.write( | ||
| 38 | + f"./hf/kitten/v0.1/mp3/{i}-{s}.mp3", | ||
| 39 | + audio.samples, | ||
| 40 | + samplerate=audio.sample_rate, | ||
| 41 | + ) |
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| 3 | +""" | ||
| 4 | +Generate samples for | ||
| 5 | +https://k2-fsa.github.io/sherpa/onnx/tts/all/ | ||
| 6 | +""" | ||
| 7 | + | ||
| 8 | + | ||
| 9 | +import sherpa_onnx | ||
| 10 | +import soundfile as sf | ||
| 11 | + | ||
| 12 | +from generate_voices_bin import speaker2id | ||
| 13 | + | ||
| 14 | +config = sherpa_onnx.OfflineTtsConfig( | ||
| 15 | + model=sherpa_onnx.OfflineTtsModelConfig( | ||
| 16 | + kitten=sherpa_onnx.OfflineTtsKittenModelConfig( | ||
| 17 | + model="kitten-nano-en-v0_2-fp16/model.fp16.onnx", | ||
| 18 | + voices="kitten-nano-en-v0_2-fp16/voices.bin", | ||
| 19 | + tokens="kitten-nano-en-v0_2-fp16/tokens.txt", | ||
| 20 | + data_dir="kitten-nano-en-v0_2-fp16/espeak-ng-data", | ||
| 21 | + ), | ||
| 22 | + num_threads=2, | ||
| 23 | + ), | ||
| 24 | + max_num_sentences=1, | ||
| 25 | +) | ||
| 26 | + | ||
| 27 | +if not config.validate(): | ||
| 28 | + raise ValueError("Please check your config") | ||
| 29 | + | ||
| 30 | +tts = sherpa_onnx.OfflineTts(config) | ||
| 31 | +text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." | ||
| 32 | + | ||
| 33 | +for s, i in speaker2id.items(): | ||
| 34 | + print(s, i, len(speaker2id)) | ||
| 35 | + audio = tts.generate(text, sid=i, speed=1.0) | ||
| 36 | + | ||
| 37 | + sf.write( | ||
| 38 | + f"./hf/kitten/v0.2/mp3/{i}-{s}.mp3", | ||
| 39 | + audio.samples, | ||
| 40 | + samplerate=audio.sample_rate, | ||
| 41 | + ) |
scripts/matcha-tts/zh/generate_samples.py
0 → 100755
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| 3 | +""" | ||
| 4 | +Generate samples for | ||
| 5 | +https://k2-fsa.github.io/sherpa/onnx/tts/all/ | ||
| 6 | +""" | ||
| 7 | + | ||
| 8 | + | ||
| 9 | +import sherpa_onnx | ||
| 10 | +import soundfile as sf | ||
| 11 | + | ||
| 12 | +config = sherpa_onnx.OfflineTtsConfig( | ||
| 13 | + model=sherpa_onnx.OfflineTtsModelConfig( | ||
| 14 | + matcha=sherpa_onnx.OfflineTtsMatchaModelConfig( | ||
| 15 | + acoustic_model="matcha-icefall-zh-baker/model-steps-3.onnx", | ||
| 16 | + vocoder="vocos-22khz-univ.onnx", | ||
| 17 | + lexicon="matcha-icefall-zh-baker/lexicon.txt", | ||
| 18 | + tokens="matcha-icefall-zh-baker/tokens.txt", | ||
| 19 | + dict_dir="matcha-icefall-zh-baker/dict", | ||
| 20 | + ), | ||
| 21 | + num_threads=2, | ||
| 22 | + ), | ||
| 23 | + max_num_sentences=1, | ||
| 24 | + rule_fsts="./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst", | ||
| 25 | +) | ||
| 26 | + | ||
| 27 | +if not config.validate(): | ||
| 28 | + raise ValueError("Please check your config") | ||
| 29 | + | ||
| 30 | +tts = sherpa_onnx.OfflineTts(config) | ||
| 31 | +text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔." | ||
| 32 | + | ||
| 33 | + | ||
| 34 | +audio = tts.generate(text, sid=0, speed=1.0) | ||
| 35 | + | ||
| 36 | +sf.write( | ||
| 37 | + "./hf/matcha/icefall-zh/mp3/0.mp3", | ||
| 38 | + audio.samples, | ||
| 39 | + samplerate=audio.sample_rate, | ||
| 40 | +) |
-
请 注册 或 登录 后发表评论