Scripts to generate tts samples (#2513)

Fangjun Kuang · GitHub
Commit ba13109c8006db5d899b1561537a7d0663f58376 ba13109c 1 parent 866cbe49
.github/workflows/export-kitten.yaml
.github/workflows/export-matcha-fa-en.yaml
.github/workflows/generate-tts-samples.yaml
README.md
scripts/kitten-tts/nano_v0_1/generate_samples.py
scripts/kitten-tts/nano_v0_2/generate_samples.py
scripts/matcha-tts/zh/generate_samples.py
--- a/.github/workflows/export-kitten.yaml
查看文件 @ba13109
+++ b/.github/workflows/export-kitten.yaml
查看文件 @ba13109
@@ -117,9 +117,13 @@ jobs:
             export GIT_CLONE_PROTECTION_ACTIVE=false
             for d in ${dirs[@]}; do
-              if [ ! -d ../$d ]]; then
+              echo "d $d"
+              if [[ ! -d $d ]]; then
+                echo "$d does not exist"
                 continue
               fi
+
+              echo "$d exists"
               rm -rf huggingface
               git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface
--- a/.github/workflows/export-matcha-fa-en.yaml
查看文件 @ba13109
+++ b/.github/workflows/export-matcha-fa-en.yaml
查看文件 @ba13109
@@ -3,7 +3,7 @@ name: export-matcha-fa-en-to-onnx
 on:
   push:
     branches:
-      - fix-ci
+      - tts-matcha-samples
   workflow_dispatch:
@@ -33,15 +33,48 @@ jobs:
       - name: Install Python dependencies
         shell: bash
         run: |
-          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
+          pip install "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html sherpa-onnx
       - name: Run
+        if: false
         shell: bash
         run: |
           cd scripts/matcha-tts/fa-en
           ./run.sh
+      # Synthesizes a sample with the matcha-icefall-zh-baker model and pushes
+      # it to the csukuangfj/sherpa-onnx-tts-samples repository on Hugging Face.
+      - name: Generate samples
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        shell: bash
+        run: |
+          cd scripts/matcha-tts/zh
+
+          git config --global user.email "csukuangfj@gmail.com"
+          git config --global user.name "Fangjun Kuang"
+
+          # Acoustic model, lexicon, tokens and rule FSTs used by generate_samples.py
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+          tar xvf matcha-icefall-zh-baker.tar.bz2
+          rm matcha-icefall-zh-baker.tar.bz2
+
+          # Vocoder used by generate_samples.py
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
+
+          # generate_samples.py writes into ./hf/matcha/icefall-zh/mp3
+          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
+          mkdir -p ./hf/matcha/icefall-zh/mp3
+
+          ./generate_samples.py
+
+          pushd hf
+          git pull
+          git add .
+          # Commit only when something is staged: a bare `git commit` exits
+          # non-zero on a clean index, which would fail this step on a re-run.
+          git diff --cached --quiet || git commit -m 'add samples for matcha tts zh'
+          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
+          popd
+          rm -rf hf
+
+          ls -lh
+
       - name: Collect results ${{ matrix.version }}
+        if: false
         shell: bash
         run: |
           curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
@@ -73,6 +106,7 @@ jobs:
           ls -lh $dst2.tar.bz2
       - name: Publish to huggingface male (musa)
+        if: false
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         uses: nick-fields/retry@v3
@@ -110,6 +144,7 @@ jobs:
             git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-musa main || true
       - name: Publish to huggingface female (khadijah)
+        if: false
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         uses: nick-fields/retry@v3
@@ -147,7 +182,8 @@ jobs:
             git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/matcha-tts-fa_en-khadijah main || true
       - name: Release
-        if: github.repository_owner == 'csukuangfj'
+        # if: github.repository_owner == 'csukuangfj'
+        if: false
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
@@ -158,7 +194,8 @@ jobs:
           tag: tts-models
       - name: Release
-        if: github.repository_owner == 'k2-fsa'
+        # if: github.repository_owner == 'k2-fsa'
+        if: false
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
--- a/.github/workflows/generate-tts-samples.yaml 0 → 100644
查看文件 @ba13109
+++ b/.github/workflows/generate-tts-samples.yaml 0 → 100644
查看文件 @ba13109
+name: generate-tts-samples
+
+# Generates Kitten TTS audio samples (nano v0.1 and v0.2) and pushes them to
+# the csukuangfj/sherpa-onnx-tts-samples repository on Hugging Face.
+on:
+  push:
+    branches:
+      - tts-samples-2
+
+  workflow_dispatch:
+
+concurrency:
+  group: generate-tts-samples-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  generate_tts_samples:
+    name: ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.10"]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # The matrix declares a Python version; install it so the job uses it
+      # instead of whatever interpreter the runner image ships with.
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          pip install "numpy<=1.26.4" sherpa-onnx soundfile
+
+      - name: kitten
+        if: true
+        shell: bash
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          git config --global user.email "csukuangfj@gmail.com"
+          git config --global user.name "Fangjun Kuang"
+
+          cd scripts/kitten-tts
+
+          # Clone without fetching LFS objects; only new samples are added.
+          export GIT_LFS_SKIP_SMUDGE=1
+          export GIT_CLONE_PROTECTION_ACTIVE=false
+          git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
+          mkdir -p ./hf/kitten/v0.1/mp3
+          mkdir -p ./hf/kitten/v0.2/mp3
+
+          for v in 1 2; do
+            pushd nano_v0_$v
+            curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_$v-fp16.tar.bz2
+            tar xf kitten-nano-en-v0_$v-fp16.tar.bz2
+            rm kitten-nano-en-v0_$v-fp16.tar.bz2
+
+            # generate_samples.py writes to ./hf/..., so expose the clone here.
+            ln -s ../hf .
+            python3 ./generate_samples.py
+            rm -rf kitten-nano-en-v0_$v-fp16
+            popd
+          done
+
+          pushd hf
+          git pull
+          git add .
+          # Commit only when something is staged: a bare `git commit` exits
+          # non-zero on a clean index, which would fail this step on a re-run.
+          git diff --cached --quiet || git commit -m 'add kitten tts samples'
+          git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
+          popd
+          rm -rf hf
--- a/README.md
查看文件 @ba13109
+++ b/README.md
查看文件 @ba13109
@@ -45,6 +45,9 @@ For Rust support, please see [sherpa-rs][sherpa-rs]
 It also supports WebAssembly.
+[Join our Discord](https://discord.gg/fJdxzg2VbG)
+
+
 ## Introduction
 This repository supports running the following functions **locally**
--- a/scripts/kitten-tts/nano_v0_1/generate_samples.py 0 → 100755
查看文件 @ba13109
+++ b/scripts/kitten-tts/nano_v0_1/generate_samples.py 0 → 100755
查看文件 @ba13109
+#!/usr/bin/env python3
+# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
+"""
+Generate samples for
+https://k2-fsa.github.io/sherpa/onnx/tts/all/
+"""
+
+
+import sherpa_onnx
+import soundfile as sf
+
+from generate_voices_bin import speaker2id
+
+# All model paths are relative to the current working directory.
+kitten_config = sherpa_onnx.OfflineTtsKittenModelConfig(
+    model="kitten-nano-en-v0_1-fp16/model.fp16.onnx",
+    voices="kitten-nano-en-v0_1-fp16/voices.bin",
+    tokens="kitten-nano-en-v0_1-fp16/tokens.txt",
+    data_dir="kitten-nano-en-v0_1-fp16/espeak-ng-data",
+)
+model_config = sherpa_onnx.OfflineTtsModelConfig(
+    kitten=kitten_config,
+    num_threads=2,
+)
+config = sherpa_onnx.OfflineTtsConfig(
+    model=model_config,
+    max_num_sentences=1,
+)
+
+# Stop before constructing the engine if the config does not validate.
+if not config.validate():
+    raise ValueError("Please check your config")
+
+tts = sherpa_onnx.OfflineTts(config)
+text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+
+# One file per speaker2id entry: the value is passed as sid, and both the
+# value and the key go into the output file name.
+for speaker, sid in speaker2id.items():
+    print(speaker, sid, len(speaker2id))
+    audio = tts.generate(text, sid=sid, speed=1.0)
+
+    output_path = f"./hf/kitten/v0.1/mp3/{sid}-{speaker}.mp3"
+    sf.write(output_path, audio.samples, samplerate=audio.sample_rate)
--- a/scripts/kitten-tts/nano_v0_2/generate_samples.py 0 → 100755
查看文件 @ba13109
+++ b/scripts/kitten-tts/nano_v0_2/generate_samples.py 0 → 100755
查看文件 @ba13109
+#!/usr/bin/env python3
+# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
+"""
+Generate samples for
+https://k2-fsa.github.io/sherpa/onnx/tts/all/
+"""
+
+
+import sherpa_onnx
+import soundfile as sf
+
+from generate_voices_bin import speaker2id
+
+# All model paths are relative to the current working directory.
+kitten_config = sherpa_onnx.OfflineTtsKittenModelConfig(
+    model="kitten-nano-en-v0_2-fp16/model.fp16.onnx",
+    voices="kitten-nano-en-v0_2-fp16/voices.bin",
+    tokens="kitten-nano-en-v0_2-fp16/tokens.txt",
+    data_dir="kitten-nano-en-v0_2-fp16/espeak-ng-data",
+)
+model_config = sherpa_onnx.OfflineTtsModelConfig(
+    kitten=kitten_config,
+    num_threads=2,
+)
+config = sherpa_onnx.OfflineTtsConfig(
+    model=model_config,
+    max_num_sentences=1,
+)
+
+# Stop before constructing the engine if the config does not validate.
+if not config.validate():
+    raise ValueError("Please check your config")
+
+tts = sherpa_onnx.OfflineTts(config)
+text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
+
+# One file per speaker2id entry: the value is passed as sid, and both the
+# value and the key go into the output file name.
+for speaker, sid in speaker2id.items():
+    print(speaker, sid, len(speaker2id))
+    audio = tts.generate(text, sid=sid, speed=1.0)
+
+    output_path = f"./hf/kitten/v0.2/mp3/{sid}-{speaker}.mp3"
+    sf.write(output_path, audio.samples, samplerate=audio.sample_rate)
--- a/scripts/matcha-tts/zh/generate_samples.py 0 → 100755
查看文件 @ba13109
+++ b/scripts/matcha-tts/zh/generate_samples.py 0 → 100755
查看文件 @ba13109
+#!/usr/bin/env python3
+# Copyright    2025  Xiaomi Corp.        (authors: Fangjun Kuang)
+"""
+Generate samples for
+https://k2-fsa.github.io/sherpa/onnx/tts/all/
+"""
+
+
+import sherpa_onnx
+import soundfile as sf
+
+# All model paths are relative to the current working directory.
+matcha_config = sherpa_onnx.OfflineTtsMatchaModelConfig(
+    acoustic_model="matcha-icefall-zh-baker/model-steps-3.onnx",
+    vocoder="vocos-22khz-univ.onnx",
+    lexicon="matcha-icefall-zh-baker/lexicon.txt",
+    tokens="matcha-icefall-zh-baker/tokens.txt",
+    dict_dir="matcha-icefall-zh-baker/dict",
+)
+model_config = sherpa_onnx.OfflineTtsModelConfig(
+    matcha=matcha_config,
+    num_threads=2,
+)
+config = sherpa_onnx.OfflineTtsConfig(
+    model=model_config,
+    max_num_sentences=1,
+    rule_fsts="./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst",
+)
+
+# Stop before constructing the engine if the config does not validate.
+if not config.validate():
+    raise ValueError("Please check your config")
+
+tts = sherpa_onnx.OfflineTts(config)
+text = "某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔."
+
+
+# A single sample is generated with sid 0 and written as 0.mp3.
+audio = tts.generate(text, sid=0, speed=1.0)
+
+output_path = "./hf/matcha/icefall-zh/mp3/0.mp3"
+sf.write(output_path, audio.samples, samplerate=audio.sample_rate)