正在显示
16 个修改的文件
包含
562 行增加
和
24 行删除
.github/scripts/test-offline-tts.sh
0 → 100755
#!/usr/bin/env bash
#
# Smoke test for the offline text-to-speech executable.
#
# The environment variable EXE must name the executable to test; it is looked
# up via PATH. For each pretrained VITS model below, the script clones the
# model repository, pulls only the *.onnx files with git-lfs, runs $EXE to
# synthesize speech, and saves the generated wave files in ./tts.

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# Clone a model repository into the current directory without its LFS
# payloads, then fetch only the *.onnx files.
#
# Arguments:
#   $1 - URL of the repository to clone
download_model() {
  local url=$1
  local dir
  # Assigned separately from `local` so that a failure is not masked.
  dir=$(basename "$url")

  log "Start testing ${url}"
  log "Download pretrained model from ${url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
  pushd "$dir"
  git lfs pull --include "*.onnx"
  ls -lh *.onnx
  popd
}

echo "EXE is $EXE"
echo "PATH: $PATH"

# Fail early with a clear message instead of a confusing `which` error.
: "${EXE:?EXE must be set to the name of the offline TTS executable}"

which "$EXE"

# test waves are saved in ./tts
# -p: do not abort (set -e) when the directory is left over from an earlier run
mkdir -p ./tts

log "------------------------------------------------------------"
log "vits-ljs test"
log "------------------------------------------------------------"

repo_url=https://huggingface.co/csukuangfj/vits-ljs
repo=$(basename "$repo_url")
download_model "$repo_url"

"$EXE" \
  --vits-model="$repo/vits-ljs.onnx" \
  --vits-lexicon="$repo/lexicon.txt" \
  --vits-tokens="$repo/tokens.txt" \
  --output-filename=./tts/vits-ljs.wav \
  'liliana, the most beautiful and lovely assistant of our team!'

ls -lh ./tts

rm -rfv "$repo"

log "------------------------------------------------------------"
log "vits-vctk test"
log "------------------------------------------------------------"

repo_url=https://huggingface.co/csukuangfj/vits-vctk
repo=$(basename "$repo_url")
download_model "$repo_url"

# Multi-speaker model: generate one file per speaker ID.
for sid in 0 10 90; do
  "$EXE" \
    --vits-model="$repo/vits-vctk.onnx" \
    --vits-lexicon="$repo/lexicon.txt" \
    --vits-tokens="$repo/tokens.txt" \
    --sid="$sid" \
    --output-filename="./tts/vits-vctk-${sid}.wav" \
    'liliana, the most beautiful and lovely assistant of our team!'
done

rm -rfv "$repo"

ls -lh tts/

log "------------------------------------------------------------"
log "vits-zh-aishell3"
log "------------------------------------------------------------"

repo_url=https://huggingface.co/csukuangfj/vits-zh-aishell3
repo=$(basename "$repo_url")
download_model "$repo_url"

# Multi-speaker model: generate one file per speaker ID.
for sid in 0 10 90; do
  "$EXE" \
    --vits-model="$repo/vits-aishell3.onnx" \
    --vits-lexicon="$repo/lexicon.txt" \
    --vits-tokens="$repo/tokens.txt" \
    --sid="$sid" \
    --output-filename="./tts/vits-aishell3-${sid}.wav" \
    '林美丽最美丽'
done

rm -rfv "$repo"

ls -lh ./tts/
| @@ -12,6 +12,7 @@ on: | @@ -12,6 +12,7 @@ on: | ||
| 12 | - '.github/scripts/test-online-paraformer.sh' | 12 | - '.github/scripts/test-online-paraformer.sh' |
| 13 | - '.github/scripts/test-offline-transducer.sh' | 13 | - '.github/scripts/test-offline-transducer.sh' |
| 14 | - '.github/scripts/test-offline-ctc.sh' | 14 | - '.github/scripts/test-offline-ctc.sh' |
| 15 | + - '.github/scripts/test-offline-tts.sh' | ||
| 15 | - 'CMakeLists.txt' | 16 | - 'CMakeLists.txt' |
| 16 | - 'cmake/**' | 17 | - 'cmake/**' |
| 17 | - 'sherpa-onnx/csrc/*' | 18 | - 'sherpa-onnx/csrc/*' |
| @@ -26,6 +27,7 @@ on: | @@ -26,6 +27,7 @@ on: | ||
| 26 | - '.github/scripts/test-online-paraformer.sh' | 27 | - '.github/scripts/test-online-paraformer.sh' |
| 27 | - '.github/scripts/test-offline-transducer.sh' | 28 | - '.github/scripts/test-offline-transducer.sh' |
| 28 | - '.github/scripts/test-offline-ctc.sh' | 29 | - '.github/scripts/test-offline-ctc.sh' |
| 30 | + - '.github/scripts/test-offline-tts.sh' | ||
| 29 | - 'CMakeLists.txt' | 31 | - 'CMakeLists.txt' |
| 30 | - 'cmake/**' | 32 | - 'cmake/**' |
| 31 | - 'sherpa-onnx/csrc/*' | 33 | - 'sherpa-onnx/csrc/*' |
| @@ -48,7 +50,7 @@ jobs: | @@ -48,7 +50,7 @@ jobs: | ||
| 48 | build_type: [Release, Debug] | 50 | build_type: [Release, Debug] |
| 49 | 51 | ||
| 50 | steps: | 52 | steps: |
| 51 | - - uses: actions/checkout@v2 | 53 | + - uses: actions/checkout@v3 |
| 52 | with: | 54 | with: |
| 53 | fetch-depth: 0 | 55 | fetch-depth: 0 |
| 54 | 56 | ||
| @@ -75,6 +77,14 @@ jobs: | @@ -75,6 +77,14 @@ jobs: | ||
| 75 | file build/bin/sherpa-onnx | 77 | file build/bin/sherpa-onnx |
| 76 | readelf -d build/bin/sherpa-onnx | 78 | readelf -d build/bin/sherpa-onnx |
| 77 | 79 | ||
| 80 | + - name: Test offline TTS | ||
| 81 | + shell: bash | ||
| 82 | + run: | | ||
| 83 | + export PATH=$PWD/build/bin:$PATH | ||
| 84 | + export EXE=sherpa-onnx-offline-tts | ||
| 85 | + | ||
| 86 | + .github/scripts/test-offline-tts.sh | ||
| 87 | + | ||
| 78 | - name: Test online paraformer | 88 | - name: Test online paraformer |
| 79 | shell: bash | 89 | shell: bash |
| 80 | run: | | 90 | run: | |
| @@ -12,6 +12,7 @@ on: | @@ -12,6 +12,7 @@ on: | ||
| 12 | - '.github/scripts/test-online-paraformer.sh' | 12 | - '.github/scripts/test-online-paraformer.sh' |
| 13 | - '.github/scripts/test-offline-transducer.sh' | 13 | - '.github/scripts/test-offline-transducer.sh' |
| 14 | - '.github/scripts/test-offline-ctc.sh' | 14 | - '.github/scripts/test-offline-ctc.sh' |
| 15 | + - '.github/scripts/test-offline-tts.sh' | ||
| 15 | - 'CMakeLists.txt' | 16 | - 'CMakeLists.txt' |
| 16 | - 'cmake/**' | 17 | - 'cmake/**' |
| 17 | - 'sherpa-onnx/csrc/*' | 18 | - 'sherpa-onnx/csrc/*' |
| @@ -26,6 +27,7 @@ on: | @@ -26,6 +27,7 @@ on: | ||
| 26 | - '.github/scripts/test-online-paraformer.sh' | 27 | - '.github/scripts/test-online-paraformer.sh' |
| 27 | - '.github/scripts/test-offline-transducer.sh' | 28 | - '.github/scripts/test-offline-transducer.sh' |
| 28 | - '.github/scripts/test-offline-ctc.sh' | 29 | - '.github/scripts/test-offline-ctc.sh' |
| 30 | + - '.github/scripts/test-offline-tts.sh' | ||
| 29 | - 'CMakeLists.txt' | 31 | - 'CMakeLists.txt' |
| 30 | - 'cmake/**' | 32 | - 'cmake/**' |
| 31 | - 'sherpa-onnx/csrc/*' | 33 | - 'sherpa-onnx/csrc/*' |
| @@ -49,7 +51,7 @@ jobs: | @@ -49,7 +51,7 @@ jobs: | ||
| 49 | shared_lib: [ON, OFF] | 51 | shared_lib: [ON, OFF] |
| 50 | 52 | ||
| 51 | steps: | 53 | steps: |
| 52 | - - uses: actions/checkout@v2 | 54 | + - uses: actions/checkout@v3 |
| 53 | with: | 55 | with: |
| 54 | fetch-depth: 0 | 56 | fetch-depth: 0 |
| 55 | 57 | ||
| @@ -76,6 +78,14 @@ jobs: | @@ -76,6 +78,14 @@ jobs: | ||
| 76 | file build/bin/sherpa-onnx | 78 | file build/bin/sherpa-onnx |
| 77 | readelf -d build/bin/sherpa-onnx | 79 | readelf -d build/bin/sherpa-onnx |
| 78 | 80 | ||
| 81 | + - name: Test offline TTS | ||
| 82 | + shell: bash | ||
| 83 | + run: | | ||
| 84 | + export PATH=$PWD/build/bin:$PATH | ||
| 85 | + export EXE=sherpa-onnx-offline-tts | ||
| 86 | + | ||
| 87 | + .github/scripts/test-offline-tts.sh | ||
| 88 | + | ||
| 79 | - name: Test online paraformer | 89 | - name: Test online paraformer |
| 80 | shell: bash | 90 | shell: bash |
| 81 | run: | | 91 | run: | |
| @@ -150,3 +160,8 @@ jobs: | @@ -150,3 +160,8 @@ jobs: | ||
| 150 | file_glob: true | 160 | file_glob: true |
| 151 | overwrite: true | 161 | overwrite: true |
| 152 | file: sherpa-onnx-*linux-x64.tar.bz2 | 162 | file: sherpa-onnx-*linux-x64.tar.bz2 |
| 163 | + | ||
| 164 | + - uses: actions/upload-artifact@v3 | ||
| 165 | + with: | ||
| 166 | + name: tts-generated-test-files | ||
| 167 | + path: tts |
| @@ -12,6 +12,7 @@ on: | @@ -12,6 +12,7 @@ on: | ||
| 12 | - '.github/scripts/test-online-paraformer.sh' | 12 | - '.github/scripts/test-online-paraformer.sh' |
| 13 | - '.github/scripts/test-offline-transducer.sh' | 13 | - '.github/scripts/test-offline-transducer.sh' |
| 14 | - '.github/scripts/test-offline-ctc.sh' | 14 | - '.github/scripts/test-offline-ctc.sh' |
| 15 | + - '.github/scripts/test-offline-tts.sh' | ||
| 15 | - 'CMakeLists.txt' | 16 | - 'CMakeLists.txt' |
| 16 | - 'cmake/**' | 17 | - 'cmake/**' |
| 17 | - 'sherpa-onnx/csrc/*' | 18 | - 'sherpa-onnx/csrc/*' |
| @@ -24,6 +25,7 @@ on: | @@ -24,6 +25,7 @@ on: | ||
| 24 | - '.github/scripts/test-online-paraformer.sh' | 25 | - '.github/scripts/test-online-paraformer.sh' |
| 25 | - '.github/scripts/test-offline-transducer.sh' | 26 | - '.github/scripts/test-offline-transducer.sh' |
| 26 | - '.github/scripts/test-offline-ctc.sh' | 27 | - '.github/scripts/test-offline-ctc.sh' |
| 28 | + - '.github/scripts/test-offline-tts.sh' | ||
| 27 | - 'CMakeLists.txt' | 29 | - 'CMakeLists.txt' |
| 28 | - 'cmake/**' | 30 | - 'cmake/**' |
| 29 | - 'sherpa-onnx/csrc/*' | 31 | - 'sherpa-onnx/csrc/*' |
| @@ -44,7 +46,7 @@ jobs: | @@ -44,7 +46,7 @@ jobs: | ||
| 44 | build_type: [Release, Debug] | 46 | build_type: [Release, Debug] |
| 45 | 47 | ||
| 46 | steps: | 48 | steps: |
| 47 | - - uses: actions/checkout@v2 | 49 | + - uses: actions/checkout@v3 |
| 48 | with: | 50 | with: |
| 49 | fetch-depth: 0 | 51 | fetch-depth: 0 |
| 50 | 52 | ||
| @@ -74,6 +76,14 @@ jobs: | @@ -74,6 +76,14 @@ jobs: | ||
| 74 | otool -L build/bin/sherpa-onnx | 76 | otool -L build/bin/sherpa-onnx |
| 75 | otool -l build/bin/sherpa-onnx | 77 | otool -l build/bin/sherpa-onnx |
| 76 | 78 | ||
| 79 | + - name: Test offline TTS | ||
| 80 | + shell: bash | ||
| 81 | + run: | | ||
| 82 | + export PATH=$PWD/build/bin:$PATH | ||
| 83 | + export EXE=sherpa-onnx-offline-tts | ||
| 84 | + | ||
| 85 | + .github/scripts/test-offline-tts.sh | ||
| 86 | + | ||
| 77 | - name: Test online paraformer | 87 | - name: Test online paraformer |
| 78 | shell: bash | 88 | shell: bash |
| 79 | run: | | 89 | run: | |
| @@ -9,6 +9,7 @@ on: | @@ -9,6 +9,7 @@ on: | ||
| 9 | - '*' | 9 | - '*' |
| 10 | paths: | 10 | paths: |
| 11 | - '.github/workflows/pkg-config.yaml' | 11 | - '.github/workflows/pkg-config.yaml' |
| 12 | + - '.github/scripts/test-offline-tts.sh' | ||
| 12 | - 'CMakeLists.txt' | 13 | - 'CMakeLists.txt' |
| 13 | - 'cmake/**' | 14 | - 'cmake/**' |
| 14 | - 'sherpa-onnx/csrc/*' | 15 | - 'sherpa-onnx/csrc/*' |
| @@ -19,6 +20,7 @@ on: | @@ -19,6 +20,7 @@ on: | ||
| 19 | - master | 20 | - master |
| 20 | paths: | 21 | paths: |
| 21 | - '.github/workflows/pkg-config.yaml' | 22 | - '.github/workflows/pkg-config.yaml' |
| 23 | + - '.github/scripts/test-offline-tts.sh' | ||
| 22 | - 'CMakeLists.txt' | 24 | - 'CMakeLists.txt' |
| 23 | - 'cmake/**' | 25 | - 'cmake/**' |
| 24 | - 'sherpa-onnx/csrc/*' | 26 | - 'sherpa-onnx/csrc/*' |
| @@ -92,9 +94,20 @@ jobs: | @@ -92,9 +94,20 @@ jobs: | ||
| 92 | run: | | 94 | run: | |
| 93 | export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH | 95 | export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH |
| 94 | cd c-api-examples | 96 | cd c-api-examples |
| 97 | + | ||
| 95 | gcc -o decode-file-c-api $(pkg-config --cflags sherpa-onnx) ./decode-file-c-api.c $(pkg-config --libs sherpa-onnx) | 98 | gcc -o decode-file-c-api $(pkg-config --cflags sherpa-onnx) ./decode-file-c-api.c $(pkg-config --libs sherpa-onnx) |
| 96 | ./decode-file-c-api --help | 99 | ./decode-file-c-api --help |
| 97 | 100 | ||
| 101 | + gcc -o offline-tts-c-api $(pkg-config --cflags sherpa-onnx) ./offline-tts-c-api.c $(pkg-config --libs sherpa-onnx) | ||
| 102 | + ./offline-tts-c-api --help | ||
| 103 | + | ||
| 104 | + - name: Test offline TTS C API | ||
| 105 | + shell: bash | ||
| 106 | + run: | | ||
| 107 | + export PATH=$PWD/c-api-examples:$PATH | ||
| 108 | + export EXE=offline-tts-c-api | ||
| 109 | + .github/scripts/test-offline-tts.sh | ||
| 110 | + | ||
| 98 | - name: Test online transducer (C API) | 111 | - name: Test online transducer (C API) |
| 99 | shell: bash | 112 | shell: bash |
| 100 | run: | | 113 | run: | |
| @@ -102,3 +115,8 @@ jobs: | @@ -102,3 +115,8 @@ jobs: | ||
| 102 | export EXE=decode-file-c-api | 115 | export EXE=decode-file-c-api |
| 103 | 116 | ||
| 104 | .github/scripts/test-online-transducer.sh | 117 | .github/scripts/test-online-transducer.sh |
| 118 | + | ||
| 119 | + - uses: actions/upload-artifact@v3 | ||
| 120 | + with: | ||
| 121 | + name: tts-generated-test-files | ||
| 122 | + path: tts |
| @@ -12,6 +12,7 @@ on: | @@ -12,6 +12,7 @@ on: | ||
| 12 | - '.github/scripts/test-online-paraformer.sh' | 12 | - '.github/scripts/test-online-paraformer.sh' |
| 13 | - '.github/scripts/test-offline-transducer.sh' | 13 | - '.github/scripts/test-offline-transducer.sh' |
| 14 | - '.github/scripts/test-offline-ctc.sh' | 14 | - '.github/scripts/test-offline-ctc.sh' |
| 15 | + - '.github/scripts/test-offline-tts.sh' | ||
| 15 | - 'CMakeLists.txt' | 16 | - 'CMakeLists.txt' |
| 16 | - 'cmake/**' | 17 | - 'cmake/**' |
| 17 | - 'sherpa-onnx/csrc/*' | 18 | - 'sherpa-onnx/csrc/*' |
| @@ -24,6 +25,7 @@ on: | @@ -24,6 +25,7 @@ on: | ||
| 24 | - '.github/scripts/test-online-paraformer.sh' | 25 | - '.github/scripts/test-online-paraformer.sh' |
| 25 | - '.github/scripts/test-offline-transducer.sh' | 26 | - '.github/scripts/test-offline-transducer.sh' |
| 26 | - '.github/scripts/test-offline-ctc.sh' | 27 | - '.github/scripts/test-offline-ctc.sh' |
| 28 | + - '.github/scripts/test-offline-tts.sh' | ||
| 27 | - 'CMakeLists.txt' | 29 | - 'CMakeLists.txt' |
| 28 | - 'cmake/**' | 30 | - 'cmake/**' |
| 29 | - 'sherpa-onnx/csrc/*' | 31 | - 'sherpa-onnx/csrc/*' |
| @@ -64,6 +66,14 @@ jobs: | @@ -64,6 +66,14 @@ jobs: | ||
| 64 | 66 | ||
| 65 | ls -lh ./bin/Release/sherpa-onnx.exe | 67 | ls -lh ./bin/Release/sherpa-onnx.exe |
| 66 | 68 | ||
| 69 | + - name: Test offline TTS | ||
| 70 | + shell: bash | ||
| 71 | + run: | | ||
| 72 | + export PATH=$PWD/build/bin/Release:$PATH | ||
| 73 | + export EXE=sherpa-onnx-offline-tts.exe | ||
| 74 | + | ||
| 75 | + .github/scripts/test-offline-tts.sh | ||
| 76 | + | ||
| 67 | - name: Test online paraformer for windows x64 | 77 | - name: Test online paraformer for windows x64 |
| 68 | shell: bash | 78 | shell: bash |
| 69 | run: | | 79 | run: | |
| @@ -12,6 +12,7 @@ on: | @@ -12,6 +12,7 @@ on: | ||
| 12 | - '.github/scripts/test-online-paraformer.sh' | 12 | - '.github/scripts/test-online-paraformer.sh' |
| 13 | - '.github/scripts/test-offline-transducer.sh' | 13 | - '.github/scripts/test-offline-transducer.sh' |
| 14 | - '.github/scripts/test-offline-ctc.sh' | 14 | - '.github/scripts/test-offline-ctc.sh' |
| 15 | + - '.github/scripts/test-offline-tts.sh' | ||
| 15 | - 'CMakeLists.txt' | 16 | - 'CMakeLists.txt' |
| 16 | - 'cmake/**' | 17 | - 'cmake/**' |
| 17 | - 'sherpa-onnx/csrc/*' | 18 | - 'sherpa-onnx/csrc/*' |
| @@ -24,6 +25,7 @@ on: | @@ -24,6 +25,7 @@ on: | ||
| 24 | - '.github/scripts/test-online-paraformer.sh' | 25 | - '.github/scripts/test-online-paraformer.sh' |
| 25 | - '.github/scripts/test-offline-transducer.sh' | 26 | - '.github/scripts/test-offline-transducer.sh' |
| 26 | - '.github/scripts/test-offline-ctc.sh' | 27 | - '.github/scripts/test-offline-ctc.sh' |
| 28 | + - '.github/scripts/test-offline-tts.sh' | ||
| 27 | - 'CMakeLists.txt' | 29 | - 'CMakeLists.txt' |
| 28 | - 'cmake/**' | 30 | - 'cmake/**' |
| 29 | - 'sherpa-onnx/csrc/*' | 31 | - 'sherpa-onnx/csrc/*' |
| @@ -45,7 +47,7 @@ jobs: | @@ -45,7 +47,7 @@ jobs: | ||
| 45 | shared_lib: [ON, OFF] | 47 | shared_lib: [ON, OFF] |
| 46 | 48 | ||
| 47 | steps: | 49 | steps: |
| 48 | - - uses: actions/checkout@v2 | 50 | + - uses: actions/checkout@v3 |
| 49 | with: | 51 | with: |
| 50 | fetch-depth: 0 | 52 | fetch-depth: 0 |
| 51 | 53 | ||
| @@ -65,6 +67,14 @@ jobs: | @@ -65,6 +67,14 @@ jobs: | ||
| 65 | 67 | ||
| 66 | ls -lh ./bin/Release/sherpa-onnx.exe | 68 | ls -lh ./bin/Release/sherpa-onnx.exe |
| 67 | 69 | ||
| 70 | + - name: Test offline TTS | ||
| 71 | + shell: bash | ||
| 72 | + run: | | ||
| 73 | + export PATH=$PWD/build/bin/Release:$PATH | ||
| 74 | + export EXE=sherpa-onnx-offline-tts.exe | ||
| 75 | + | ||
| 76 | + .github/scripts/test-offline-tts.sh | ||
| 77 | + | ||
| 68 | - name: Test online paraformer for windows x64 | 78 | - name: Test online paraformer for windows x64 |
| 69 | shell: bash | 79 | shell: bash |
| 70 | run: | | 80 | run: | |
| @@ -12,6 +12,7 @@ on: | @@ -12,6 +12,7 @@ on: | ||
| 12 | - '.github/scripts/test-online-paraformer.sh' | 12 | - '.github/scripts/test-online-paraformer.sh' |
| 13 | - '.github/scripts/test-offline-transducer.sh' | 13 | - '.github/scripts/test-offline-transducer.sh' |
| 14 | - '.github/scripts/test-offline-ctc.sh' | 14 | - '.github/scripts/test-offline-ctc.sh' |
| 15 | + - '.github/scripts/test-offline-tts.sh' | ||
| 15 | - 'CMakeLists.txt' | 16 | - 'CMakeLists.txt' |
| 16 | - 'cmake/**' | 17 | - 'cmake/**' |
| 17 | - 'sherpa-onnx/csrc/*' | 18 | - 'sherpa-onnx/csrc/*' |
| @@ -24,6 +25,7 @@ on: | @@ -24,6 +25,7 @@ on: | ||
| 24 | - '.github/scripts/test-online-paraformer.sh' | 25 | - '.github/scripts/test-online-paraformer.sh' |
| 25 | - '.github/scripts/test-offline-transducer.sh' | 26 | - '.github/scripts/test-offline-transducer.sh' |
| 26 | - '.github/scripts/test-offline-ctc.sh' | 27 | - '.github/scripts/test-offline-ctc.sh' |
| 28 | + - '.github/scripts/test-offline-tts.sh' | ||
| 27 | - 'CMakeLists.txt' | 29 | - 'CMakeLists.txt' |
| 28 | - 'cmake/**' | 30 | - 'cmake/**' |
| 29 | - 'sherpa-onnx/csrc/*' | 31 | - 'sherpa-onnx/csrc/*' |
| @@ -45,7 +47,7 @@ jobs: | @@ -45,7 +47,7 @@ jobs: | ||
| 45 | shared_lib: [ON, OFF] | 47 | shared_lib: [ON, OFF] |
| 46 | 48 | ||
| 47 | steps: | 49 | steps: |
| 48 | - - uses: actions/checkout@v2 | 50 | + - uses: actions/checkout@v3 |
| 49 | with: | 51 | with: |
| 50 | fetch-depth: 0 | 52 | fetch-depth: 0 |
| 51 | 53 | ||
| @@ -65,6 +67,14 @@ jobs: | @@ -65,6 +67,14 @@ jobs: | ||
| 65 | 67 | ||
| 66 | ls -lh ./bin/Release/sherpa-onnx.exe | 68 | ls -lh ./bin/Release/sherpa-onnx.exe |
| 67 | 69 | ||
| 70 | + - name: Test offline TTS | ||
| 71 | + shell: bash | ||
| 72 | + run: | | ||
| 73 | + export PATH=$PWD/build/bin/Release:$PATH | ||
| 74 | + export EXE=sherpa-onnx-offline-tts.exe | ||
| 75 | + | ||
| 76 | + .github/scripts/test-offline-tts.sh | ||
| 77 | + | ||
| 68 | - name: Test online paraformer for windows x86 | 78 | - name: Test online paraformer for windows x86 |
| 69 | shell: bash | 79 | shell: bash |
| 70 | run: | | 80 | run: | |
| @@ -21,6 +21,7 @@ run-bilingual*.sh | @@ -21,6 +21,7 @@ run-bilingual*.sh | ||
| 21 | run-*-zipformer.sh | 21 | run-*-zipformer.sh |
| 22 | run-zh.sh | 22 | run-zh.sh |
| 23 | decode-file-c-api | 23 | decode-file-c-api |
| 24 | +offline-tts-c-api | ||
| 24 | run-decode-file-c-api.sh | 25 | run-decode-file-c-api.sh |
| 25 | sherpa-onnx-ffmpeg | 26 | sherpa-onnx-ffmpeg |
| 26 | build-ios | 27 | build-ios |
| @@ -3,3 +3,6 @@ include(cargs) | @@ -3,3 +3,6 @@ include(cargs) | ||
| 3 | include_directories(${CMAKE_SOURCE_DIR}) | 3 | include_directories(${CMAKE_SOURCE_DIR}) |
| 4 | add_executable(decode-file-c-api decode-file-c-api.c) | 4 | add_executable(decode-file-c-api decode-file-c-api.c) |
| 5 | target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) | 5 | target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) |
| 6 | + | ||
| 7 | +add_executable(offline-tts-c-api offline-tts-c-api.c) | ||
| 8 | +target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs) |
| @@ -4,9 +4,19 @@ CUR_DIR :=$(shell pwd) | @@ -4,9 +4,19 @@ CUR_DIR :=$(shell pwd) | ||
| 4 | CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ | 4 | CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ |
| 5 | LDFLAGS := -L ../build/lib | 5 | LDFLAGS := -L ../build/lib |
| 6 | LDFLAGS += -L ../build/_deps/onnxruntime-src/lib | 6 | LDFLAGS += -L ../build/_deps/onnxruntime-src/lib |
| 7 | -LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lcargs | 7 | +LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs |
| 8 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib | 8 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib |
| 9 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/_deps/onnxruntime-src/lib | 9 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/_deps/onnxruntime-src/lib |
| 10 | 10 | ||
| 11 | +.PHONY: all clean | ||
| 12 | + | ||
| 13 | +all: decode-file-c-api offline-tts-c-api | ||
| 14 | + | ||
| 11 | decode-file-c-api: decode-file-c-api.c | 15 | decode-file-c-api: decode-file-c-api.c |
| 12 | $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) | 16 | $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) |
| 17 | + | ||
| 18 | +offline-tts-c-api: offline-tts-c-api.c | ||
| 19 | + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) | ||
| 20 | + | ||
| 21 | +clean: | ||
| 22 | + $(RM) ./decode-file-c-api ./offline-tts-c-api |
| @@ -6,4 +6,13 @@ Please refer to the documentation | @@ -6,4 +6,13 @@ Please refer to the documentation | ||
| 6 | https://k2-fsa.github.io/sherpa/onnx/c-api/index.html | 6 | https://k2-fsa.github.io/sherpa/onnx/c-api/index.html |
| 7 | for details. | 7 | for details. |
| 8 | 8 | ||
| 9 | + | ||
| 10 | +## File descriptions | ||
| 11 | + | ||
| 12 | +- [decode-file-c-api.c](./decode-file-c-api.c) This file shows how to use the C API | ||
| 13 | + for speech recognition with a streaming model. | ||
| 14 | + | ||
| 15 | +- [offline-tts-c-api.c](./offline-tts-c-api.c) This file shows how to use the C API | ||
| 16 | + to convert text to speech with a non-streaming model. | ||
| 17 | + | ||
| 9 | [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx | 18 | [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx |
c-api-examples/offline-tts-c-api.c
0 → 100644
| 1 | +// c-api-examples/offline-tts-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// This file shows how to use sherpa-onnx C API | ||
| 6 | +// to convert text to speech using an offline model. | ||
| 7 | + | ||
| 8 | +#include <stdio.h> | ||
| 9 | +#include <stdlib.h> | ||
| 10 | +#include <string.h> | ||
| 11 | + | ||
| 12 | +#include "cargs.h" | ||
| 13 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 14 | + | ||
| 15 | +static struct cag_option options[] = { | ||
| 16 | + {.identifier = 'h', | ||
| 17 | + .access_letters = "h", | ||
| 18 | + .access_name = "help", | ||
| 19 | + .description = "Show help"}, | ||
| 20 | + {.access_name = "vits-model", | ||
| 21 | + .value_name = "/path/to/xxx.onnx", | ||
| 22 | + .identifier = '0', | ||
| 23 | + .description = "Path to VITS model"}, | ||
| 24 | + {.access_name = "vits-lexicon", | ||
| 25 | + .value_name = "/path/to/lexicon.txt", | ||
| 26 | + .identifier = '1', | ||
| 27 | + .description = "Path to lexicon.txt for VITS models"}, | ||
| 28 | + {.access_name = "vits-tokens", | ||
| 29 | + .value_name = "/path/to/tokens.txt", | ||
| 30 | + .identifier = '2', | ||
| 31 | + .description = "Path to tokens.txt for VITS models"}, | ||
| 32 | + {.access_name = "vits-noise-scale", | ||
| 33 | + .value_name = "0.667", | ||
| 34 | + .identifier = '3', | ||
| 35 | + .description = "noise_scale for VITS models"}, | ||
| 36 | + {.access_name = "vits-noise-scale-w", | ||
| 37 | + .value_name = "0.8", | ||
| 38 | + .identifier = '4', | ||
| 39 | + .description = "noise_scale_w for VITS models"}, | ||
| 40 | + {.access_name = "vits-length-scale", | ||
| 41 | + .value_name = "1.0", | ||
| 42 | + .identifier = '5', | ||
| 43 | + .description = | ||
| 44 | + "length_scale for VITS models. Default to 1. You can tune it " | ||
| 45 | + "to change the speech speed. small -> faster; large -> slower. "}, | ||
| 46 | + {.access_name = "num-threads", | ||
| 47 | + .value_name = "1", | ||
| 48 | + .identifier = '6', | ||
| 49 | + .description = "Number of threads"}, | ||
| 50 | + {.access_name = "provider", | ||
| 51 | + .value_name = "cpu", | ||
| 52 | + .identifier = '7', | ||
| 53 | + .description = "Provider: cpu (default), cuda, coreml"}, | ||
| 54 | + {.access_name = "debug", | ||
| 55 | + .value_name = "0", | ||
| 56 | + .identifier = '8', | ||
| 57 | + .description = "1 to show debug messages while loading the model"}, | ||
| 58 | + {.access_name = "sid", | ||
| 59 | + .value_name = "0", | ||
| 60 | + .identifier = '9', | ||
| 61 | + .description = "Speaker ID. Default to 0. Note it is not used for " | ||
| 62 | + "single-speaker models."}, | ||
| 63 | + {.access_name = "output-filename", | ||
| 64 | + .value_name = "./generated.wav", | ||
| 65 | + .identifier = 'a', | ||
| 66 | + .description = | ||
| 67 | + "Filename to save the generated audio. Default to ./generated.wav"}, | ||
| 68 | +}; | ||
| 69 | + | ||
| 70 | +static void ShowUsage() { | ||
| 71 | + const char *kUsageMessage = | ||
| 72 | + "Offline text-to-speech with sherpa-onnx C API" | ||
| 73 | + "\n" | ||
| 74 | + "./offline-tts-c-api \\\n" | ||
| 75 | + " --vits-model=/path/to/model.onnx \\\n" | ||
| 76 | + " --vits-lexicon=/path/to/lexicon.txt \\\n" | ||
| 77 | + " --vits-tokens=/path/to/tokens.txt \\\n" | ||
| 78 | + " --sid=0 \\\n" | ||
| 79 | + " --output-filename=./generated.wav \\\n" | ||
| 80 | + " 'some text within single quotes on linux/macos or use double quotes on " | ||
| 81 | + "windows'\n" | ||
| 82 | + "\n" | ||
| 83 | + "It will generate a file ./generated.wav as specified by " | ||
| 84 | + "--output-filename.\n" | ||
| 85 | + "\n" | ||
| 86 | + "You can download a test model from\n" | ||
| 87 | + "https://huggingface.co/csukuangfj/vits-ljs\n" | ||
| 88 | + "\n" | ||
| 89 | + "For instance, you can use:\n" | ||
| 90 | + "wget " | ||
| 91 | + "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx\n" | ||
| 92 | + "wget " | ||
| 93 | + "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt\n" | ||
| 94 | + "wget " | ||
| 95 | + "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt\n" | ||
| 96 | + "\n" | ||
| 97 | + "./offline-tts-c-api \\\n" | ||
| 98 | + " --vits-model=./vits-ljs.onnx \\\n" | ||
| 99 | + " --vits-lexicon=./lexicon.txt \\\n" | ||
| 100 | + " --vits-tokens=./tokens.txt \\\n" | ||
| 101 | + " --sid=0 \\\n" | ||
| 102 | + " --output-filename=./generated.wav \\\n" | ||
| 103 | + " 'liliana, the most beautiful and lovely assistant of our team!'\n" | ||
| 104 | + "\n" | ||
| 105 | + "Please see\n" | ||
| 106 | + "https://k2-fsa.github.io/sherpa/onnx/tts/index.html\n" | ||
| 107 | + "or details.\n\n"; | ||
| 108 | + | ||
| 109 | + fprintf(stderr, "%s", kUsageMessage); | ||
| 110 | + cag_option_print(options, CAG_ARRAY_SIZE(options), stderr); | ||
| 111 | + exit(0); | ||
| 112 | +} | ||
| 113 | + | ||
| 114 | +int32_t main(int32_t argc, char *argv[]) { | ||
| 115 | + cag_option_context context; | ||
| 116 | + char identifier; | ||
| 117 | + const char *value; | ||
| 118 | + | ||
| 119 | + cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv); | ||
| 120 | + | ||
| 121 | + SherpaOnnxOfflineTtsConfig config; | ||
| 122 | + memset(&config, 0, sizeof(config)); | ||
| 123 | + | ||
| 124 | + int32_t sid = 0; | ||
| 125 | + const char *filename = strdup("./generated.wav"); | ||
| 126 | + const char *text; | ||
| 127 | + | ||
| 128 | + while (cag_option_fetch(&context)) { | ||
| 129 | + identifier = cag_option_get(&context); | ||
| 130 | + value = cag_option_get_value(&context); | ||
| 131 | + switch (identifier) { | ||
| 132 | + case '0': | ||
| 133 | + config.model.vits.model = value; | ||
| 134 | + break; | ||
| 135 | + case '1': | ||
| 136 | + config.model.vits.lexicon = value; | ||
| 137 | + break; | ||
| 138 | + case '2': | ||
| 139 | + config.model.vits.tokens = value; | ||
| 140 | + break; | ||
| 141 | + case '3': | ||
| 142 | + config.model.vits.noise_scale = atof(value); | ||
| 143 | + break; | ||
| 144 | + case '4': | ||
| 145 | + config.model.vits.noise_scale_w = atof(value); | ||
| 146 | + break; | ||
| 147 | + case '5': | ||
| 148 | + config.model.vits.length_scale = atof(value); | ||
| 149 | + break; | ||
| 150 | + case '6': | ||
| 151 | + config.model.num_threads = atoi(value); | ||
| 152 | + break; | ||
| 153 | + case '7': | ||
| 154 | + config.model.provider = value; | ||
| 155 | + break; | ||
| 156 | + case '8': | ||
| 157 | + config.model.debug = atoi(value); | ||
| 158 | + break; | ||
| 159 | + case '9': | ||
| 160 | + sid = atoi(value); | ||
| 161 | + break; | ||
| 162 | + case 'a': | ||
| 163 | + free((void *)filename); | ||
| 164 | + filename = strdup(value); | ||
| 165 | + break; | ||
| 166 | + case 'h': | ||
| 167 | + // fall through | ||
| 168 | + default: | ||
| 169 | + ShowUsage(); | ||
| 170 | + } | ||
| 171 | + } | ||
| 172 | + | ||
| 173 | + if (!config.model.vits.model || !config.model.vits.lexicon || | ||
| 174 | + !config.model.vits.tokens) { | ||
| 175 | + ShowUsage(); | ||
| 176 | + } | ||
| 177 | + | ||
| 178 | + // the last arg is the text | ||
| 179 | + text = argv[argc - 1]; | ||
| 180 | + if (text[0] == '-') { | ||
| 181 | + fprintf(stderr, "\n***Please input your text!***\n\n"); | ||
| 182 | + fprintf(stderr, "\n---------------Usage---------------\n\n"); | ||
| 183 | + ShowUsage(); | ||
| 184 | + } | ||
| 185 | + | ||
| 186 | + SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config); | ||
| 187 | + | ||
| 188 | + const SherpaOnnxGeneratedAudio *audio = | ||
| 189 | + SherpaOnnxOfflineTtsGenerate(tts, text, sid); | ||
| 190 | + | ||
| 191 | + SherpaOnnxDestroyOfflineWriteWave(audio, filename); | ||
| 192 | + | ||
| 193 | + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); | ||
| 194 | + SherpaOnnxDestroyOfflineTts(tts); | ||
| 195 | + | ||
| 196 | + fprintf(stderr, "Input text is: %s\n", text); | ||
| 197 | + fprintf(stderr, "Speaker ID is is: %d\n", sid); | ||
| 198 | + fprintf(stderr, "Saved to: %s\n", filename); | ||
| 199 | + | ||
| 200 | + free((void *)filename); | ||
| 201 | + | ||
| 202 | + return 0; | ||
| 203 | +} |
| @@ -12,8 +12,10 @@ | @@ -12,8 +12,10 @@ | ||
| 12 | #include "sherpa-onnx/csrc/circular-buffer.h" | 12 | #include "sherpa-onnx/csrc/circular-buffer.h" |
| 13 | #include "sherpa-onnx/csrc/display.h" | 13 | #include "sherpa-onnx/csrc/display.h" |
| 14 | #include "sherpa-onnx/csrc/offline-recognizer.h" | 14 | #include "sherpa-onnx/csrc/offline-recognizer.h" |
| 15 | +#include "sherpa-onnx/csrc/offline-tts.h" | ||
| 15 | #include "sherpa-onnx/csrc/online-recognizer.h" | 16 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| 16 | #include "sherpa-onnx/csrc/voice-activity-detector.h" | 17 | #include "sherpa-onnx/csrc/voice-activity-detector.h" |
| 18 | +#include "sherpa-onnx/csrc/wave-writer.h" | ||
| 17 | 19 | ||
| 18 | struct SherpaOnnxOnlineRecognizer { | 20 | struct SherpaOnnxOnlineRecognizer { |
| 19 | std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl; | 21 | std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl; |
| @@ -204,12 +206,14 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( | @@ -204,12 +206,14 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( | ||
| 204 | } | 206 | } |
| 205 | 207 | ||
| 206 | void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) { | 208 | void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) { |
| 207 | - delete[] r->text; | ||
| 208 | - delete[] r->json; | ||
| 209 | - delete[] r->tokens; | ||
| 210 | - delete[] r->tokens_arr; | ||
| 211 | - delete[] r->timestamps; | ||
| 212 | - delete r; | 209 | + if (r) { |
| 210 | + delete[] r->text; | ||
| 211 | + delete[] r->json; | ||
| 212 | + delete[] r->tokens; | ||
| 213 | + delete[] r->tokens_arr; | ||
| 214 | + delete[] r->timestamps; | ||
| 215 | + delete r; | ||
| 216 | + } | ||
| 213 | } | 217 | } |
| 214 | 218 | ||
| 215 | void Reset(SherpaOnnxOnlineRecognizer *recognizer, | 219 | void Reset(SherpaOnnxOnlineRecognizer *recognizer, |
| @@ -385,9 +389,11 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( | @@ -385,9 +389,11 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( | ||
| 385 | 389 | ||
| 386 | void DestroyOfflineRecognizerResult( | 390 | void DestroyOfflineRecognizerResult( |
| 387 | const SherpaOnnxOfflineRecognizerResult *r) { | 391 | const SherpaOnnxOfflineRecognizerResult *r) { |
| 388 | - delete[] r->text; | ||
| 389 | - delete[] r->timestamps; | ||
| 390 | - delete r; | 392 | + if (r) { |
| 393 | + delete[] r->text; | ||
| 394 | + delete[] r->timestamps; | ||
| 395 | + delete r; | ||
| 396 | + } | ||
| 391 | } | 397 | } |
| 392 | 398 | ||
| 393 | // ============================================================ | 399 | // ============================================================ |
| @@ -493,18 +499,16 @@ int32_t SherpaOnnxVoiceActivityDetectorDetected( | @@ -493,18 +499,16 @@ int32_t SherpaOnnxVoiceActivityDetectorDetected( | ||
| 493 | return p->impl->IsSpeechDetected(); | 499 | return p->impl->IsSpeechDetected(); |
| 494 | } | 500 | } |
| 495 | 501 | ||
| 496 | -void SherpaOnnxVoiceActivityDetectorPop( | ||
| 497 | - SherpaOnnxVoiceActivityDetector *p) { | 502 | +void SherpaOnnxVoiceActivityDetectorPop(SherpaOnnxVoiceActivityDetector *p) { |
| 498 | p->impl->Pop(); | 503 | p->impl->Pop(); |
| 499 | } | 504 | } |
| 500 | 505 | ||
| 501 | -void SherpaOnnxVoiceActivityDetectorClear( | ||
| 502 | - SherpaOnnxVoiceActivityDetector *p) { | 506 | +void SherpaOnnxVoiceActivityDetectorClear(SherpaOnnxVoiceActivityDetector *p) { |
| 503 | p->impl->Clear(); | 507 | p->impl->Clear(); |
| 504 | } | 508 | } |
| 505 | 509 | ||
| 506 | -const SherpaOnnxSpeechSegment * | ||
| 507 | -SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) { | 510 | +const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront( |
| 511 | + SherpaOnnxVoiceActivityDetector *p) { | ||
| 508 | const sherpa_onnx::SpeechSegment &segment = p->impl->Front(); | 512 | const sherpa_onnx::SpeechSegment &segment = p->impl->Front(); |
| 509 | 513 | ||
| 510 | SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment; | 514 | SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment; |
| @@ -517,10 +521,81 @@ SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) { | @@ -517,10 +521,81 @@ SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) { | ||
| 517 | } | 521 | } |
| 518 | 522 | ||
| 519 | void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) { | 523 | void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) { |
| 520 | - delete[] p->samples; | ||
| 521 | - delete p; | 524 | + if (p) { |
| 525 | + delete[] p->samples; | ||
| 526 | + delete p; | ||
| 527 | + } | ||
| 522 | } | 528 | } |
| 523 | 529 | ||
| 524 | void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { | 530 | void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { |
| 525 | p->impl->Reset(); | 531 | p->impl->Reset(); |
| 526 | } | 532 | } |
| 533 | + | ||
| 534 | +struct SherpaOnnxOfflineTts { | ||
| 535 | + std::unique_ptr<sherpa_onnx::OfflineTts> impl; | ||
| 536 | +}; | ||
| 537 | + | ||
| 538 | +SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | ||
| 539 | + const SherpaOnnxOfflineTtsConfig *config) { | ||
| 540 | + sherpa_onnx::OfflineTtsConfig tts_config; | ||
| 541 | + | ||
| 542 | + tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, ""); | ||
| 543 | + tts_config.model.vits.lexicon = | ||
| 544 | + SHERPA_ONNX_OR(config->model.vits.lexicon, ""); | ||
| 545 | + tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, ""); | ||
| 546 | + tts_config.model.vits.noise_scale = | ||
| 547 | + SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667); | ||
| 548 | + tts_config.model.vits.noise_scale_w = | ||
| 549 | + SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8); | ||
| 550 | + tts_config.model.vits.length_scale = | ||
| 551 | + SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0); | ||
| 552 | + | ||
| 553 | + tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | ||
| 554 | + tts_config.model.debug = config->model.debug; | ||
| 555 | + tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | ||
| 556 | + | ||
| 557 | + if (tts_config.model.debug) { | ||
| 558 | + fprintf(stderr, "%s\n", tts_config.ToString().c_str()); | ||
| 559 | + } | ||
| 560 | + | ||
| 561 | + SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; | ||
| 562 | + | ||
| 563 | + tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config); | ||
| 564 | + | ||
| 565 | + return tts; | ||
| 566 | +} | ||
| 567 | + | ||
| 568 | +void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; } | ||
| 569 | + | ||
| 570 | +const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate( | ||
| 571 | + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) { | ||
| 572 | + sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid); | ||
| 573 | + | ||
| 574 | + if (audio.samples.empty()) { | ||
| 575 | + return nullptr; | ||
| 576 | + } | ||
| 577 | + | ||
| 578 | + SherpaOnnxGeneratedAudio *ans = new SherpaOnnxGeneratedAudio; | ||
| 579 | + | ||
| 580 | + float *samples = new float[audio.samples.size()]; | ||
| 581 | + std::copy(audio.samples.begin(), audio.samples.end(), samples); | ||
| 582 | + | ||
| 583 | + ans->samples = samples; | ||
| 584 | + ans->n = audio.samples.size(); | ||
| 585 | + ans->sample_rate = audio.sample_rate; | ||
| 586 | + | ||
| 587 | + return ans; | ||
| 588 | +} | ||
| 589 | + | ||
| 590 | +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( | ||
| 591 | + const SherpaOnnxGeneratedAudio *p) { | ||
| 592 | + if (p) { | ||
| 593 | + delete[] p->samples; | ||
| 594 | + delete p; | ||
| 595 | + } | ||
| 596 | +} | ||
| 597 | + | ||
| 598 | +int32_t SherpaOnnxDestroyOfflineWriteWave(const SherpaOnnxGeneratedAudio *p, | ||
| 599 | + const char *filename) { | ||
| 600 | + return sherpa_onnx::WriteWave(filename, p->sample_rate, p->samples, p->n); | ||
| 601 | +} |
| @@ -595,6 +595,62 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( | @@ -595,6 +595,62 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( | ||
| 595 | SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset( | 595 | SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset( |
| 596 | SherpaOnnxVoiceActivityDetector *p); | 596 | SherpaOnnxVoiceActivityDetector *p); |
| 597 | 597 | ||
| 598 | +// ============================================================ | ||
| 599 | +// For offline Text-to-Speech (i.e., non-streaming TTS) | ||
| 600 | +// ============================================================ | ||
| 601 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsVitsModelConfig { | ||
| 602 | + const char *model; | ||
| 603 | + const char *lexicon; | ||
| 604 | + const char *tokens; | ||
| 605 | + | ||
| 606 | + float noise_scale; | ||
| 607 | + float noise_scale_w; | ||
| 608 | + float length_scale; // < 1, faster in speed; > 1, slower in speed | ||
| 609 | +} SherpaOnnxOfflineTtsVitsModelConfig; | ||
| 610 | + | ||
| 611 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig { | ||
| 612 | + SherpaOnnxOfflineTtsVitsModelConfig vits; | ||
| 613 | + int32_t num_threads; | ||
| 614 | + int32_t debug; | ||
| 615 | + const char *provider; | ||
| 616 | +} SherpaOnnxOfflineTtsModelConfig; | ||
| 617 | + | ||
| 618 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { | ||
| 619 | + SherpaOnnxOfflineTtsModelConfig model; | ||
| 620 | +} SherpaOnnxOfflineTtsConfig; | ||
| 621 | + | ||
| 622 | +SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio { | ||
| 623 | + const float *samples; // in the range [-1, 1] | ||
| 624 | + int32_t n; // number of samples | ||
| 625 | + int32_t sample_rate; | ||
| 626 | +} SherpaOnnxGeneratedAudio; | ||
| 627 | + | ||
| 628 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts; | ||
| 629 | + | ||
| 630 | +// Create an instance of offline TTS. The caller must free the returned | ||
| 631 | +// pointer with SherpaOnnxDestroyOfflineTts() to avoid a memory leak. | ||
| 632 | +SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | ||
| 633 | + const SherpaOnnxOfflineTtsConfig *config); | ||
| 634 | + | ||
| 635 | +// Free the pointer returned by SherpaOnnxCreateOfflineTts() | ||
| 636 | +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts); | ||
| 637 | + | ||
| 638 | +// Generate audio from the given text and speaker id (sid). | ||
| 639 | +// The caller must free the returned pointer with | ||
| 640 | +// SherpaOnnxDestroyOfflineTtsGeneratedAudio() to avoid a memory leak. | ||
| 641 | +SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate( | ||
| 642 | + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid); | ||
| 643 | + | ||
| 644 | +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( | ||
| 645 | + const SherpaOnnxGeneratedAudio *p); | ||
| 646 | + | ||
| 647 | +// Write the generated audio to a wave file. | ||
| 648 | +// The saved wave file contains a single channel and has 16-bit samples. | ||
| 649 | +// | ||
| 650 | +// Return 1 if the write succeeded; return 0 on failure. | ||
| 651 | +SHERPA_ONNX_API int32_t SherpaOnnxDestroyOfflineWriteWave( | ||
| 652 | + const SherpaOnnxGeneratedAudio *p, const char *filename); | ||
| 653 | + | ||
| 598 | #if defined(__GNUC__) | 654 | #if defined(__GNUC__) |
| 599 | #pragma GCC diagnostic pop | 655 | #pragma GCC diagnostic pop |
| 600 | #endif | 656 | #endif |
| @@ -40,7 +40,7 @@ wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt | @@ -40,7 +40,7 @@ wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt | ||
| 40 | 40 | ||
| 41 | Please see | 41 | Please see |
| 42 | https://k2-fsa.github.io/sherpa/onnx/tts/index.html | 42 | https://k2-fsa.github.io/sherpa/onnx/tts/index.html |
| 43 | -or detailes. | 43 | +or details. |
| 44 | )usage"; | 44 | )usage"; |
| 45 | 45 | ||
| 46 | sherpa_onnx::ParseOptions po(kUsageMessage); | 46 | sherpa_onnx::ParseOptions po(kUsageMessage); |
-
请 注册 或 登录 后发表评论