Fangjun Kuang
Committed by GitHub

Add C API for offline TTS. (#373)

  1 +#!/usr/bin/env bash
  2 +
  3 +set -e
  4 +
  5 +log() {
  6 + # This function is from espnet
  7 + local fname=${BASH_SOURCE[1]##*/}
  8 + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
  9 +}
  10 +
  11 +echo "EXE is $EXE"
  12 +echo "PATH: $PATH"
  13 +
  14 +which $EXE
  15 +
  16 +# test waves are saved in ./tts
  17 +mkdir ./tts
  18 +
  19 +log "------------------------------------------------------------"
  20 +log "vits-ljs test"
  21 +log "------------------------------------------------------------"
  22 +
  23 +repo_url=https://huggingface.co/csukuangfj/vits-ljs
  24 +log "Start testing ${repo_url}"
  25 +repo=$(basename $repo_url)
  26 +log "Download pretrained model from $repo_url"
  27 +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  28 +pushd $repo
  29 +git lfs pull --include "*.onnx"
  30 +ls -lh *.onnx
  31 +popd
  32 +
  33 +$EXE \
  34 + --vits-model=$repo/vits-ljs.onnx \
  35 + --vits-lexicon=$repo/lexicon.txt \
  36 + --vits-tokens=$repo/tokens.txt \
  37 + --output-filename=./tts/vits-ljs.wav \
  38 + 'liliana, the most beautiful and lovely assistant of our team!'
  39 +
  40 +ls -lh ./tts
  41 +
  42 +rm -rfv $repo
  43 +
  44 +log "------------------------------------------------------------"
  45 +log "vits-vctk test"
  46 +log "------------------------------------------------------------"
  47 +
  48 +repo_url=https://huggingface.co/csukuangfj/vits-vctk
  49 +log "Start testing ${repo_url}"
  50 +repo=$(basename $repo_url)
  51 +log "Download pretrained model from $repo_url"
  52 +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  53 +pushd $repo
  54 +git lfs pull --include "*.onnx"
  55 +ls -lh *.onnx
  56 +popd
  57 +
  58 +for sid in 0 10 90; do
  59 + $EXE \
  60 + --vits-model=$repo/vits-vctk.onnx \
  61 + --vits-lexicon=$repo/lexicon.txt \
  62 + --vits-tokens=$repo/tokens.txt \
  63 + --sid=$sid \
  64 + --output-filename=./tts/vits-vctk-${sid}.wav \
  65 + 'liliana, the most beautiful and lovely assistant of our team!'
  66 +done
  67 +
  68 +rm -rfv $repo
  69 +
  70 +ls -lh tts/
  71 +
  72 +log "------------------------------------------------------------"
  73 +log "vits-zh-aishell3"
  74 +log "------------------------------------------------------------"
  75 +
  76 +repo_url=https://huggingface.co/csukuangfj/vits-zh-aishell3
  77 +log "Start testing ${repo_url}"
  78 +repo=$(basename $repo_url)
  79 +log "Download pretrained model from $repo_url"
  80 +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  81 +pushd $repo
  82 +git lfs pull --include "*.onnx"
  83 +ls -lh *.onnx
  84 +popd
  85 +
  86 +for sid in 0 10 90; do
  87 + $EXE \
  88 + --vits-model=$repo/vits-aishell3.onnx \
  89 + --vits-lexicon=$repo/lexicon.txt \
  90 + --vits-tokens=$repo/tokens.txt \
  91 + --sid=$sid \
  92 + --output-filename=./tts/vits-aishell3-${sid}.wav \
  93 + '林美丽最美丽'
  94 +done
  95 +
  96 +rm -rfv $repo
  97 +
  98 +ls -lh ./tts/
@@ -12,6 +12,7 @@ on: @@ -12,6 +12,7 @@ on:
12 - '.github/scripts/test-online-paraformer.sh' 12 - '.github/scripts/test-online-paraformer.sh'
13 - '.github/scripts/test-offline-transducer.sh' 13 - '.github/scripts/test-offline-transducer.sh'
14 - '.github/scripts/test-offline-ctc.sh' 14 - '.github/scripts/test-offline-ctc.sh'
  15 + - '.github/scripts/test-offline-tts.sh'
15 - 'CMakeLists.txt' 16 - 'CMakeLists.txt'
16 - 'cmake/**' 17 - 'cmake/**'
17 - 'sherpa-onnx/csrc/*' 18 - 'sherpa-onnx/csrc/*'
@@ -26,6 +27,7 @@ on: @@ -26,6 +27,7 @@ on:
26 - '.github/scripts/test-online-paraformer.sh' 27 - '.github/scripts/test-online-paraformer.sh'
27 - '.github/scripts/test-offline-transducer.sh' 28 - '.github/scripts/test-offline-transducer.sh'
28 - '.github/scripts/test-offline-ctc.sh' 29 - '.github/scripts/test-offline-ctc.sh'
  30 + - '.github/scripts/test-offline-tts.sh'
29 - 'CMakeLists.txt' 31 - 'CMakeLists.txt'
30 - 'cmake/**' 32 - 'cmake/**'
31 - 'sherpa-onnx/csrc/*' 33 - 'sherpa-onnx/csrc/*'
@@ -48,7 +50,7 @@ jobs: @@ -48,7 +50,7 @@ jobs:
48 build_type: [Release, Debug] 50 build_type: [Release, Debug]
49 51
50 steps: 52 steps:
51 - - uses: actions/checkout@v2 53 + - uses: actions/checkout@v3
52 with: 54 with:
53 fetch-depth: 0 55 fetch-depth: 0
54 56
@@ -75,6 +77,14 @@ jobs: @@ -75,6 +77,14 @@ jobs:
75 file build/bin/sherpa-onnx 77 file build/bin/sherpa-onnx
76 readelf -d build/bin/sherpa-onnx 78 readelf -d build/bin/sherpa-onnx
77 79
  80 + - name: Test offline TTS
  81 + shell: bash
  82 + run: |
  83 + export PATH=$PWD/build/bin:$PATH
  84 + export EXE=sherpa-onnx-offline-tts
  85 +
  86 + .github/scripts/test-offline-tts.sh
  87 +
78 - name: Test online paraformer 88 - name: Test online paraformer
79 shell: bash 89 shell: bash
80 run: | 90 run: |
@@ -12,6 +12,7 @@ on: @@ -12,6 +12,7 @@ on:
12 - '.github/scripts/test-online-paraformer.sh' 12 - '.github/scripts/test-online-paraformer.sh'
13 - '.github/scripts/test-offline-transducer.sh' 13 - '.github/scripts/test-offline-transducer.sh'
14 - '.github/scripts/test-offline-ctc.sh' 14 - '.github/scripts/test-offline-ctc.sh'
  15 + - '.github/scripts/test-offline-tts.sh'
15 - 'CMakeLists.txt' 16 - 'CMakeLists.txt'
16 - 'cmake/**' 17 - 'cmake/**'
17 - 'sherpa-onnx/csrc/*' 18 - 'sherpa-onnx/csrc/*'
@@ -26,6 +27,7 @@ on: @@ -26,6 +27,7 @@ on:
26 - '.github/scripts/test-online-paraformer.sh' 27 - '.github/scripts/test-online-paraformer.sh'
27 - '.github/scripts/test-offline-transducer.sh' 28 - '.github/scripts/test-offline-transducer.sh'
28 - '.github/scripts/test-offline-ctc.sh' 29 - '.github/scripts/test-offline-ctc.sh'
  30 + - '.github/scripts/test-offline-tts.sh'
29 - 'CMakeLists.txt' 31 - 'CMakeLists.txt'
30 - 'cmake/**' 32 - 'cmake/**'
31 - 'sherpa-onnx/csrc/*' 33 - 'sherpa-onnx/csrc/*'
@@ -49,7 +51,7 @@ jobs: @@ -49,7 +51,7 @@ jobs:
49 shared_lib: [ON, OFF] 51 shared_lib: [ON, OFF]
50 52
51 steps: 53 steps:
52 - - uses: actions/checkout@v2 54 + - uses: actions/checkout@v3
53 with: 55 with:
54 fetch-depth: 0 56 fetch-depth: 0
55 57
@@ -76,6 +78,14 @@ jobs: @@ -76,6 +78,14 @@ jobs:
76 file build/bin/sherpa-onnx 78 file build/bin/sherpa-onnx
77 readelf -d build/bin/sherpa-onnx 79 readelf -d build/bin/sherpa-onnx
78 80
  81 + - name: Test offline TTS
  82 + shell: bash
  83 + run: |
  84 + export PATH=$PWD/build/bin:$PATH
  85 + export EXE=sherpa-onnx-offline-tts
  86 +
  87 + .github/scripts/test-offline-tts.sh
  88 +
79 - name: Test online paraformer 89 - name: Test online paraformer
80 shell: bash 90 shell: bash
81 run: | 91 run: |
@@ -150,3 +160,8 @@ jobs: @@ -150,3 +160,8 @@ jobs:
150 file_glob: true 160 file_glob: true
151 overwrite: true 161 overwrite: true
152 file: sherpa-onnx-*linux-x64.tar.bz2 162 file: sherpa-onnx-*linux-x64.tar.bz2
  163 +
  164 + - uses: actions/upload-artifact@v3
  165 + with:
  166 + name: tts-generated-test-files
  167 + path: tts
@@ -12,6 +12,7 @@ on: @@ -12,6 +12,7 @@ on:
12 - '.github/scripts/test-online-paraformer.sh' 12 - '.github/scripts/test-online-paraformer.sh'
13 - '.github/scripts/test-offline-transducer.sh' 13 - '.github/scripts/test-offline-transducer.sh'
14 - '.github/scripts/test-offline-ctc.sh' 14 - '.github/scripts/test-offline-ctc.sh'
  15 + - '.github/scripts/test-offline-tts.sh'
15 - 'CMakeLists.txt' 16 - 'CMakeLists.txt'
16 - 'cmake/**' 17 - 'cmake/**'
17 - 'sherpa-onnx/csrc/*' 18 - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on: @@ -24,6 +25,7 @@ on:
24 - '.github/scripts/test-online-paraformer.sh' 25 - '.github/scripts/test-online-paraformer.sh'
25 - '.github/scripts/test-offline-transducer.sh' 26 - '.github/scripts/test-offline-transducer.sh'
26 - '.github/scripts/test-offline-ctc.sh' 27 - '.github/scripts/test-offline-ctc.sh'
  28 + - '.github/scripts/test-offline-tts.sh'
27 - 'CMakeLists.txt' 29 - 'CMakeLists.txt'
28 - 'cmake/**' 30 - 'cmake/**'
29 - 'sherpa-onnx/csrc/*' 31 - 'sherpa-onnx/csrc/*'
@@ -44,7 +46,7 @@ jobs: @@ -44,7 +46,7 @@ jobs:
44 build_type: [Release, Debug] 46 build_type: [Release, Debug]
45 47
46 steps: 48 steps:
47 - - uses: actions/checkout@v2 49 + - uses: actions/checkout@v3
48 with: 50 with:
49 fetch-depth: 0 51 fetch-depth: 0
50 52
@@ -74,6 +76,14 @@ jobs: @@ -74,6 +76,14 @@ jobs:
74 otool -L build/bin/sherpa-onnx 76 otool -L build/bin/sherpa-onnx
75 otool -l build/bin/sherpa-onnx 77 otool -l build/bin/sherpa-onnx
76 78
  79 + - name: Test offline TTS
  80 + shell: bash
  81 + run: |
  82 + export PATH=$PWD/build/bin:$PATH
  83 + export EXE=sherpa-onnx-offline-tts
  84 +
  85 + .github/scripts/test-offline-tts.sh
  86 +
77 - name: Test online paraformer 87 - name: Test online paraformer
78 shell: bash 88 shell: bash
79 run: | 89 run: |
@@ -9,6 +9,7 @@ on: @@ -9,6 +9,7 @@ on:
9 - '*' 9 - '*'
10 paths: 10 paths:
11 - '.github/workflows/pkg-config.yaml' 11 - '.github/workflows/pkg-config.yaml'
  12 + - '.github/scripts/test-offline-tts.sh'
12 - 'CMakeLists.txt' 13 - 'CMakeLists.txt'
13 - 'cmake/**' 14 - 'cmake/**'
14 - 'sherpa-onnx/csrc/*' 15 - 'sherpa-onnx/csrc/*'
@@ -19,6 +20,7 @@ on: @@ -19,6 +20,7 @@ on:
19 - master 20 - master
20 paths: 21 paths:
21 - '.github/workflows/pkg-config.yaml' 22 - '.github/workflows/pkg-config.yaml'
  23 + - '.github/scripts/test-offline-tts.sh'
22 - 'CMakeLists.txt' 24 - 'CMakeLists.txt'
23 - 'cmake/**' 25 - 'cmake/**'
24 - 'sherpa-onnx/csrc/*' 26 - 'sherpa-onnx/csrc/*'
@@ -92,9 +94,20 @@ jobs: @@ -92,9 +94,20 @@ jobs:
92 run: | 94 run: |
93 export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH 95 export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH
94 cd c-api-examples 96 cd c-api-examples
  97 +
95 gcc -o decode-file-c-api $(pkg-config --cflags sherpa-onnx) ./decode-file-c-api.c $(pkg-config --libs sherpa-onnx) 98 gcc -o decode-file-c-api $(pkg-config --cflags sherpa-onnx) ./decode-file-c-api.c $(pkg-config --libs sherpa-onnx)
96 ./decode-file-c-api --help 99 ./decode-file-c-api --help
97 100
  101 + gcc -o offline-tts-c-api $(pkg-config --cflags sherpa-onnx) ./offline-tts-c-api.c $(pkg-config --libs sherpa-onnx)
  102 + ./offline-tts-c-api --help
  103 +
  104 + - name: Test offline TTS C API
  105 + shell: bash
  106 + run: |
  107 + export PATH=$PWD/c-api-examples:$PATH
  108 + export EXE=offline-tts-c-api
  109 + .github/scripts/test-offline-tts.sh
  110 +
98 - name: Test online transducer (C API) 111 - name: Test online transducer (C API)
99 shell: bash 112 shell: bash
100 run: | 113 run: |
@@ -102,3 +115,8 @@ jobs: @@ -102,3 +115,8 @@ jobs:
102 export EXE=decode-file-c-api 115 export EXE=decode-file-c-api
103 116
104 .github/scripts/test-online-transducer.sh 117 .github/scripts/test-online-transducer.sh
  118 +
  119 + - uses: actions/upload-artifact@v3
  120 + with:
  121 + name: tts-generated-test-files
  122 + path: tts
@@ -12,6 +12,7 @@ on: @@ -12,6 +12,7 @@ on:
12 - '.github/scripts/test-online-paraformer.sh' 12 - '.github/scripts/test-online-paraformer.sh'
13 - '.github/scripts/test-offline-transducer.sh' 13 - '.github/scripts/test-offline-transducer.sh'
14 - '.github/scripts/test-offline-ctc.sh' 14 - '.github/scripts/test-offline-ctc.sh'
  15 + - '.github/scripts/test-offline-tts.sh'
15 - 'CMakeLists.txt' 16 - 'CMakeLists.txt'
16 - 'cmake/**' 17 - 'cmake/**'
17 - 'sherpa-onnx/csrc/*' 18 - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on: @@ -24,6 +25,7 @@ on:
24 - '.github/scripts/test-online-paraformer.sh' 25 - '.github/scripts/test-online-paraformer.sh'
25 - '.github/scripts/test-offline-transducer.sh' 26 - '.github/scripts/test-offline-transducer.sh'
26 - '.github/scripts/test-offline-ctc.sh' 27 - '.github/scripts/test-offline-ctc.sh'
  28 + - '.github/scripts/test-offline-tts.sh'
27 - 'CMakeLists.txt' 29 - 'CMakeLists.txt'
28 - 'cmake/**' 30 - 'cmake/**'
29 - 'sherpa-onnx/csrc/*' 31 - 'sherpa-onnx/csrc/*'
@@ -64,6 +66,14 @@ jobs: @@ -64,6 +66,14 @@ jobs:
64 66
65 ls -lh ./bin/Release/sherpa-onnx.exe 67 ls -lh ./bin/Release/sherpa-onnx.exe
66 68
  69 + - name: Test offline TTS
  70 + shell: bash
  71 + run: |
  72 + export PATH=$PWD/build/bin/Release:$PATH
  73 + export EXE=sherpa-onnx-offline-tts.exe
  74 +
  75 + .github/scripts/test-offline-tts.sh
  76 +
67 - name: Test online paraformer for windows x64 77 - name: Test online paraformer for windows x64
68 shell: bash 78 shell: bash
69 run: | 79 run: |
@@ -12,6 +12,7 @@ on: @@ -12,6 +12,7 @@ on:
12 - '.github/scripts/test-online-paraformer.sh' 12 - '.github/scripts/test-online-paraformer.sh'
13 - '.github/scripts/test-offline-transducer.sh' 13 - '.github/scripts/test-offline-transducer.sh'
14 - '.github/scripts/test-offline-ctc.sh' 14 - '.github/scripts/test-offline-ctc.sh'
  15 + - '.github/scripts/test-offline-tts.sh'
15 - 'CMakeLists.txt' 16 - 'CMakeLists.txt'
16 - 'cmake/**' 17 - 'cmake/**'
17 - 'sherpa-onnx/csrc/*' 18 - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on: @@ -24,6 +25,7 @@ on:
24 - '.github/scripts/test-online-paraformer.sh' 25 - '.github/scripts/test-online-paraformer.sh'
25 - '.github/scripts/test-offline-transducer.sh' 26 - '.github/scripts/test-offline-transducer.sh'
26 - '.github/scripts/test-offline-ctc.sh' 27 - '.github/scripts/test-offline-ctc.sh'
  28 + - '.github/scripts/test-offline-tts.sh'
27 - 'CMakeLists.txt' 29 - 'CMakeLists.txt'
28 - 'cmake/**' 30 - 'cmake/**'
29 - 'sherpa-onnx/csrc/*' 31 - 'sherpa-onnx/csrc/*'
@@ -45,7 +47,7 @@ jobs: @@ -45,7 +47,7 @@ jobs:
45 shared_lib: [ON, OFF] 47 shared_lib: [ON, OFF]
46 48
47 steps: 49 steps:
48 - - uses: actions/checkout@v2 50 + - uses: actions/checkout@v3
49 with: 51 with:
50 fetch-depth: 0 52 fetch-depth: 0
51 53
@@ -65,6 +67,14 @@ jobs: @@ -65,6 +67,14 @@ jobs:
65 67
66 ls -lh ./bin/Release/sherpa-onnx.exe 68 ls -lh ./bin/Release/sherpa-onnx.exe
67 69
  70 + - name: Test offline TTS
  71 + shell: bash
  72 + run: |
  73 + export PATH=$PWD/build/bin/Release:$PATH
  74 + export EXE=sherpa-onnx-offline-tts.exe
  75 +
  76 + .github/scripts/test-offline-tts.sh
  77 +
68 - name: Test online paraformer for windows x64 78 - name: Test online paraformer for windows x64
69 shell: bash 79 shell: bash
70 run: | 80 run: |
@@ -12,6 +12,7 @@ on: @@ -12,6 +12,7 @@ on:
12 - '.github/scripts/test-online-paraformer.sh' 12 - '.github/scripts/test-online-paraformer.sh'
13 - '.github/scripts/test-offline-transducer.sh' 13 - '.github/scripts/test-offline-transducer.sh'
14 - '.github/scripts/test-offline-ctc.sh' 14 - '.github/scripts/test-offline-ctc.sh'
  15 + - '.github/scripts/test-offline-tts.sh'
15 - 'CMakeLists.txt' 16 - 'CMakeLists.txt'
16 - 'cmake/**' 17 - 'cmake/**'
17 - 'sherpa-onnx/csrc/*' 18 - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on: @@ -24,6 +25,7 @@ on:
24 - '.github/scripts/test-online-paraformer.sh' 25 - '.github/scripts/test-online-paraformer.sh'
25 - '.github/scripts/test-offline-transducer.sh' 26 - '.github/scripts/test-offline-transducer.sh'
26 - '.github/scripts/test-offline-ctc.sh' 27 - '.github/scripts/test-offline-ctc.sh'
  28 + - '.github/scripts/test-offline-tts.sh'
27 - 'CMakeLists.txt' 29 - 'CMakeLists.txt'
28 - 'cmake/**' 30 - 'cmake/**'
29 - 'sherpa-onnx/csrc/*' 31 - 'sherpa-onnx/csrc/*'
@@ -45,7 +47,7 @@ jobs: @@ -45,7 +47,7 @@ jobs:
45 shared_lib: [ON, OFF] 47 shared_lib: [ON, OFF]
46 48
47 steps: 49 steps:
48 - - uses: actions/checkout@v2 50 + - uses: actions/checkout@v3
49 with: 51 with:
50 fetch-depth: 0 52 fetch-depth: 0
51 53
@@ -65,6 +67,14 @@ jobs: @@ -65,6 +67,14 @@ jobs:
65 67
66 ls -lh ./bin/Release/sherpa-onnx.exe 68 ls -lh ./bin/Release/sherpa-onnx.exe
67 69
  70 + - name: Test offline TTS
  71 + shell: bash
  72 + run: |
  73 + export PATH=$PWD/build/bin/Release:$PATH
  74 + export EXE=sherpa-onnx-offline-tts.exe
  75 +
  76 + .github/scripts/test-offline-tts.sh
  77 +
68 - name: Test online paraformer for windows x86 78 - name: Test online paraformer for windows x86
69 shell: bash 79 shell: bash
70 run: | 80 run: |
@@ -21,6 +21,7 @@ run-bilingual*.sh @@ -21,6 +21,7 @@ run-bilingual*.sh
21 run-*-zipformer.sh 21 run-*-zipformer.sh
22 run-zh.sh 22 run-zh.sh
23 decode-file-c-api 23 decode-file-c-api
  24 +offline-tts-c-api
24 run-decode-file-c-api.sh 25 run-decode-file-c-api.sh
25 sherpa-onnx-ffmpeg 26 sherpa-onnx-ffmpeg
26 build-ios 27 build-ios
@@ -3,3 +3,6 @@ include(cargs) @@ -3,3 +3,6 @@ include(cargs)
3 include_directories(${CMAKE_SOURCE_DIR}) 3 include_directories(${CMAKE_SOURCE_DIR})
4 add_executable(decode-file-c-api decode-file-c-api.c) 4 add_executable(decode-file-c-api decode-file-c-api.c)
5 target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs) 5 target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
  6 +
  7 +add_executable(offline-tts-c-api offline-tts-c-api.c)
  8 +target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
@@ -4,9 +4,19 @@ CUR_DIR :=$(shell pwd) @@ -4,9 +4,19 @@ CUR_DIR :=$(shell pwd)
4 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ 4 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
5 LDFLAGS := -L ../build/lib 5 LDFLAGS := -L ../build/lib
6 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib 6 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
7 -LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lcargs 7 +LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs
8 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib 8 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
9 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/_deps/onnxruntime-src/lib 9 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/_deps/onnxruntime-src/lib
10 10
  11 +.PHONY: all clean
  12 +
  13 +all: decode-file-c-api offline-tts-c-api
  14 +
11 decode-file-c-api: decode-file-c-api.c 15 decode-file-c-api: decode-file-c-api.c
12 $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) 16 $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
  17 +
  18 +offline-tts-c-api: offline-tts-c-api.c
  19 + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
  20 +
  21 +clean:
  22 + $(RM) ./decode-file-c-api ./offline-tts-c-api
@@ -6,4 +6,13 @@ Please refer to the documentation @@ -6,4 +6,13 @@ Please refer to the documentation
6 https://k2-fsa.github.io/sherpa/onnx/c-api/index.html 6 https://k2-fsa.github.io/sherpa/onnx/c-api/index.html
7 for details. 7 for details.
8 8
  9 +
  10 +## File descriptions
  11 +
  12 +- [decode-file-c-api.c](./decode-file-c-api.c) This file shows how to use the C API
  13 + for speech recognition with a streaming model.
  14 +
  15 +- [offline-tts-c-api.c](./offline-tts-c-api.c) This file shows how to use the C API
  16 + to convert text to speech with a non-streaming model.
  17 +
9 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx 18 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
  1 +// c-api-examples/offline-tts-c-api.c
  2 +//
  3 +// Copyright (c) 2023 Xiaomi Corporation
  4 +
  5 +// This file shows how to use sherpa-onnx C API
  6 +// to convert text to speech using an offline model.
  7 +
  8 +#include <stdio.h>
  9 +#include <stdlib.h>
  10 +#include <string.h>
  11 +
  12 +#include "cargs.h"
  13 +#include "sherpa-onnx/c-api/c-api.h"
  14 +
  15 +static struct cag_option options[] = {
  16 + {.identifier = 'h',
  17 + .access_letters = "h",
  18 + .access_name = "help",
  19 + .description = "Show help"},
  20 + {.access_name = "vits-model",
  21 + .value_name = "/path/to/xxx.onnx",
  22 + .identifier = '0',
  23 + .description = "Path to VITS model"},
  24 + {.access_name = "vits-lexicon",
  25 + .value_name = "/path/to/lexicon.txt",
  26 + .identifier = '1',
  27 + .description = "Path to lexicon.txt for VITS models"},
  28 + {.access_name = "vits-tokens",
  29 + .value_name = "/path/to/tokens.txt",
  30 + .identifier = '2',
  31 + .description = "Path to tokens.txt for VITS models"},
  32 + {.access_name = "vits-noise-scale",
  33 + .value_name = "0.667",
  34 + .identifier = '3',
  35 + .description = "noise_scale for VITS models"},
  36 + {.access_name = "vits-noise-scale-w",
  37 + .value_name = "0.8",
  38 + .identifier = '4',
  39 + .description = "noise_scale_w for VITS models"},
  40 + {.access_name = "vits-length-scale",
  41 + .value_name = "1.0",
  42 + .identifier = '5',
  43 + .description =
  44 + "length_scale for VITS models. Default to 1. You can tune it "
  45 + "to change the speech speed. small -> faster; large -> slower. "},
  46 + {.access_name = "num-threads",
  47 + .value_name = "1",
  48 + .identifier = '6',
  49 + .description = "Number of threads"},
  50 + {.access_name = "provider",
  51 + .value_name = "cpu",
  52 + .identifier = '7',
  53 + .description = "Provider: cpu (default), cuda, coreml"},
  54 + {.access_name = "debug",
  55 + .value_name = "0",
  56 + .identifier = '8',
  57 + .description = "1 to show debug messages while loading the model"},
  58 + {.access_name = "sid",
  59 + .value_name = "0",
  60 + .identifier = '9',
  61 + .description = "Speaker ID. Default to 0. Note it is not used for "
  62 + "single-speaker models."},
  63 + {.access_name = "output-filename",
  64 + .value_name = "./generated.wav",
  65 + .identifier = 'a',
  66 + .description =
  67 + "Filename to save the generated audio. Default to ./generated.wav"},
  68 +};
  69 +
  70 +static void ShowUsage() {
  71 + const char *kUsageMessage =
  72 + "Offline text-to-speech with sherpa-onnx C API"
  73 + "\n"
  74 + "./offline-tts-c-api \\\n"
  75 + " --vits-model=/path/to/model.onnx \\\n"
  76 + " --vits-lexicon=/path/to/lexicon.txt \\\n"
  77 + " --vits-tokens=/path/to/tokens.txt \\\n"
  78 + " --sid=0 \\\n"
  79 + " --output-filename=./generated.wav \\\n"
  80 + " 'some text within single quotes on linux/macos or use double quotes on "
  81 + "windows'\n"
  82 + "\n"
  83 + "It will generate a file ./generated.wav as specified by "
  84 + "--output-filename.\n"
  85 + "\n"
  86 + "You can download a test model from\n"
  87 + "https://huggingface.co/csukuangfj/vits-ljs\n"
  88 + "\n"
  89 + "For instance, you can use:\n"
  90 + "wget "
  91 + "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx\n"
  92 + "wget "
  93 + "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt\n"
  94 + "wget "
  95 + "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt\n"
  96 + "\n"
  97 + "./offline-tts-c-api \\\n"
  98 + " --vits-model=./vits-ljs.onnx \\\n"
  99 + " --vits-lexicon=./lexicon.txt \\\n"
  100 + " --vits-tokens=./tokens.txt \\\n"
  101 + " --sid=0 \\\n"
  102 + " --output-filename=./generated.wav \\\n"
  103 + " 'liliana, the most beautiful and lovely assistant of our team!'\n"
  104 + "\n"
  105 + "Please see\n"
  106 + "https://k2-fsa.github.io/sherpa/onnx/tts/index.html\n"
  107 + "or details.\n\n";
  108 +
  109 + fprintf(stderr, "%s", kUsageMessage);
  110 + cag_option_print(options, CAG_ARRAY_SIZE(options), stderr);
  111 + exit(0);
  112 +}
  113 +
  114 +int32_t main(int32_t argc, char *argv[]) {
  115 + cag_option_context context;
  116 + char identifier;
  117 + const char *value;
  118 +
  119 + cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv);
  120 +
  121 + SherpaOnnxOfflineTtsConfig config;
  122 + memset(&config, 0, sizeof(config));
  123 +
  124 + int32_t sid = 0;
  125 + const char *filename = strdup("./generated.wav");
  126 + const char *text;
  127 +
  128 + while (cag_option_fetch(&context)) {
  129 + identifier = cag_option_get(&context);
  130 + value = cag_option_get_value(&context);
  131 + switch (identifier) {
  132 + case '0':
  133 + config.model.vits.model = value;
  134 + break;
  135 + case '1':
  136 + config.model.vits.lexicon = value;
  137 + break;
  138 + case '2':
  139 + config.model.vits.tokens = value;
  140 + break;
  141 + case '3':
  142 + config.model.vits.noise_scale = atof(value);
  143 + break;
  144 + case '4':
  145 + config.model.vits.noise_scale_w = atof(value);
  146 + break;
  147 + case '5':
  148 + config.model.vits.length_scale = atof(value);
  149 + break;
  150 + case '6':
  151 + config.model.num_threads = atoi(value);
  152 + break;
  153 + case '7':
  154 + config.model.provider = value;
  155 + break;
  156 + case '8':
  157 + config.model.debug = atoi(value);
  158 + break;
  159 + case '9':
  160 + sid = atoi(value);
  161 + break;
  162 + case 'a':
  163 + free((void *)filename);
  164 + filename = strdup(value);
  165 + break;
  166 + case 'h':
  167 + // fall through
  168 + default:
  169 + ShowUsage();
  170 + }
  171 + }
  172 +
  173 + if (!config.model.vits.model || !config.model.vits.lexicon ||
  174 + !config.model.vits.tokens) {
  175 + ShowUsage();
  176 + }
  177 +
  178 + // the last arg is the text
  179 + text = argv[argc - 1];
  180 + if (text[0] == '-') {
  181 + fprintf(stderr, "\n***Please input your text!***\n\n");
  182 + fprintf(stderr, "\n---------------Usage---------------\n\n");
  183 + ShowUsage();
  184 + }
  185 +
  186 + SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  187 +
  188 + const SherpaOnnxGeneratedAudio *audio =
  189 + SherpaOnnxOfflineTtsGenerate(tts, text, sid);
  190 +
  191 + SherpaOnnxDestroyOfflineWriteWave(audio, filename);
  192 +
  193 + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  194 + SherpaOnnxDestroyOfflineTts(tts);
  195 +
  196 + fprintf(stderr, "Input text is: %s\n", text);
  197 + fprintf(stderr, "Speaker ID is is: %d\n", sid);
  198 + fprintf(stderr, "Saved to: %s\n", filename);
  199 +
  200 + free((void *)filename);
  201 +
  202 + return 0;
  203 +}
@@ -12,8 +12,10 @@ @@ -12,8 +12,10 @@
12 #include "sherpa-onnx/csrc/circular-buffer.h" 12 #include "sherpa-onnx/csrc/circular-buffer.h"
13 #include "sherpa-onnx/csrc/display.h" 13 #include "sherpa-onnx/csrc/display.h"
14 #include "sherpa-onnx/csrc/offline-recognizer.h" 14 #include "sherpa-onnx/csrc/offline-recognizer.h"
  15 +#include "sherpa-onnx/csrc/offline-tts.h"
15 #include "sherpa-onnx/csrc/online-recognizer.h" 16 #include "sherpa-onnx/csrc/online-recognizer.h"
16 #include "sherpa-onnx/csrc/voice-activity-detector.h" 17 #include "sherpa-onnx/csrc/voice-activity-detector.h"
  18 +#include "sherpa-onnx/csrc/wave-writer.h"
17 19
18 struct SherpaOnnxOnlineRecognizer { 20 struct SherpaOnnxOnlineRecognizer {
19 std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl; 21 std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
@@ -204,12 +206,14 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( @@ -204,12 +206,14 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
204 } 206 }
205 207
206 void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) { 208 void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) {
207 - delete[] r->text;  
208 - delete[] r->json;  
209 - delete[] r->tokens;  
210 - delete[] r->tokens_arr;  
211 - delete[] r->timestamps;  
212 - delete r; 209 + if (r) {
  210 + delete[] r->text;
  211 + delete[] r->json;
  212 + delete[] r->tokens;
  213 + delete[] r->tokens_arr;
  214 + delete[] r->timestamps;
  215 + delete r;
  216 + }
213 } 217 }
214 218
215 void Reset(SherpaOnnxOnlineRecognizer *recognizer, 219 void Reset(SherpaOnnxOnlineRecognizer *recognizer,
@@ -385,9 +389,11 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( @@ -385,9 +389,11 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
385 389
386 void DestroyOfflineRecognizerResult( 390 void DestroyOfflineRecognizerResult(
387 const SherpaOnnxOfflineRecognizerResult *r) { 391 const SherpaOnnxOfflineRecognizerResult *r) {
388 - delete[] r->text;  
389 - delete[] r->timestamps;  
390 - delete r; 392 + if (r) {
  393 + delete[] r->text;
  394 + delete[] r->timestamps;
  395 + delete r;
  396 + }
391 } 397 }
392 398
393 // ============================================================ 399 // ============================================================
@@ -493,18 +499,16 @@ int32_t SherpaOnnxVoiceActivityDetectorDetected( @@ -493,18 +499,16 @@ int32_t SherpaOnnxVoiceActivityDetectorDetected(
493 return p->impl->IsSpeechDetected(); 499 return p->impl->IsSpeechDetected();
494 } 500 }
495 501
496 -void SherpaOnnxVoiceActivityDetectorPop(  
497 - SherpaOnnxVoiceActivityDetector *p) { 502 +void SherpaOnnxVoiceActivityDetectorPop(SherpaOnnxVoiceActivityDetector *p) {
498 p->impl->Pop(); 503 p->impl->Pop();
499 } 504 }
500 505
501 -void SherpaOnnxVoiceActivityDetectorClear(  
502 - SherpaOnnxVoiceActivityDetector *p) { 506 +void SherpaOnnxVoiceActivityDetectorClear(SherpaOnnxVoiceActivityDetector *p) {
503 p->impl->Clear(); 507 p->impl->Clear();
504 } 508 }
505 509
506 -const SherpaOnnxSpeechSegment *  
507 -SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) { 510 +const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
  511 + SherpaOnnxVoiceActivityDetector *p) {
508 const sherpa_onnx::SpeechSegment &segment = p->impl->Front(); 512 const sherpa_onnx::SpeechSegment &segment = p->impl->Front();
509 513
510 SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment; 514 SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment;
@@ -517,10 +521,81 @@ SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) { @@ -517,10 +521,81 @@ SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
517 } 521 }
518 522
519 void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) { 523 void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
520 - delete[] p->samples;  
521 - delete p; 524 + if (p) {
  525 + delete[] p->samples;
  526 + delete p;
  527 + }
522 } 528 }
523 529
524 void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { 530 void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
525 p->impl->Reset(); 531 p->impl->Reset();
526 } 532 }
  533 +
  534 +struct SherpaOnnxOfflineTts {
  535 + std::unique_ptr<sherpa_onnx::OfflineTts> impl;
  536 +};
  537 +
  538 +SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
  539 + const SherpaOnnxOfflineTtsConfig *config) {
  540 + sherpa_onnx::OfflineTtsConfig tts_config;
  541 +
  542 + tts_config.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
  543 + tts_config.model.vits.lexicon =
  544 + SHERPA_ONNX_OR(config->model.vits.lexicon, "");
  545 + tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
  546 + tts_config.model.vits.noise_scale =
  547 + SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
  548 + tts_config.model.vits.noise_scale_w =
  549 + SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
  550 + tts_config.model.vits.length_scale =
  551 + SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);
  552 +
  553 + tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
  554 + tts_config.model.debug = config->model.debug;
  555 + tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
  556 +
  557 + if (tts_config.model.debug) {
  558 + fprintf(stderr, "%s\n", tts_config.ToString().c_str());
  559 + }
  560 +
  561 + SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;
  562 +
  563 + tts->impl = std::make_unique<sherpa_onnx::OfflineTts>(tts_config);
  564 +
  565 + return tts;
  566 +}
  567 +
  568 +void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; }
  569 +
  570 +const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
  571 + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) {
  572 + sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid);
  573 +
  574 + if (audio.samples.empty()) {
  575 + return nullptr;
  576 + }
  577 +
  578 + SherpaOnnxGeneratedAudio *ans = new SherpaOnnxGeneratedAudio;
  579 +
  580 + float *samples = new float[audio.samples.size()];
  581 + std::copy(audio.samples.begin(), audio.samples.end(), samples);
  582 +
  583 + ans->samples = samples;
  584 + ans->n = audio.samples.size();
  585 + ans->sample_rate = audio.sample_rate;
  586 +
  587 + return ans;
  588 +}
  589 +
  590 +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
  591 + const SherpaOnnxGeneratedAudio *p) {
  592 + if (p) {
  593 + delete[] p->samples;
  594 + delete p;
  595 + }
  596 +}
  597 +
  598 +int32_t SherpaOnnxDestroyOfflineWriteWave(const SherpaOnnxGeneratedAudio *p,
  599 + const char *filename) {
  600 + return sherpa_onnx::WriteWave(filename, p->sample_rate, p->samples, p->n);
  601 +}
@@ -595,6 +595,62 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( @@ -595,6 +595,62 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
595 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset( 595 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
596 SherpaOnnxVoiceActivityDetector *p); 596 SherpaOnnxVoiceActivityDetector *p);
597 597
  598 +// ============================================================
  599 +// For offline Text-to-Speech (i.e., non-streaming TTS)
  600 +// ============================================================
  601 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsVitsModelConfig {
  602 + const char *model;
  603 + const char *lexicon;
  604 + const char *tokens;
  605 +
  606 + float noise_scale;
  607 + float noise_scale_w;
  608 + float length_scale; // < 1, faster in speed; > 1, slower in speed
  609 +} SherpaOnnxOfflineTtsVitsModelConfig;
  610 +
  611 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
  612 + SherpaOnnxOfflineTtsVitsModelConfig vits;
  613 + int32_t num_threads;
  614 + int32_t debug;
  615 + const char *provider;
  616 +} SherpaOnnxOfflineTtsModelConfig;
  617 +
  618 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
  619 + SherpaOnnxOfflineTtsModelConfig model;
  620 +} SherpaOnnxOfflineTtsConfig;
  621 +
  622 +SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
  623 + const float *samples; // in the range [-1, 1]
  624 + int32_t n; // number of samples
  625 + int32_t sample_rate;
  626 +} SherpaOnnxGeneratedAudio;
  627 +
  628 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
  629 +
  630 +// Create an instance of offline TTS. The user has to use
  631 +// SherpaOnnxDestroyOfflineTts() to free the returned pointer to avoid memory leak.
  632 +SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
  633 + const SherpaOnnxOfflineTtsConfig *config);
  634 +
  635 +// Free the pointer returned by SherpaOnnxCreateOfflineTts()
  636 +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
  637 +
  638 +// Generate audio from the given text and speaker id (sid).
  639 +// The user has to use SherpaOnnxDestroyOfflineTtsGeneratedAudio() to free the
  640 +// returned pointer to avoid memory leak.
  641 +SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
  642 + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid);
  643 +
  644 +SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
  645 + const SherpaOnnxGeneratedAudio *p);
  646 +
  647 +// Write the generated audio to a wave file.
  648 +// The saved wave file contains a single channel and has 16-bit samples.
  649 +//
  650 +// Return 1 if the write succeeded; return 0 on failure.
  651 +SHERPA_ONNX_API int32_t SherpaOnnxDestroyOfflineWriteWave(
  652 + const SherpaOnnxGeneratedAudio *p, const char *filename);
  653 +
598 #if defined(__GNUC__) 654 #if defined(__GNUC__)
599 #pragma GCC diagnostic pop 655 #pragma GCC diagnostic pop
600 #endif 656 #endif
@@ -40,7 +40,7 @@ wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt @@ -40,7 +40,7 @@ wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt
40 40
41 Please see 41 Please see
42 https://k2-fsa.github.io/sherpa/onnx/tts/index.html 42 https://k2-fsa.github.io/sherpa/onnx/tts/index.html
43 -or detailes. 43 +or details.
44 )usage"; 44 )usage";
45 45
46 sherpa_onnx::ParseOptions po(kUsageMessage); 46 sherpa_onnx::ParseOptions po(kUsageMessage);