Add C API for offline TTS. (#373)

Fangjun Kuang · GitHub
Commit ea7c45b60cac64521cca60a33b10b88fdad20ed0 ea7c45b6 1 parent 58ab7e77
.github/scripts/test-offline-tts.sh
.github/workflows/linux-gpu.yaml
.github/workflows/linux.yaml
.github/workflows/macos.yaml
.github/workflows/pkg-config.yaml
.github/workflows/windows-x64-cuda.yaml
.github/workflows/windows-x64.yaml
.github/workflows/windows-x86.yaml
.gitignore
c-api-examples/CMakeLists.txt
c-api-examples/Makefile
c-api-examples/README.md
c-api-examples/offline-tts-c-api.c
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
--- a/.github/scripts/test-offline-tts.sh 0 → 100755
查看文件 @ea7c45b
+++ b/.github/scripts/test-offline-tts.sh 0 → 100755
查看文件 @ea7c45b
+ #!/usr/bin/env bash
+ 
+ set -e
+ 
+ # Print a message prefixed with the current date/time and the location of
+ # the caller: source file name (directory part stripped), line number of the
+ # call, and the name of the calling function.
+ #
+ # Arguments: the message text ($*). Escape sequences are interpreted (echo -e).
+ log() {
+   # This function is from espnet
+   local fname=${BASH_SOURCE[1]##*/}
+   echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+ }
+ 
+ # Show which executable is under test and where it will be searched for.
+ echo "EXE is $EXE"
+ echo "PATH: $PATH"
+ 
+ # With `set -e`, the script stops here if the executable is not in PATH.
+ which "$EXE"
+ 
+ # test waves are saved in ./tts
+ # Use -p so that an already existing ./tts (e.g., from a previous run) does
+ # not abort the script under `set -e`.
+ mkdir -p ./tts
+ 
+ log "------------------------------------------------------------"
+ log "vits-ljs test"
+ log "------------------------------------------------------------"
+ 
+ repo_url=https://huggingface.co/csukuangfj/vits-ljs
+ log "Start testing ${repo_url}"
+ repo=$(basename $repo_url)
+ log "Download pretrained model from $repo_url"
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+ pushd $repo
+ git lfs pull --include "*.onnx"
+ ls -lh *.onnx
+ popd
+ 
+ $EXE \
+   --vits-model=$repo/vits-ljs.onnx \
+   --vits-lexicon=$repo/lexicon.txt \
+   --vits-tokens=$repo/tokens.txt \
+   --output-filename=./tts/vits-ljs.wav \
+   'liliana, the most beautiful and lovely assistant of our team!'
+ 
+ ls -lh ./tts
+ 
+ rm -rfv $repo
+ 
+ # ---- Test 2: vits-vctk; run once per speaker ID ----
+ log "------------------------------------------------------------"
+ log "vits-vctk test"
+ log "------------------------------------------------------------"
+ 
+ repo_url=https://huggingface.co/csukuangfj/vits-vctk
+ log "Start testing ${repo_url}"
+ # The local directory name is the last component of the URL.
+ repo=$(basename $repo_url)
+ log "Download pretrained model from $repo_url"
+ # Clone without fetching git-lfs objects, then pull only the *.onnx files.
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+ pushd $repo
+ git lfs pull --include "*.onnx"
+ ls -lh *.onnx
+ popd
+ 
+ # Generate one wave file per speaker ID: ./tts/vits-vctk-<sid>.wav
+ for sid in 0 10 90; do
+   $EXE \
+     --vits-model=$repo/vits-vctk.onnx \
+     --vits-lexicon=$repo/lexicon.txt \
+     --vits-tokens=$repo/tokens.txt \
+     --sid=$sid \
+     --output-filename=./tts/vits-vctk-${sid}.wav \
+     'liliana, the most beautiful and lovely assistant of our team!'
+ done
+ 
+ # Remove the downloaded model directory.
+ rm -rfv $repo
+ 
+ ls -lh tts/
+ 
+ # ---- Test 3: vits-zh-aishell3; Chinese text, run once per speaker ID ----
+ log "------------------------------------------------------------"
+ log "vits-zh-aishell3"
+ log "------------------------------------------------------------"
+ 
+ repo_url=https://huggingface.co/csukuangfj/vits-zh-aishell3
+ log "Start testing ${repo_url}"
+ # The local directory name is the last component of the URL.
+ repo=$(basename $repo_url)
+ log "Download pretrained model from $repo_url"
+ # Clone without fetching git-lfs objects, then pull only the *.onnx files.
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+ pushd $repo
+ git lfs pull --include "*.onnx"
+ ls -lh *.onnx
+ popd
+ 
+ # Generate one wave file per speaker ID: ./tts/vits-aishell3-<sid>.wav
+ for sid in 0 10 90; do
+   $EXE \
+     --vits-model=$repo/vits-aishell3.onnx \
+     --vits-lexicon=$repo/lexicon.txt \
+     --vits-tokens=$repo/tokens.txt \
+     --sid=$sid \
+     --output-filename=./tts/vits-aishell3-${sid}.wav \
+     '林美丽最美丽'
+ done
+ 
+ # Remove the downloaded model directory.
+ rm -rfv $repo
+ 
+ ls -lh ./tts/
--- a/.github/workflows/linux-gpu.yaml
查看文件 @ea7c45b
+++ b/.github/workflows/linux-gpu.yaml
查看文件 @ea7c45b
@@ -12,6 +12,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -26,6 +27,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -48,7 +50,7 @@ jobs:
         build_type: [Release, Debug]
 
     steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
 
@@ -75,6 +77,14 @@ jobs:
           file build/bin/sherpa-onnx
           readelf -d build/bin/sherpa-onnx
 
+       - name: Test offline TTS
+         shell: bash
+         run: |
+           export PATH=$PWD/build/bin:$PATH
+           export EXE=sherpa-onnx-offline-tts
+ 
+           .github/scripts/test-offline-tts.sh
+ 
       - name: Test online paraformer
         shell: bash
         run: |
--- a/.github/workflows/linux.yaml
查看文件 @ea7c45b
+++ b/.github/workflows/linux.yaml
查看文件 @ea7c45b
@@ -12,6 +12,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -26,6 +27,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -49,7 +51,7 @@ jobs:
         shared_lib: [ON, OFF]
 
     steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
 
@@ -76,6 +78,14 @@ jobs:
           file build/bin/sherpa-onnx
           readelf -d build/bin/sherpa-onnx
 
+       - name: Test offline TTS
+         shell: bash
+         run: |
+           export PATH=$PWD/build/bin:$PATH
+           export EXE=sherpa-onnx-offline-tts
+ 
+           .github/scripts/test-offline-tts.sh
+ 
       - name: Test online paraformer
         shell: bash
         run: |
@@ -150,3 +160,8 @@ jobs:
           file_glob: true
           overwrite: true
           file: sherpa-onnx-*linux-x64.tar.bz2
+ 
+       - uses: actions/upload-artifact@v3
+         with:
+           name: tts-generated-test-files
+           path: tts
--- a/.github/workflows/macos.yaml
查看文件 @ea7c45b
+++ b/.github/workflows/macos.yaml
查看文件 @ea7c45b
@@ -12,6 +12,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -44,7 +46,7 @@ jobs:
         build_type: [Release, Debug]
 
     steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
 
@@ -74,6 +76,14 @@ jobs:
           otool -L build/bin/sherpa-onnx
           otool -l build/bin/sherpa-onnx
 
+       - name: Test offline TTS
+         shell: bash
+         run: |
+           export PATH=$PWD/build/bin:$PATH
+           export EXE=sherpa-onnx-offline-tts
+ 
+           .github/scripts/test-offline-tts.sh
+ 
       - name: Test online paraformer
         shell: bash
         run: |
--- a/.github/workflows/pkg-config.yaml
查看文件 @ea7c45b
+++ b/.github/workflows/pkg-config.yaml
查看文件 @ea7c45b
@@ -9,6 +9,7 @@ on:
       - '*'
     paths:
       - '.github/workflows/pkg-config.yaml'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -19,6 +20,7 @@ on:
       - master
     paths:
       - '.github/workflows/pkg-config.yaml'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -92,9 +94,20 @@ jobs:
         run: |
           export PKG_CONFIG_PATH=$PWD/build/install:$PKG_CONFIG_PATH
           cd c-api-examples
+ 
           gcc -o decode-file-c-api $(pkg-config --cflags sherpa-onnx) ./decode-file-c-api.c $(pkg-config --libs sherpa-onnx)
           ./decode-file-c-api --help
 
+           gcc -o offline-tts-c-api $(pkg-config --cflags sherpa-onnx) ./offline-tts-c-api.c $(pkg-config --libs sherpa-onnx)
+           ./offline-tts-c-api --help
+ 
+       - name: Test offline TTS C API
+         shell: bash
+         run: |
+           export PATH=$PWD/c-api-examples:$PATH
+           export EXE=offline-tts-c-api
+           .github/scripts/test-offline-tts.sh
+ 
       - name: Test online transducer (C API)
         shell: bash
         run: |
@@ -102,3 +115,8 @@ jobs:
           export EXE=decode-file-c-api
 
           .github/scripts/test-online-transducer.sh
+ 
+       - uses: actions/upload-artifact@v3
+         with:
+           name: tts-generated-test-files
+           path: tts
--- a/.github/workflows/windows-x64-cuda.yaml
查看文件 @ea7c45b
+++ b/.github/workflows/windows-x64-cuda.yaml
查看文件 @ea7c45b
@@ -12,6 +12,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -64,6 +66,14 @@ jobs:
 
           ls -lh ./bin/Release/sherpa-onnx.exe
 
+       - name: Test offline TTS
+         shell: bash
+         run: |
+           export PATH=$PWD/build/bin/Release:$PATH
+           export EXE=sherpa-onnx-offline-tts.exe
+ 
+           .github/scripts/test-offline-tts.sh
+ 
       - name: Test online paraformer for windows x64
         shell: bash
         run: |
--- a/.github/workflows/windows-x64.yaml
查看文件 @ea7c45b
+++ b/.github/workflows/windows-x64.yaml
查看文件 @ea7c45b
@@ -12,6 +12,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -45,7 +47,7 @@ jobs:
         shared_lib: [ON, OFF]
 
     steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
 
@@ -65,6 +67,14 @@ jobs:
 
           ls -lh ./bin/Release/sherpa-onnx.exe
 
+       - name: Test offline TTS
+         shell: bash
+         run: |
+           export PATH=$PWD/build/bin/Release:$PATH
+           export EXE=sherpa-onnx-offline-tts.exe
+ 
+           .github/scripts/test-offline-tts.sh
+ 
       - name: Test online paraformer for windows x64
         shell: bash
         run: |
--- a/.github/workflows/windows-x86.yaml
查看文件 @ea7c45b
+++ b/.github/workflows/windows-x86.yaml
查看文件 @ea7c45b
@@ -12,6 +12,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -24,6 +25,7 @@ on:
       - '.github/scripts/test-online-paraformer.sh'
       - '.github/scripts/test-offline-transducer.sh'
       - '.github/scripts/test-offline-ctc.sh'
+       - '.github/scripts/test-offline-tts.sh'
       - 'CMakeLists.txt'
       - 'cmake/**'
       - 'sherpa-onnx/csrc/*'
@@ -45,7 +47,7 @@ jobs:
         shared_lib: [ON, OFF]
 
     steps:
-       - uses: actions/checkout@v2
+       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
 
@@ -65,6 +67,14 @@ jobs:
 
           ls -lh ./bin/Release/sherpa-onnx.exe
 
+       - name: Test offline TTS
+         shell: bash
+         run: |
+           export PATH=$PWD/build/bin/Release:$PATH
+           export EXE=sherpa-onnx-offline-tts.exe
+ 
+           .github/scripts/test-offline-tts.sh
+ 
       - name: Test online paraformer for windows x86
         shell: bash
         run: |
--- a/.gitignore
查看文件 @ea7c45b
+++ b/.gitignore
查看文件 @ea7c45b
@@ -21,6 +21,7 @@ run-bilingual*.sh
 run-*-zipformer.sh
 run-zh.sh
 decode-file-c-api
+ offline-tts-c-api
 run-decode-file-c-api.sh
 sherpa-onnx-ffmpeg
 build-ios
--- a/c-api-examples/CMakeLists.txt
查看文件 @ea7c45b
+++ b/c-api-examples/CMakeLists.txt
查看文件 @ea7c45b
@@ -3,3 +3,6 @@ include(cargs)
 include_directories(${CMAKE_SOURCE_DIR})
 add_executable(decode-file-c-api decode-file-c-api.c)
 target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
+ 
+ add_executable(offline-tts-c-api offline-tts-c-api.c)
+ target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
--- a/c-api-examples/Makefile
查看文件 @ea7c45b
+++ b/c-api-examples/Makefile
查看文件 @ea7c45b
@@ -4,9 +4,19 @@ CUR_DIR :=$(shell pwd)
 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
 LDFLAGS := -L ../build/lib
 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
- LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lcargs
+ LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs
 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/_deps/onnxruntime-src/lib
 
+ .PHONY: all clean
+ 
+ all: decode-file-c-api offline-tts-c-api
+ 
 decode-file-c-api: decode-file-c-api.c
 	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+ 
+ offline-tts-c-api: offline-tts-c-api.c
+ 	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+ 
+ clean:
+ 	$(RM) ./decode-file-c-api ./offline-tts-c-api
--- a/c-api-examples/README.md
查看文件 @ea7c45b
+++ b/c-api-examples/README.md
查看文件 @ea7c45b
@@ -6,4 +6,13 @@ Please refer to the documentation
 https://k2-fsa.github.io/sherpa/onnx/c-api/index.html
 for details.
 
+ 
+ ## File descriptions
+ 
+ - [decode-file-c-api.c](./decode-file-c-api.c) This file shows how to use the C API
+   for speech recognition with a streaming model.
+ 
+ - [offline-tts-c-api.c](./offline-tts-c-api.c) This file shows how to use the C API
+   to convert text to speech with a non-streaming model.
+ 
 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
--- a/c-api-examples/offline-tts-c-api.c 0 → 100644
查看文件 @ea7c45b
+++ b/c-api-examples/offline-tts-c-api.c 0 → 100644
查看文件 @ea7c45b
+ // c-api-examples/offline-tts-c-api.c
+ //
+ // Copyright (c)  2023  Xiaomi Corporation
+ 
+ // This file shows how to use sherpa-onnx C API
+ // to convert text to speech using an offline model.
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "cargs.h"
+ #include "sherpa-onnx/c-api/c-api.h"
+ 
+ // Command-line options understood by this example.
+ //
+ // Each entry's .identifier is the character that main() receives from
+ // cag_option_get() and dispatches on in its switch statement, so the
+ // identifiers here ('h', '0'-'9', 'a') must stay in sync with that switch.
+ // Only --help has a short form (-h); all other options are long-only and
+ // declare a value_name, which is also what the help output displays.
+ static struct cag_option options[] = {
+     {.identifier = 'h',
+      .access_letters = "h",
+      .access_name = "help",
+      .description = "Show help"},
+     {.access_name = "vits-model",
+      .value_name = "/path/to/xxx.onnx",
+      .identifier = '0',
+      .description = "Path to VITS model"},
+     {.access_name = "vits-lexicon",
+      .value_name = "/path/to/lexicon.txt",
+      .identifier = '1',
+      .description = "Path to lexicon.txt for VITS models"},
+     {.access_name = "vits-tokens",
+      .value_name = "/path/to/tokens.txt",
+      .identifier = '2',
+      .description = "Path to tokens.txt for VITS models"},
+     {.access_name = "vits-noise-scale",
+      .value_name = "0.667",
+      .identifier = '3',
+      .description = "noise_scale for VITS models"},
+     {.access_name = "vits-noise-scale-w",
+      .value_name = "0.8",
+      .identifier = '4',
+      .description = "noise_scale_w for VITS models"},
+     {.access_name = "vits-length-scale",
+      .value_name = "1.0",
+      .identifier = '5',
+      .description =
+          "length_scale for VITS models. Default to 1. You can tune it "
+          "to change the speech speed. small -> faster; large -> slower. "},
+     {.access_name = "num-threads",
+      .value_name = "1",
+      .identifier = '6',
+      .description = "Number of threads"},
+     {.access_name = "provider",
+      .value_name = "cpu",
+      .identifier = '7',
+      .description = "Provider: cpu (default), cuda, coreml"},
+     {.access_name = "debug",
+      .value_name = "0",
+      .identifier = '8',
+      .description = "1 to show debug messages while loading the model"},
+     {.access_name = "sid",
+      .value_name = "0",
+      .identifier = '9',
+      .description = "Speaker ID. Default to 0. Note it is not used for "
+                     "single-speaker models."},
+     {.access_name = "output-filename",
+      .value_name = "./generated.wav",
+      .identifier = 'a',
+      .description =
+          "Filename to save the generated audio. Default to ./generated.wav"},
+ };
+ 
+ // Print the usage text followed by the table of supported options to stderr,
+ // then terminate the process with exit status 0.
+ //
+ // This function does not return.
+ static void ShowUsage(void) {
+   const char *kUsageMessage =
+       "Offline text-to-speech with sherpa-onnx C API"
+       "\n"
+       "./offline-tts-c-api \\\n"
+       " --vits-model=/path/to/model.onnx \\\n"
+       " --vits-lexicon=/path/to/lexicon.txt \\\n"
+       " --vits-tokens=/path/to/tokens.txt \\\n"
+       " --sid=0 \\\n"
+       " --output-filename=./generated.wav \\\n"
+       " 'some text within single quotes on linux/macos or use double quotes on "
+       "windows'\n"
+       "\n"
+       "It will generate a file ./generated.wav as specified by "
+       "--output-filename.\n"
+       "\n"
+       "You can download a test model from\n"
+       "https://huggingface.co/csukuangfj/vits-ljs\n"
+       "\n"
+       "For instance, you can use:\n"
+       "wget "
+       "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx\n"
+       "wget "
+       "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt\n"
+       "wget "
+       "https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt\n"
+       "\n"
+       "./offline-tts-c-api \\\n"
+       "  --vits-model=./vits-ljs.onnx \\\n"
+       "  --vits-lexicon=./lexicon.txt \\\n"
+       "  --vits-tokens=./tokens.txt \\\n"
+       "  --sid=0 \\\n"
+       "  --output-filename=./generated.wav \\\n"
+       "  'liliana, the most beautiful and lovely assistant of our team!'\n"
+       "\n"
+       "Please see\n"
+       "https://k2-fsa.github.io/sherpa/onnx/tts/index.html\n"
+       "for details.\n\n";
+ 
+   fprintf(stderr, "%s", kUsageMessage);
+   cag_option_print(options, CAG_ARRAY_SIZE(options), stderr);
+   exit(0);
+ }
+ 
+ // Entry point of the example.
+ //
+ // Parses the command line into a SherpaOnnxOfflineTtsConfig, takes the last
+ // command-line argument as the text to synthesize, generates audio for the
+ // requested speaker ID and writes it to the output wave file.
+ //
+ // Returns 0 on success, 1 if no audio could be generated or the wave file
+ // could not be written. Invalid or incomplete arguments print the usage
+ // message via ShowUsage(), which exits the process.
+ int32_t main(int32_t argc, char *argv[]) {
+   cag_option_context context;
+   char identifier;
+   const char *value;
+ 
+   cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv);
+ 
+   SherpaOnnxOfflineTtsConfig config;
+   memset(&config, 0, sizeof(config));
+ 
+   int32_t sid = 0;
+   // Option values are used for the lifetime of the process (config.model.*
+   // below already stores them directly), so no copy of the filename is needed.
+   const char *filename = "./generated.wav";
+   const char *text;
+ 
+   while (cag_option_fetch(&context)) {
+     identifier = cag_option_get(&context);
+     value = cag_option_get_value(&context);
+ 
+     // Every option except --help needs a value. Without this check a missing
+     // value would be handed to atoi()/atof() as a NULL pointer.
+     // --help and unknown options carry no value and end up here as well.
+     if (!value) {
+       ShowUsage();
+     }
+ 
+     switch (identifier) {
+       case '0':
+         config.model.vits.model = value;
+         break;
+       case '1':
+         config.model.vits.lexicon = value;
+         break;
+       case '2':
+         config.model.vits.tokens = value;
+         break;
+       case '3':
+         config.model.vits.noise_scale = atof(value);
+         break;
+       case '4':
+         config.model.vits.noise_scale_w = atof(value);
+         break;
+       case '5':
+         config.model.vits.length_scale = atof(value);
+         break;
+       case '6':
+         config.model.num_threads = atoi(value);
+         break;
+       case '7':
+         config.model.provider = value;
+         break;
+       case '8':
+         config.model.debug = atoi(value);
+         break;
+       case '9':
+         sid = atoi(value);
+         break;
+       case 'a':
+         filename = value;
+         break;
+       case 'h':
+         // fall through
+       default:
+         ShowUsage();
+     }
+   }
+ 
+   // The model, the lexicon and the tokens files are all mandatory.
+   if (!config.model.vits.model || !config.model.vits.lexicon ||
+       !config.model.vits.tokens) {
+     ShowUsage();
+   }
+ 
+   // the last arg is the text
+   text = argv[argc - 1];
+   if (text[0] == '-') {
+     fprintf(stderr, "\n***Please input your text!***\n\n");
+     fprintf(stderr, "\n---------------Usage---------------\n\n");
+     ShowUsage();
+   }
+ 
+   SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
+ 
+   // A NULL result means that no samples were generated; there is nothing
+   // to write in that case.
+   const SherpaOnnxGeneratedAudio *audio =
+       SherpaOnnxOfflineTtsGenerate(tts, text, sid);
+   if (!audio) {
+     fprintf(stderr, "Failed to generate audio for text: %s\n", text);
+     SherpaOnnxDestroyOfflineTts(tts);
+     return 1;
+   }
+ 
+   // The writer is documented to return 1 on success and 0 on failure.
+   int32_t ok = SherpaOnnxDestroyOfflineWriteWave(audio, filename);
+ 
+   SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+   SherpaOnnxDestroyOfflineTts(tts);
+ 
+   if (!ok) {
+     fprintf(stderr, "Failed to write: %s\n", filename);
+     return 1;
+   }
+ 
+   fprintf(stderr, "Input text is: %s\n", text);
+   fprintf(stderr, "Speaker ID is: %d\n", sid);
+   fprintf(stderr, "Saved to: %s\n", filename);
+ 
+   return 0;
+ }
--- a/sherpa-onnx/c-api/c-api.cc
查看文件 @ea7c45b
+++ b/sherpa-onnx/c-api/c-api.cc
查看文件 @ea7c45b
@@ -12,8 +12,10 @@
 #include "sherpa-onnx/csrc/circular-buffer.h"
 #include "sherpa-onnx/csrc/display.h"
 #include "sherpa-onnx/csrc/offline-recognizer.h"
+ #include "sherpa-onnx/csrc/offline-tts.h"
 #include "sherpa-onnx/csrc/online-recognizer.h"
 #include "sherpa-onnx/csrc/voice-activity-detector.h"
+ #include "sherpa-onnx/csrc/wave-writer.h"
 
 struct SherpaOnnxOnlineRecognizer {
   std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
@@ -204,12 +206,14 @@ const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
 }
 
 void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) {
-   delete[] r->text;
-   delete[] r->json;
-   delete[] r->tokens;
-   delete[] r->tokens_arr;
-   delete[] r->timestamps;
-   delete r;
+   if (r) {
+     delete[] r->text;
+     delete[] r->json;
+     delete[] r->tokens;
+     delete[] r->tokens_arr;
+     delete[] r->timestamps;
+     delete r;
+   }
 }
 
 void Reset(SherpaOnnxOnlineRecognizer *recognizer,
@@ -385,9 +389,11 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
 
 void DestroyOfflineRecognizerResult(
     const SherpaOnnxOfflineRecognizerResult *r) {
-   delete[] r->text;
-   delete[] r->timestamps;
-   delete r;
+   if (r) {
+     delete[] r->text;
+     delete[] r->timestamps;
+     delete r;
+   }
 }
 
 // ============================================================
@@ -493,18 +499,16 @@ int32_t SherpaOnnxVoiceActivityDetectorDetected(
   return p->impl->IsSpeechDetected();
 }
 
- void SherpaOnnxVoiceActivityDetectorPop(
-     SherpaOnnxVoiceActivityDetector *p) {
+ void SherpaOnnxVoiceActivityDetectorPop(SherpaOnnxVoiceActivityDetector *p) {
   p->impl->Pop();
 }
 
- void SherpaOnnxVoiceActivityDetectorClear(
-     SherpaOnnxVoiceActivityDetector *p) {
+ void SherpaOnnxVoiceActivityDetectorClear(SherpaOnnxVoiceActivityDetector *p) {
   p->impl->Clear();
 }
 
- const SherpaOnnxSpeechSegment *
- SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
+ const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront(
+     SherpaOnnxVoiceActivityDetector *p) {
   const sherpa_onnx::SpeechSegment &segment = p->impl->Front();
 
   SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment;
@@ -517,10 +521,81 @@ SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p) {
 }
 
 void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) {
-   delete[] p->samples;
-   delete p;
+   if (p) {
+     delete[] p->samples;
+     delete p;
+   }
 }
 
 void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
   p->impl->Reset();
 }
+ 
+ // Definition of the handle type that the C header only forward-declares.
+ // It owns the underlying C++ sherpa_onnx::OfflineTts object.
+ struct SherpaOnnxOfflineTts {
+   std::unique_ptr<sherpa_onnx::OfflineTts> impl;
+ };
+ 
+ // Build a sherpa_onnx::OfflineTtsConfig from the C configuration and create
+ // an offline TTS object from it.
+ //
+ // Each field except `debug` goes through SHERPA_ONNX_OR with a fallback
+ // value ("" for the paths, 0.667 / 0.8 / 1.0 for the VITS scales, 1 for
+ // num_threads and "cpu" for the provider). When `debug` is non-zero, the
+ // resulting configuration is printed to stderr before the object is created.
+ //
+ // The returned pointer is allocated with new; release it with
+ // SherpaOnnxDestroyOfflineTts().
+ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
+     const SherpaOnnxOfflineTtsConfig *config) {
+   sherpa_onnx::OfflineTtsConfig cfg;
+ 
+   // VITS model files
+   cfg.model.vits.model = SHERPA_ONNX_OR(config->model.vits.model, "");
+   cfg.model.vits.lexicon = SHERPA_ONNX_OR(config->model.vits.lexicon, "");
+   cfg.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
+ 
+   // VITS synthesis parameters
+   cfg.model.vits.noise_scale =
+       SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
+   cfg.model.vits.noise_scale_w =
+       SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
+   cfg.model.vits.length_scale =
+       SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);
+ 
+   // Runtime settings
+   cfg.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
+   cfg.model.debug = config->model.debug;
+   cfg.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
+ 
+   if (cfg.model.debug) {
+     fprintf(stderr, "%s\n", cfg.ToString().c_str());
+   }
+ 
+   SherpaOnnxOfflineTts *handle = new SherpaOnnxOfflineTts;
+   handle->impl = std::make_unique<sherpa_onnx::OfflineTts>(cfg);
+   return handle;
+ }
+ 
+ // Release a handle returned by SherpaOnnxCreateOfflineTts(). Destroying the
+ // handle also destroys the sherpa_onnx::OfflineTts object it owns.
+ void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) {
+   // delete on a null pointer is a no-op, so no explicit check is needed.
+   delete tts;
+ }
+ 
+ // Generate audio for `text` using speaker ID `sid`.
+ //
+ // Returns nullptr when the generated audio contains no samples. Otherwise
+ // returns a newly allocated SherpaOnnxGeneratedAudio holding its own copy of
+ // the samples; free it with SherpaOnnxDestroyOfflineTtsGeneratedAudio().
+ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
+     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) {
+   sherpa_onnx::GeneratedAudio generated = tts->impl->Generate(text, sid);
+   const auto &src = generated.samples;
+ 
+   if (src.empty()) return nullptr;
+ 
+   SherpaOnnxGeneratedAudio *result = new SherpaOnnxGeneratedAudio;
+ 
+   // The C struct needs a plain array that outlives `generated`.
+   float *buffer = new float[src.size()];
+   std::copy(src.begin(), src.end(), buffer);
+ 
+   result->samples = buffer;
+   result->n = src.size();
+   result->sample_rate = generated.sample_rate;
+ 
+   return result;
+ }
+ 
+ // Free an object returned by SherpaOnnxOfflineTtsGenerate(), including its
+ // sample array. Passing a null pointer is allowed and does nothing.
+ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
+     const SherpaOnnxGeneratedAudio *p) {
+   if (p == nullptr) return;
+ 
+   delete[] p->samples;
+   delete p;
+ }
+ 
+ // Write the samples of `p` to the wave file `filename` using
+ // sherpa_onnx::WriteWave() and return its result.
+ //
+ // Returns 0 (failure) without touching the file system when `p` or
+ // `filename` is a null pointer. SherpaOnnxOfflineTtsGenerate() returns a
+ // null pointer when no samples were generated, so callers that forward its
+ // result unchecked get an error code here instead of a crash.
+ int32_t SherpaOnnxDestroyOfflineWriteWave(const SherpaOnnxGeneratedAudio *p,
+                                           const char *filename) {
+   if (!p || !filename) {
+     return 0;
+   }
+ 
+   return sherpa_onnx::WriteWave(filename, p->sample_rate, p->samples, p->n);
+ }
--- a/sherpa-onnx/c-api/c-api.h
查看文件 @ea7c45b
+++ b/sherpa-onnx/c-api/c-api.h
查看文件 @ea7c45b
@@ -595,6 +595,62 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
 SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
     SherpaOnnxVoiceActivityDetector *p);
 
+ // ============================================================
+ // For offline Text-to-Speech (i.e., non-streaming TTS)
+ // ============================================================
+ // Configuration of a VITS text-to-speech model.
+ //
+ // NOTE(review): SherpaOnnxCreateOfflineTts() passes every field through
+ // SHERPA_ONNX_OR with a fallback ("" for the paths, 0.667 / 0.8 / 1.0 for the
+ // three scales); presumably a zeroed field selects that fallback - confirm
+ // against the macro definition.
+ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsVitsModelConfig {
+   const char *model;    // path to the VITS model file (*.onnx)
+   const char *lexicon;  // path to lexicon.txt
+   const char *tokens;   // path to tokens.txt
+ 
+   float noise_scale;
+   float noise_scale_w;
+   float length_scale;  // < 1, faster in speed; > 1, slower in speed
+ } SherpaOnnxOfflineTtsVitsModelConfig;
+ 
+ // Model-level configuration for offline TTS: the VITS model settings plus
+ // runtime options.
+ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
+   SherpaOnnxOfflineTtsVitsModelConfig vits;
+   int32_t num_threads;   // number of threads
+   int32_t debug;         // non-zero: print the configuration to stderr on creation
+   const char *provider;  // e.g., "cpu", "cuda", "coreml"
+ } SherpaOnnxOfflineTtsModelConfig;
+ 
+ // Top-level configuration passed to SherpaOnnxCreateOfflineTts().
+ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
+   SherpaOnnxOfflineTtsModelConfig model;
+ } SherpaOnnxOfflineTtsConfig;
+ 
+ // Audio returned by SherpaOnnxOfflineTtsGenerate(). The object and its
+ // sample array are freed by SherpaOnnxDestroyOfflineTtsGeneratedAudio().
+ SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
+   const float *samples;  // in the range [-1, 1]
+   int32_t n;             // number of samples
+   int32_t sample_rate;
+ } SherpaOnnxGeneratedAudio;
+ 
+ // Handle to an offline TTS instance. Only declared here; users hold it by
+ // pointer and pass it to the functions below.
+ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
+ 
+ // Create an instance of offline TTS. The user has to use
+ // SherpaOnnxDestroyOfflineTts() to free the returned pointer to avoid
+ // memory leak.
+ SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
+     const SherpaOnnxOfflineTtsConfig *config);
+ 
+ // Free the pointer returned by SherpaOnnxCreateOfflineTts()
+ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
+ 
+ // Generate audio from the given text and speaker id (sid).
+ // The user has to use SherpaOnnxDestroyOfflineTtsGeneratedAudio() to free the
+ // returned pointer to avoid memory leak.
+ //
+ // Return NULL if no samples were generated.
+ SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
+     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid);
+ 
+ // Free the pointer returned by SherpaOnnxOfflineTtsGenerate().
+ // It is safe to pass NULL.
+ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
+     const SherpaOnnxGeneratedAudio *p);
+ 
+ // Write the generated audio to a wave file.
+ // The saved wave file contains a single channel and has 16-bit samples.
+ //
+ // p must not be NULL; check the return value of
+ // SherpaOnnxOfflineTtsGenerate() before calling this function.
+ //
+ // Return 1 if the write succeeded; return 0 on failure.
+ SHERPA_ONNX_API int32_t SherpaOnnxDestroyOfflineWriteWave(
+     const SherpaOnnxGeneratedAudio *p, const char *filename);
+ 
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
 #endif
--- a/sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
查看文件 @ea7c45b
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
查看文件 @ea7c45b
@@ -40,7 +40,7 @@ wget https://huggingface.co/csukuangfj/vits-ljs/resolve/main/tokens.txt
 
 Please see
 https://k2-fsa.github.io/sherpa/onnx/tts/index.html
- or detailes.
+ or details.
 )usage";
 
   sherpa_onnx::ParseOptions po(kUsageMessage);