Fangjun Kuang
Committed by GitHub

Add Java API for text-to-speech (#811)

@@ -138,3 +138,21 @@ jobs: @@ -138,3 +138,21 @@ jobs:
138 138
139 ./run-non-streaming-decode-file-nemo.sh 139 ./run-non-streaming-decode-file-nemo.sh
140 rm -rf sherpa-onnx-nemo-* 140 rm -rf sherpa-onnx-nemo-*
  141 +
  142 + - name: Run java test (Non-Streaming TTS)
  143 + shell: bash
  144 + run: |
  145 + cd ./java-api-examples
  146 + ./run-non-streaming-tts-piper-en.sh
  147 + rm -rf vits-piper-*
  148 +
  149 + ./run-non-streaming-tts-coqui-de.sh
  150 + rm -rf vits-coqui-*
  151 +
  152 + ./run-non-streaming-tts-vits-zh.sh
  153 + rm -rf vits-zh-*
  154 +
  155 + - uses: actions/upload-artifact@v4
  156 + with:
  157 + name: tts-wav-files-${{ matrix.os }}
  158 + path: java-api-examples/*.wav
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +// This file shows how to use a Coqui-ai VITS German TTS model
  4 +// to convert text to speech
  5 +import com.k2fsa.sherpa.onnx.*;
  6 +
  7 +public class NonStreamingTtsCoquiDe {
  8 + public static void main(String[] args) {
  9 + // please visit
  10 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  11 + // to download model files
  12 + String model = "./vits-coqui-de-css10/model.onnx";
  13 + String tokens = "./vits-coqui-de-css10/tokens.txt";
  14 + String text = "Alles hat ein Ende, nur die Wurst hat zwei.";
  15 +
  16 + OfflineTtsVitsModelConfig vitsModelConfig =
  17 + OfflineTtsVitsModelConfig.builder().setModel(model).setTokens(tokens).build();
  18 +
  19 + OfflineTtsModelConfig modelConfig =
  20 + OfflineTtsModelConfig.builder()
  21 + .setVits(vitsModelConfig)
  22 + .setNumThreads(1)
  23 + .setDebug(true)
  24 + .build();
  25 +
  26 + OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
  27 + OfflineTts tts = new OfflineTts(config);
  28 +
  29 + int sid = 0;
  30 + float speed = 1.0f;
  31 + long start = System.currentTimeMillis();
  32 + GeneratedAudio audio = tts.generate(text, sid, speed);
  33 + long stop = System.currentTimeMillis();
  34 +
  35 + float timeElapsedSeconds = (stop - start) / 1000.0f;
  36 +
  37 + float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
  38 + float real_time_factor = timeElapsedSeconds / audioDuration;
  39 +
  40 + String waveFilename = "tts-coqui-de.wav";
  41 + audio.save(waveFilename);
  42 + System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
  43 + System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds);
  44 + System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor);
  45 + System.out.printf("-- text: %s\n", text);
  46 + System.out.printf("-- Saved to %s\n", waveFilename);
  47 +
  48 + tts.release();
  49 + }
  50 +}
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +// This file shows how to use a piper VITS English TTS model
  4 +// to convert text to speech
  5 +import com.k2fsa.sherpa.onnx.*;
  6 +
  7 +public class NonStreamingTtsPiperEn {
  8 + public static void main(String[] args) {
  9 + // please visit
  10 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  11 + // to download model files
  12 + String model = "./vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx";
  13 + String tokens = "./vits-piper-en_GB-cori-medium/tokens.txt";
  14 + String dataDir = "./vits-piper-en_GB-cori-medium/espeak-ng-data";
  15 + String text =
  16 + "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
  17 + + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
  18 + + " businessman, an official, or a scholar.";
  19 +
  20 + OfflineTtsVitsModelConfig vitsModelConfig =
  21 + OfflineTtsVitsModelConfig.builder()
  22 + .setModel(model)
  23 + .setTokens(tokens)
  24 + .setDataDir(dataDir)
  25 + .build();
  26 +
  27 + OfflineTtsModelConfig modelConfig =
  28 + OfflineTtsModelConfig.builder()
  29 + .setVits(vitsModelConfig)
  30 + .setNumThreads(1)
  31 + .setDebug(true)
  32 + .build();
  33 +
  34 + OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
  35 + OfflineTts tts = new OfflineTts(config);
  36 +
  37 + int sid = 0;
  38 + float speed = 1.0f;
  39 + long start = System.currentTimeMillis();
  40 + GeneratedAudio audio = tts.generate(text, sid, speed);
  41 + long stop = System.currentTimeMillis();
  42 +
  43 + float timeElapsedSeconds = (stop - start) / 1000.0f;
  44 +
  45 + float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
  46 + float real_time_factor = timeElapsedSeconds / audioDuration;
  47 +
  48 + String waveFilename = "tts-piper-en.wav";
  49 + audio.save(waveFilename);
  50 + System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
  51 + System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds);
  52 + System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor);
  53 + System.out.printf("-- text: %s\n", text);
  54 + System.out.printf("-- Saved to %s\n", waveFilename);
  55 +
  56 + tts.release();
  57 + }
  58 +}
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +// This file shows how to use a VITS Chinese TTS model
  4 +// to convert text to speech.
  5 +//
  6 +// You can use https://github.com/Plachtaa/VITS-fast-fine-tuning
  7 +// to train your model
  8 +import com.k2fsa.sherpa.onnx.*;
  9 +
  10 +public class NonStreamingTtsPiperEn {
  11 + public static void main(String[] args) {
  12 + // please visit
  13 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  14 + // to download model files
  15 + String model = "./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx";
  16 + String tokens = "./vits-zh-hf-fanchen-C/tokens.txt";
  17 + String lexicon = "./vits-zh-hf-fanchen-C/lexicon.txt";
  18 + String dictDir = "./vits-zh-hf-fanchen-C/dict";
  19 + String ruleFsts =
  20 + "./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst";
  21 + String text = "有问题,请拨打110或者手机18601239876。我们的价值观是真诚热爱!";
  22 +
  23 + OfflineTtsVitsModelConfig vitsModelConfig =
  24 + OfflineTtsVitsModelConfig.builder()
  25 + .setModel(model)
  26 + .setTokens(tokens)
  27 + .setLexicon(lexicon)
  28 + .setDictDir(dictDir)
  29 + .build();
  30 +
  31 + OfflineTtsModelConfig modelConfig =
  32 + OfflineTtsModelConfig.builder()
  33 + .setVits(vitsModelConfig)
  34 + .setNumThreads(1)
  35 + .setDebug(true)
  36 + .build();
  37 +
  38 + OfflineTtsConfig config =
  39 + OfflineTtsConfig.builder().setModel(modelConfig).setRuleFsts(ruleFsts).build();
  40 +
  41 + OfflineTts tts = new OfflineTts(config);
  42 +
  43 + int sid = 100;
  44 + float speed = 1.0f;
  45 + long start = System.currentTimeMillis();
  46 + GeneratedAudio audio = tts.generate(text, sid, speed);
  47 + long stop = System.currentTimeMillis();
  48 +
  49 + float timeElapsedSeconds = (stop - start) / 1000.0f;
  50 +
  51 + float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
  52 + float real_time_factor = timeElapsedSeconds / audioDuration;
  53 +
  54 + String waveFilename = "tts-vits-zh.wav";
  55 + audio.save(waveFilename);
  56 + System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
  57 + System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds);
  58 + System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor);
  59 + System.out.printf("-- text: %s\n", text);
  60 + System.out.printf("-- Saved to %s\n", waveFilename);
  61 +
  62 + tts.release();
  63 + }
  64 +}
@@ -21,3 +21,11 @@ This directory contains examples for the JAVA API of sherpa-onnx. @@ -21,3 +21,11 @@ This directory contains examples for the JAVA API of sherpa-onnx.
21 ./run-non-streaming-decode-file-whisper.sh 21 ./run-non-streaming-decode-file-whisper.sh
22 ./run-non-streaming-decode-file-nemo.sh 22 ./run-non-streaming-decode-file-nemo.sh
23 ``` 23 ```
  24 +
  25 +## Non-Streaming text-to-speech
  26 +
  27 +```bash
  28 +./run-non-streaming-tts-piper-en.sh
  29 +./run-non-streaming-tts-coqui-de.sh
  30 +./run-non-streaming-tts-vits-zh.sh
  31 +```
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  29 + cmake \
  30 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  31 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  32 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  33 + -DBUILD_SHARED_LIBS=ON \
  34 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  35 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  36 + ..
  37 +
  38 + make -j4
  39 + ls -lh lib
  40 +fi
  41 +
  42 +# please visit
  43 +# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  44 +# to download more models
  45 +if [ ! -f ./vits-coqui-de-css10/tokens.txt ]; then
  46 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  47 + tar xf vits-coqui-de-css10.tar.bz2
  48 + rm vits-coqui-de-css10.tar.bz2
  49 +fi
  50 +
  51 +java \
  52 + -Djava.library.path=$PWD/../build/lib \
  53 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  54 + NonStreamingTtsCoquiDe.java
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  29 + cmake \
  30 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  31 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  32 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  33 + -DBUILD_SHARED_LIBS=ON \
  34 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  35 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  36 + ..
  37 +
  38 + make -j4
  39 + ls -lh lib
  40 +fi
  41 +
  42 +# please visit
  43 +# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  44 +# to download more models
  45 +if [ ! -f ./vits-piper-en_GB-cori-medium/tokens.txt ]; then
  46 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
  47 + tar xf vits-piper-en_GB-cori-medium.tar.bz2
  48 + rm vits-piper-en_GB-cori-medium.tar.bz2
  49 +fi
  50 +
  51 +java \
  52 + -Djava.library.path=$PWD/../build/lib \
  53 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  54 + NonStreamingTtsPiperEn.java
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  29 + cmake \
  30 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  31 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  32 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  33 + -DBUILD_SHARED_LIBS=ON \
  34 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  35 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  36 + ..
  37 +
  38 + make -j4
  39 + ls -lh lib
  40 +fi
  41 +
  42 +# please visit
  43 +# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  44 +# to download more models
  45 +if [ ! -f ./vits-zh-hf-fanchen-C/tokens.txt ]; then
  46 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  47 + tar xf vits-zh-hf-fanchen-C.tar.bz2
  48 + rm vits-zh-hf-fanchen-C.tar.bz2
  49 +fi
  50 +
  51 +java \
  52 + -Djava.library.path=$PWD/../build/lib \
  53 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  54 + NonStreamingTtsVitsZh.java
@@ -30,6 +30,12 @@ java_files += OfflineRecognizerResult.java @@ -30,6 +30,12 @@ java_files += OfflineRecognizerResult.java
30 java_files += OfflineStream.java 30 java_files += OfflineStream.java
31 java_files += OfflineRecognizer.java 31 java_files += OfflineRecognizer.java
32 32
  33 +java_files += OfflineTtsVitsModelConfig.java
  34 +java_files += OfflineTtsModelConfig.java
  35 +java_files += OfflineTtsConfig.java
  36 +java_files += GeneratedAudio.java
  37 +java_files += OfflineTts.java
  38 +
33 class_files := $(java_files:%.java=%.class) 39 class_files := $(java_files:%.java=%.class)
34 40
35 java_files := $(addprefix src/$(package_dir)/,$(java_files)) 41 java_files := $(addprefix src/$(package_dir)/,$(java_files))
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +package com.k2fsa.sherpa.onnx;
  4 +
  5 +public class GeneratedAudio {
  6 + static {
  7 + System.loadLibrary("sherpa-onnx-jni");
  8 + }
  9 +
  10 + private final float[] samples;
  11 + private final int sampleRate;
  12 +
  13 + public GeneratedAudio(float[] samples, int sampleRate) {
  14 + this.samples = samples;
  15 + this.sampleRate = sampleRate;
  16 + }
  17 +
  18 + public int getSampleRate() {
  19 + return sampleRate;
  20 + }
  21 +
  22 + public float[] getSamples() {
  23 + return samples;
  24 + }
  25 +
  26 + // return true if saved successfully.
  27 + public boolean save(String filename) {
  28 + return saveImpl(filename, samples, sampleRate);
  29 + }
  30 +
  31 + private native boolean saveImpl(String filename, float[] samples, int sampleRate);
  32 +}
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +package com.k2fsa.sherpa.onnx;
  4 +
  5 +public class OfflineTts {
  6 + static {
  7 + System.loadLibrary("sherpa-onnx-jni");
  8 + }
  9 +
  10 + private long ptr = 0; // this is the asr engine ptrss
  11 +
  12 + public OfflineTts(OfflineTtsConfig config) {
  13 + ptr = newFromFile(config);
  14 + }
  15 +
  16 + public GeneratedAudio generate(String text) {
  17 + return generate(text, 0, 1.0f);
  18 + }
  19 +
  20 + public GeneratedAudio generate(String text, int sid) {
  21 + return generate(text, sid, 1.0f);
  22 + }
  23 +
  24 + public GeneratedAudio generate(String text, int sid, float speed) {
  25 + Object[] arr = generateImpl(ptr, text, sid, speed);
  26 + float[] samples = (float[]) arr[0];
  27 + int sampleRate = (int) arr[1];
  28 + return new GeneratedAudio(samples, sampleRate);
  29 + }
  30 +
  31 + @Override
  32 + protected void finalize() throws Throwable {
  33 + release();
  34 + }
  35 +
  36 + public void release() {
  37 + if (this.ptr == 0) {
  38 + return;
  39 + }
  40 + delete(this.ptr);
  41 + this.ptr = 0;
  42 + }
  43 +
  44 + private native void delete(long ptr);
  45 +
  46 + private native int getSampleRate(long ptr);
  47 +
  48 + private native int getNumSpeakers(long ptr);
  49 +
  50 + private native Object[] generateImpl(long ptr, String text, int sid, float speed);
  51 +
  52 + private native long newFromFile(OfflineTtsConfig config);
  53 +}
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +package com.k2fsa.sherpa.onnx;
  4 +
  5 +public class OfflineTtsConfig {
  6 + private final OfflineTtsModelConfig model;
  7 + private final String ruleFsts;
  8 + private final String ruleFars;
  9 + private final int maxNumSentences;
  10 +
  11 + private OfflineTtsConfig(Builder builder) {
  12 + this.model = builder.model;
  13 + this.ruleFsts = builder.ruleFsts;
  14 + this.ruleFars = builder.ruleFars;
  15 + this.maxNumSentences = builder.maxNumSentences;
  16 + }
  17 +
  18 + public static Builder builder() {
  19 + return new Builder();
  20 + }
  21 +
  22 + public OfflineTtsModelConfig getModel() {
  23 + return model;
  24 + }
  25 +
  26 + public String getRuleFsts() {
  27 + return ruleFsts;
  28 + }
  29 +
  30 + public String getRuleFars() {
  31 + return ruleFars;
  32 + }
  33 +
  34 + public int getMaxNumSentences() {
  35 + return maxNumSentences;
  36 + }
  37 +
  38 + public static class Builder {
  39 + private OfflineTtsModelConfig model = OfflineTtsModelConfig.builder().build();
  40 + private String ruleFsts = "";
  41 + private String ruleFars = "";
  42 + private int maxNumSentences = 1;
  43 +
  44 + public OfflineTtsConfig build() {
  45 + return new OfflineTtsConfig(this);
  46 + }
  47 +
  48 + public Builder setModel(OfflineTtsModelConfig model) {
  49 + this.model = model;
  50 + return this;
  51 + }
  52 +
  53 + public Builder setRuleFsts(String ruleFsts) {
  54 + this.ruleFsts = ruleFsts;
  55 + return this;
  56 + }
  57 +
  58 + public Builder setRuleFars(String ruleFars) {
  59 + this.ruleFars = ruleFars;
  60 + return this;
  61 + }
  62 +
  63 + public Builder setMaxNumSentences(int maxNumSentences) {
  64 + this.maxNumSentences = maxNumSentences;
  65 + return this;
  66 + }
  67 + }
  68 +}
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +package com.k2fsa.sherpa.onnx;
  4 +
  5 +public class OfflineTtsModelConfig {
  6 + private final OfflineTtsVitsModelConfig vits;
  7 + private final int numThreads;
  8 + private final boolean debug;
  9 + private final String provider;
  10 +
  11 + private OfflineTtsModelConfig(Builder builder) {
  12 + this.vits = builder.vits;
  13 + this.numThreads = builder.numThreads;
  14 + this.debug = builder.debug;
  15 + this.provider = builder.provider;
  16 + }
  17 +
  18 + public static Builder builder() {
  19 + return new Builder();
  20 + }
  21 +
  22 + public OfflineTtsVitsModelConfig getVits() {
  23 + return vits;
  24 + }
  25 +
  26 + public static class Builder {
  27 + private OfflineTtsVitsModelConfig vits = OfflineTtsVitsModelConfig.builder().build();
  28 + private int numThreads = 1;
  29 + private boolean debug = true;
  30 + private String provider = "cpu";
  31 +
  32 + public OfflineTtsModelConfig build() {
  33 + return new OfflineTtsModelConfig(this);
  34 + }
  35 +
  36 + public Builder setVits(OfflineTtsVitsModelConfig vits) {
  37 + this.vits = vits;
  38 + return this;
  39 + }
  40 +
  41 + public Builder setNumThreads(int numThreads) {
  42 + this.numThreads = numThreads;
  43 + return this;
  44 + }
  45 +
  46 + public Builder setDebug(boolean debug) {
  47 + this.debug = debug;
  48 + return this;
  49 + }
  50 +
  51 + public Builder setProvider(String provider) {
  52 + this.provider = provider;
  53 + return this;
  54 + }
  55 + }
  56 +}
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +package com.k2fsa.sherpa.onnx;
  4 +
  5 +public class OfflineTtsVitsModelConfig {
  6 + private final String model;
  7 + private final String lexicon;
  8 + private final String tokens;
  9 + private final String dataDir;
  10 + private final String dictDir;
  11 + private final float noiseScale;
  12 + private final float noiseScaleW;
  13 + private final float lengthScale;
  14 +
  15 + private OfflineTtsVitsModelConfig(Builder builder) {
  16 + this.model = builder.model;
  17 + this.lexicon = builder.lexicon;
  18 + this.tokens = builder.tokens;
  19 + this.dataDir = builder.dataDir;
  20 + this.dictDir = builder.dictDir;
  21 + this.noiseScale = builder.noiseScale;
  22 + this.noiseScaleW = builder.noiseScaleW;
  23 + this.lengthScale = builder.lengthScale;
  24 + }
  25 +
  26 + public static Builder builder() {
  27 + return new Builder();
  28 + }
  29 +
  30 + public String getModel() {
  31 + return model;
  32 + }
  33 +
  34 + public String getLexicon() {
  35 + return lexicon;
  36 + }
  37 +
  38 + public String getTokens() {
  39 + return tokens;
  40 + }
  41 +
  42 + public String getDataDir() {
  43 + return dataDir;
  44 + }
  45 +
  46 + public String getDictDir() {
  47 + return dictDir;
  48 + }
  49 +
  50 + public float getLengthScale() {
  51 + return lengthScale;
  52 + }
  53 +
  54 + public float getNoiseScale() {
  55 + return noiseScale;
  56 + }
  57 +
  58 + public float getNoiseScaleW() {
  59 + return noiseScaleW;
  60 + }
  61 +
  62 + public static class Builder {
  63 + private String model;
  64 + private String lexicon = "";
  65 + private String tokens;
  66 + private String dataDir = "";
  67 + private String dictDir = "";
  68 + private float noiseScale = 0.667f;
  69 + private float noiseScaleW = 0.8f;
  70 + private float lengthScale = 1.0f;
  71 +
  72 + public OfflineTtsVitsModelConfig build() {
  73 + return new OfflineTtsVitsModelConfig(this);
  74 + }
  75 +
  76 + public Builder setModel(String model) {
  77 + this.model = model;
  78 + return this;
  79 + }
  80 +
  81 + public Builder setTokens(String tokens) {
  82 + this.tokens = tokens;
  83 + return this;
  84 + }
  85 +
  86 + public Builder setLexicon(String lexicon) {
  87 + this.lexicon = lexicon;
  88 + return this;
  89 + }
  90 +
  91 + public Builder setDataDir(String dataDir) {
  92 + this.dataDir = dataDir;
  93 + return this;
  94 + }
  95 +
  96 + public Builder setDictDir(String dictDir) {
  97 + this.dictDir = dictDir;
  98 + return this;
  99 + }
  100 +
  101 + public Builder setNoiseScale(float noiseScale) {
  102 + this.noiseScale = noiseScale;
  103 + return this;
  104 + }
  105 +
  106 + public Builder setNoiseScaleW(float noiseScaleW) {
  107 + this.noiseScaleW = noiseScaleW;
  108 + return this;
  109 + }
  110 +
  111 + public Builder setLengthScale(float lengthScale) {
  112 + this.lengthScale = lengthScale;
  113 + return this;
  114 + }
  115 + }
  116 +}
1 // Copyright 2024 Xiaomi Corporation 1 // Copyright 2024 Xiaomi Corporation
  2 +
2 package com.k2fsa.sherpa.onnx; 3 package com.k2fsa.sherpa.onnx;
3 4
4 public class WaveReader { 5 public class WaveReader {