正在显示
15 个修改的文件
包含
692 行增加
和
0 行删除
| @@ -138,3 +138,21 @@ jobs: | @@ -138,3 +138,21 @@ jobs: | ||
| 138 | 138 | ||
| 139 | ./run-non-streaming-decode-file-nemo.sh | 139 | ./run-non-streaming-decode-file-nemo.sh |
| 140 | rm -rf sherpa-onnx-nemo-* | 140 | rm -rf sherpa-onnx-nemo-* |
| 141 | + | ||
| 142 | + - name: Run java test (Non-Streaming TTS) | ||
| 143 | + shell: bash | ||
| 144 | + run: | | ||
| 145 | + cd ./java-api-examples | ||
| 146 | + ./run-non-streaming-tts-piper-en.sh | ||
| 147 | + rm -rf vits-piper-* | ||
| 148 | + | ||
| 149 | + ./run-non-streaming-tts-coqui-de.sh | ||
| 150 | + rm -rf vits-coqui-* | ||
| 151 | + | ||
| 152 | + ./run-non-streaming-tts-vits-zh.sh | ||
| 153 | + rm -rf vits-zh-* | ||
| 154 | + | ||
| 155 | + - uses: actions/upload-artifact@v4 | ||
| 156 | + with: | ||
| 157 | + name: tts-wav-files-${{ matrix.os }} | ||
| 158 | + path: java-api-examples/*.wav |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a Coqui-ai VITS German TTS model | ||
| 4 | +// to convert text to speech | ||
| 5 | +import com.k2fsa.sherpa.onnx.*; | ||
| 6 | + | ||
| 7 | +public class NonStreamingTtsCoquiDe { | ||
| 8 | + public static void main(String[] args) { | ||
| 9 | + // please visit | ||
| 10 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | ||
| 11 | + // to download model files | ||
| 12 | + String model = "./vits-coqui-de-css10/model.onnx"; | ||
| 13 | + String tokens = "./vits-coqui-de-css10/tokens.txt"; | ||
| 14 | + String text = "Alles hat ein Ende, nur die Wurst hat zwei."; | ||
| 15 | + | ||
| 16 | + OfflineTtsVitsModelConfig vitsModelConfig = | ||
| 17 | + OfflineTtsVitsModelConfig.builder().setModel(model).setTokens(tokens).build(); | ||
| 18 | + | ||
| 19 | + OfflineTtsModelConfig modelConfig = | ||
| 20 | + OfflineTtsModelConfig.builder() | ||
| 21 | + .setVits(vitsModelConfig) | ||
| 22 | + .setNumThreads(1) | ||
| 23 | + .setDebug(true) | ||
| 24 | + .build(); | ||
| 25 | + | ||
| 26 | + OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build(); | ||
| 27 | + OfflineTts tts = new OfflineTts(config); | ||
| 28 | + | ||
| 29 | + int sid = 0; | ||
| 30 | + float speed = 1.0f; | ||
| 31 | + long start = System.currentTimeMillis(); | ||
| 32 | + GeneratedAudio audio = tts.generate(text, sid, speed); | ||
| 33 | + long stop = System.currentTimeMillis(); | ||
| 34 | + | ||
| 35 | + float timeElapsedSeconds = (stop - start) / 1000.0f; | ||
| 36 | + | ||
| 37 | + float audioDuration = audio.getSamples().length / (float) audio.getSampleRate(); | ||
| 38 | + float real_time_factor = timeElapsedSeconds / audioDuration; | ||
| 39 | + | ||
| 40 | + String waveFilename = "tts-coqui-de.wav"; | ||
| 41 | + audio.save(waveFilename); | ||
| 42 | + System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds); | ||
| 43 | + System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds); | ||
| 44 | + System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor); | ||
| 45 | + System.out.printf("-- text: %s\n", text); | ||
| 46 | + System.out.printf("-- Saved to %s\n", waveFilename); | ||
| 47 | + | ||
| 48 | + tts.release(); | ||
| 49 | + } | ||
| 50 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a piper VITS English TTS model | ||
| 4 | +// to convert text to speech | ||
| 5 | +import com.k2fsa.sherpa.onnx.*; | ||
| 6 | + | ||
| 7 | +public class NonStreamingTtsPiperEn { | ||
| 8 | + public static void main(String[] args) { | ||
| 9 | + // please visit | ||
| 10 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | ||
| 11 | + // to download model files | ||
| 12 | + String model = "./vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx"; | ||
| 13 | + String tokens = "./vits-piper-en_GB-cori-medium/tokens.txt"; | ||
| 14 | + String dataDir = "./vits-piper-en_GB-cori-medium/espeak-ng-data"; | ||
| 15 | + String text = | ||
| 16 | + "Today as always, men fall into two groups: slaves and free men. Whoever does not have" | ||
| 17 | + + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a" | ||
| 18 | + + " businessman, an official, or a scholar."; | ||
| 19 | + | ||
| 20 | + OfflineTtsVitsModelConfig vitsModelConfig = | ||
| 21 | + OfflineTtsVitsModelConfig.builder() | ||
| 22 | + .setModel(model) | ||
| 23 | + .setTokens(tokens) | ||
| 24 | + .setDataDir(dataDir) | ||
| 25 | + .build(); | ||
| 26 | + | ||
| 27 | + OfflineTtsModelConfig modelConfig = | ||
| 28 | + OfflineTtsModelConfig.builder() | ||
| 29 | + .setVits(vitsModelConfig) | ||
| 30 | + .setNumThreads(1) | ||
| 31 | + .setDebug(true) | ||
| 32 | + .build(); | ||
| 33 | + | ||
| 34 | + OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build(); | ||
| 35 | + OfflineTts tts = new OfflineTts(config); | ||
| 36 | + | ||
| 37 | + int sid = 0; | ||
| 38 | + float speed = 1.0f; | ||
| 39 | + long start = System.currentTimeMillis(); | ||
| 40 | + GeneratedAudio audio = tts.generate(text, sid, speed); | ||
| 41 | + long stop = System.currentTimeMillis(); | ||
| 42 | + | ||
| 43 | + float timeElapsedSeconds = (stop - start) / 1000.0f; | ||
| 44 | + | ||
| 45 | + float audioDuration = audio.getSamples().length / (float) audio.getSampleRate(); | ||
| 46 | + float real_time_factor = timeElapsedSeconds / audioDuration; | ||
| 47 | + | ||
| 48 | + String waveFilename = "tts-piper-en.wav"; | ||
| 49 | + audio.save(waveFilename); | ||
| 50 | + System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds); | ||
| 51 | + System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds); | ||
| 52 | + System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor); | ||
| 53 | + System.out.printf("-- text: %s\n", text); | ||
| 54 | + System.out.printf("-- Saved to %s\n", waveFilename); | ||
| 55 | + | ||
| 56 | + tts.release(); | ||
| 57 | + } | ||
| 58 | +} |
java-api-examples/NonStreamingTtsVitsZh.java
0 → 100644
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use a VITS Chinese TTS model | ||
| 4 | +// to convert text to speech. | ||
| 5 | +// | ||
| 6 | +// You can use https://github.com/Plachtaa/VITS-fast-fine-tuning | ||
| 7 | +// to train your model | ||
| 8 | +import com.k2fsa.sherpa.onnx.*; | ||
| 9 | + | ||
| 10 | +public class NonStreamingTtsPiperEn { | ||
| 11 | + public static void main(String[] args) { | ||
| 12 | + // please visit | ||
| 13 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | ||
| 14 | + // to download model files | ||
| 15 | + String model = "./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx"; | ||
| 16 | + String tokens = "./vits-zh-hf-fanchen-C/tokens.txt"; | ||
| 17 | + String lexicon = "./vits-zh-hf-fanchen-C/lexicon.txt"; | ||
| 18 | + String dictDir = "./vits-zh-hf-fanchen-C/dict"; | ||
| 19 | + String ruleFsts = | ||
| 20 | + "./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst"; | ||
| 21 | + String text = "有问题,请拨打110或者手机18601239876。我们的价值观是真诚热爱!"; | ||
| 22 | + | ||
| 23 | + OfflineTtsVitsModelConfig vitsModelConfig = | ||
| 24 | + OfflineTtsVitsModelConfig.builder() | ||
| 25 | + .setModel(model) | ||
| 26 | + .setTokens(tokens) | ||
| 27 | + .setLexicon(lexicon) | ||
| 28 | + .setDictDir(dictDir) | ||
| 29 | + .build(); | ||
| 30 | + | ||
| 31 | + OfflineTtsModelConfig modelConfig = | ||
| 32 | + OfflineTtsModelConfig.builder() | ||
| 33 | + .setVits(vitsModelConfig) | ||
| 34 | + .setNumThreads(1) | ||
| 35 | + .setDebug(true) | ||
| 36 | + .build(); | ||
| 37 | + | ||
| 38 | + OfflineTtsConfig config = | ||
| 39 | + OfflineTtsConfig.builder().setModel(modelConfig).setRuleFsts(ruleFsts).build(); | ||
| 40 | + | ||
| 41 | + OfflineTts tts = new OfflineTts(config); | ||
| 42 | + | ||
| 43 | + int sid = 100; | ||
| 44 | + float speed = 1.0f; | ||
| 45 | + long start = System.currentTimeMillis(); | ||
| 46 | + GeneratedAudio audio = tts.generate(text, sid, speed); | ||
| 47 | + long stop = System.currentTimeMillis(); | ||
| 48 | + | ||
| 49 | + float timeElapsedSeconds = (stop - start) / 1000.0f; | ||
| 50 | + | ||
| 51 | + float audioDuration = audio.getSamples().length / (float) audio.getSampleRate(); | ||
| 52 | + float real_time_factor = timeElapsedSeconds / audioDuration; | ||
| 53 | + | ||
| 54 | + String waveFilename = "tts-vits-zh.wav"; | ||
| 55 | + audio.save(waveFilename); | ||
| 56 | + System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds); | ||
| 57 | + System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds); | ||
| 58 | + System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor); | ||
| 59 | + System.out.printf("-- text: %s\n", text); | ||
| 60 | + System.out.printf("-- Saved to %s\n", waveFilename); | ||
| 61 | + | ||
| 62 | + tts.release(); | ||
| 63 | + } | ||
| 64 | +} |
| @@ -21,3 +21,11 @@ This directory contains examples for the JAVA API of sherpa-onnx. | @@ -21,3 +21,11 @@ This directory contains examples for the JAVA API of sherpa-onnx. | ||
| 21 | ./run-non-streaming-decode-file-whisper.sh | 21 | ./run-non-streaming-decode-file-whisper.sh |
| 22 | ./run-non-streaming-decode-file-nemo.sh | 22 | ./run-non-streaming-decode-file-nemo.sh |
| 23 | ``` | 23 | ``` |
| 24 | + | ||
| 25 | +## Non-Streaming text-to-speech | ||
| 26 | + | ||
| 27 | +```bash | ||
| 28 | +./run-non-streaming-tts-piper-en.sh | ||
| 29 | +./run-non-streaming-tts-coqui-de.sh | ||
| 30 | +./run-non-streaming-tts-vits-zh.sh | ||
| 31 | +``` |
#!/usr/bin/env bash
#
# Runs the NonStreamingTtsCoquiDe example.
#
# Builds the sherpa-onnx JNI library and the Java API jar when they are
# missing, downloads the Coqui German VITS model when it is missing, and then
# launches the example with the Java source-file launcher.
# Expected to be run from the java-api-examples directory (all paths are
# relative to it).

set -ex

# Build the JNI shared library unless a macOS (.dylib) or Linux (.so) build
# already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-coqui-de-css10/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  tar xf vits-coqui-de-css10.tar.bz2
  rm vits-coqui-de-css10.tar.bz2
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsCoquiDe.java
#!/usr/bin/env bash
#
# Runs the NonStreamingTtsPiperEn example.
#
# Builds the sherpa-onnx JNI library and the Java API jar when they are
# missing, downloads the piper English VITS model when it is missing, and then
# launches the example with the Java source-file launcher.
# Expected to be run from the java-api-examples directory (all paths are
# relative to it).

set -ex

# Build the JNI shared library unless a macOS (.dylib) or Linux (.so) build
# already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-piper-en_GB-cori-medium/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
  tar xf vits-piper-en_GB-cori-medium.tar.bz2
  rm vits-piper-en_GB-cori-medium.tar.bz2
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsPiperEn.java
#!/usr/bin/env bash
#
# Runs the NonStreamingTtsVitsZh example.
#
# Builds the sherpa-onnx JNI library and the Java API jar when they are
# missing, downloads the Chinese VITS model when it is missing, and then
# launches the example with the Java source-file launcher.
# Expected to be run from the java-api-examples directory (all paths are
# relative to it).

set -ex

# Build the JNI shared library unless a macOS (.dylib) or Linux (.so) build
# already exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-zh-hf-fanchen-C/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  tar xf vits-zh-hf-fanchen-C.tar.bz2
  rm vits-zh-hf-fanchen-C.tar.bz2
fi

# Quote the library path so a working directory containing spaces still works.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsVitsZh.java
| @@ -30,6 +30,12 @@ java_files += OfflineRecognizerResult.java | @@ -30,6 +30,12 @@ java_files += OfflineRecognizerResult.java | ||
| 30 | java_files += OfflineStream.java | 30 | java_files += OfflineStream.java |
| 31 | java_files += OfflineRecognizer.java | 31 | java_files += OfflineRecognizer.java |
| 32 | 32 | ||
| 33 | +java_files += OfflineTtsVitsModelConfig.java | ||
| 34 | +java_files += OfflineTtsModelConfig.java | ||
| 35 | +java_files += OfflineTtsConfig.java | ||
| 36 | +java_files += GeneratedAudio.java | ||
| 37 | +java_files += OfflineTts.java | ||
| 38 | + | ||
| 33 | class_files := $(java_files:%.java=%.class) | 39 | class_files := $(java_files:%.java=%.class) |
| 34 | 40 | ||
| 35 | java_files := $(addprefix src/$(package_dir)/,$(java_files)) | 41 | java_files := $(addprefix src/$(package_dir)/,$(java_files)) |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
/**
 * A buffer of audio samples together with its sample rate.
 *
 * <p>Loading this class loads the {@code sherpa-onnx-jni} native library, which provides the
 * implementation behind {@link #save(String)}.
 */
public class GeneratedAudio {
  static {
    System.loadLibrary("sherpa-onnx-jni");
  }

  private final int sampleRate;
  private final float[] samples;

  /**
   * Wraps the given samples; the array is stored as-is, without copying.
   *
   * @param samples the audio samples
   * @param sampleRate the sample rate of {@code samples}
   */
  public GeneratedAudio(float[] samples, int sampleRate) {
    this.sampleRate = sampleRate;
    this.samples = samples;
  }

  /** Returns the sample array passed to the constructor (not a copy). */
  public float[] getSamples() {
    return samples;
  }

  /** Returns the sample rate passed to the constructor. */
  public int getSampleRate() {
    return sampleRate;
  }

  /**
   * Saves the audio to {@code filename} through the native library.
   *
   * @param filename path of the file to write
   * @return {@code true} if saved successfully
   */
  public boolean save(String filename) {
    final boolean saved = saveImpl(filename, samples, sampleRate);
    return saved;
  }

  private native boolean saveImpl(String filename, float[] samples, int sampleRate);
}
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class OfflineTts { | ||
| 6 | + static { | ||
| 7 | + System.loadLibrary("sherpa-onnx-jni"); | ||
| 8 | + } | ||
| 9 | + | ||
| 10 | + private long ptr = 0; // this is the asr engine ptrss | ||
| 11 | + | ||
| 12 | + public OfflineTts(OfflineTtsConfig config) { | ||
| 13 | + ptr = newFromFile(config); | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + public GeneratedAudio generate(String text) { | ||
| 17 | + return generate(text, 0, 1.0f); | ||
| 18 | + } | ||
| 19 | + | ||
| 20 | + public GeneratedAudio generate(String text, int sid) { | ||
| 21 | + return generate(text, sid, 1.0f); | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + public GeneratedAudio generate(String text, int sid, float speed) { | ||
| 25 | + Object[] arr = generateImpl(ptr, text, sid, speed); | ||
| 26 | + float[] samples = (float[]) arr[0]; | ||
| 27 | + int sampleRate = (int) arr[1]; | ||
| 28 | + return new GeneratedAudio(samples, sampleRate); | ||
| 29 | + } | ||
| 30 | + | ||
| 31 | + @Override | ||
| 32 | + protected void finalize() throws Throwable { | ||
| 33 | + release(); | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + public void release() { | ||
| 37 | + if (this.ptr == 0) { | ||
| 38 | + return; | ||
| 39 | + } | ||
| 40 | + delete(this.ptr); | ||
| 41 | + this.ptr = 0; | ||
| 42 | + } | ||
| 43 | + | ||
| 44 | + private native void delete(long ptr); | ||
| 45 | + | ||
| 46 | + private native int getSampleRate(long ptr); | ||
| 47 | + | ||
| 48 | + private native int getNumSpeakers(long ptr); | ||
| 49 | + | ||
| 50 | + private native Object[] generateImpl(long ptr, String text, int sid, float speed); | ||
| 51 | + | ||
| 52 | + private native long newFromFile(OfflineTtsConfig config); | ||
| 53 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class OfflineTtsConfig { | ||
| 6 | + private final OfflineTtsModelConfig model; | ||
| 7 | + private final String ruleFsts; | ||
| 8 | + private final String ruleFars; | ||
| 9 | + private final int maxNumSentences; | ||
| 10 | + | ||
| 11 | + private OfflineTtsConfig(Builder builder) { | ||
| 12 | + this.model = builder.model; | ||
| 13 | + this.ruleFsts = builder.ruleFsts; | ||
| 14 | + this.ruleFars = builder.ruleFars; | ||
| 15 | + this.maxNumSentences = builder.maxNumSentences; | ||
| 16 | + } | ||
| 17 | + | ||
| 18 | + public static Builder builder() { | ||
| 19 | + return new Builder(); | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + public OfflineTtsModelConfig getModel() { | ||
| 23 | + return model; | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + public String getRuleFsts() { | ||
| 27 | + return ruleFsts; | ||
| 28 | + } | ||
| 29 | + | ||
| 30 | + public String getRuleFars() { | ||
| 31 | + return ruleFars; | ||
| 32 | + } | ||
| 33 | + | ||
| 34 | + public int getMaxNumSentences() { | ||
| 35 | + return maxNumSentences; | ||
| 36 | + } | ||
| 37 | + | ||
| 38 | + public static class Builder { | ||
| 39 | + private OfflineTtsModelConfig model = OfflineTtsModelConfig.builder().build(); | ||
| 40 | + private String ruleFsts = ""; | ||
| 41 | + private String ruleFars = ""; | ||
| 42 | + private int maxNumSentences = 1; | ||
| 43 | + | ||
| 44 | + public OfflineTtsConfig build() { | ||
| 45 | + return new OfflineTtsConfig(this); | ||
| 46 | + } | ||
| 47 | + | ||
| 48 | + public Builder setModel(OfflineTtsModelConfig model) { | ||
| 49 | + this.model = model; | ||
| 50 | + return this; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + public Builder setRuleFsts(String ruleFsts) { | ||
| 54 | + this.ruleFsts = ruleFsts; | ||
| 55 | + return this; | ||
| 56 | + } | ||
| 57 | + | ||
| 58 | + public Builder setRuleFars(String ruleFars) { | ||
| 59 | + this.ruleFars = ruleFars; | ||
| 60 | + return this; | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + public Builder setMaxNumSentences(int maxNumSentences) { | ||
| 64 | + this.maxNumSentences = maxNumSentences; | ||
| 65 | + return this; | ||
| 66 | + } | ||
| 67 | + } | ||
| 68 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
| 5 | +public class OfflineTtsModelConfig { | ||
| 6 | + private final OfflineTtsVitsModelConfig vits; | ||
| 7 | + private final int numThreads; | ||
| 8 | + private final boolean debug; | ||
| 9 | + private final String provider; | ||
| 10 | + | ||
| 11 | + private OfflineTtsModelConfig(Builder builder) { | ||
| 12 | + this.vits = builder.vits; | ||
| 13 | + this.numThreads = builder.numThreads; | ||
| 14 | + this.debug = builder.debug; | ||
| 15 | + this.provider = builder.provider; | ||
| 16 | + } | ||
| 17 | + | ||
| 18 | + public static Builder builder() { | ||
| 19 | + return new Builder(); | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + public OfflineTtsVitsModelConfig getVits() { | ||
| 23 | + return vits; | ||
| 24 | + } | ||
| 25 | + | ||
| 26 | + public static class Builder { | ||
| 27 | + private OfflineTtsVitsModelConfig vits = OfflineTtsVitsModelConfig.builder().build(); | ||
| 28 | + private int numThreads = 1; | ||
| 29 | + private boolean debug = true; | ||
| 30 | + private String provider = "cpu"; | ||
| 31 | + | ||
| 32 | + public OfflineTtsModelConfig build() { | ||
| 33 | + return new OfflineTtsModelConfig(this); | ||
| 34 | + } | ||
| 35 | + | ||
| 36 | + public Builder setVits(OfflineTtsVitsModelConfig vits) { | ||
| 37 | + this.vits = vits; | ||
| 38 | + return this; | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + public Builder setNumThreads(int numThreads) { | ||
| 42 | + this.numThreads = numThreads; | ||
| 43 | + return this; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + public Builder setDebug(boolean debug) { | ||
| 47 | + this.debug = debug; | ||
| 48 | + return this; | ||
| 49 | + } | ||
| 50 | + | ||
| 51 | + public Builder setProvider(String provider) { | ||
| 52 | + this.provider = provider; | ||
| 53 | + return this; | ||
| 54 | + } | ||
| 55 | + } | ||
| 56 | +} |
| 1 | +// Copyright 2024 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +package com.k2fsa.sherpa.onnx; | ||
| 4 | + | ||
/**
 * Immutable configuration of a VITS text-to-speech model, created through {@link #builder()}.
 *
 * <p>Every string setting defaults to the empty string, so a configuration built without any
 * setter calls contains no {@code null} values.
 */
public class OfflineTtsVitsModelConfig {
  // NOTE(review): the native library presumably reads these fields by name; keep the names
  // and types unchanged -- confirm against the JNI code.
  private final String model;
  private final String lexicon;
  private final String tokens;
  private final String dataDir;
  private final String dictDir;
  private final float noiseScale;
  private final float noiseScaleW;
  private final float lengthScale;

  private OfflineTtsVitsModelConfig(Builder builder) {
    this.model = builder.model;
    this.lexicon = builder.lexicon;
    this.tokens = builder.tokens;
    this.dataDir = builder.dataDir;
    this.dictDir = builder.dictDir;
    this.noiseScale = builder.noiseScale;
    this.noiseScaleW = builder.noiseScaleW;
    this.lengthScale = builder.lengthScale;
  }

  /** Returns a new builder pre-populated with the default values. */
  public static Builder builder() {
    return new Builder();
  }

  /** Returns the model path (empty string by default). */
  public String getModel() {
    return model;
  }

  /** Returns the lexicon path (empty string by default). */
  public String getLexicon() {
    return lexicon;
  }

  /** Returns the tokens path (empty string by default). */
  public String getTokens() {
    return tokens;
  }

  /** Returns the data directory (empty string by default). */
  public String getDataDir() {
    return dataDir;
  }

  /** Returns the dictionary directory (empty string by default). */
  public String getDictDir() {
    return dictDir;
  }

  /** Returns the length scale (1.0 by default). */
  public float getLengthScale() {
    return lengthScale;
  }

  /** Returns the noise scale (0.667 by default). */
  public float getNoiseScale() {
    return noiseScale;
  }

  /** Returns the noise scale W (0.8 by default). */
  public float getNoiseScaleW() {
    return noiseScaleW;
  }

  /** Fluent builder for {@link OfflineTtsVitsModelConfig}. */
  public static class Builder {
    // model and tokens default to "" like the other string settings, so a default-built
    // configuration never carries null strings.
    private String model = "";
    private String lexicon = "";
    private String tokens = "";
    private String dataDir = "";
    private String dictDir = "";
    private float noiseScale = 0.667f;
    private float noiseScaleW = 0.8f;
    private float lengthScale = 1.0f;

    /** Creates the configuration from the values set so far. */
    public OfflineTtsVitsModelConfig build() {
      return new OfflineTtsVitsModelConfig(this);
    }

    public Builder setModel(String model) {
      this.model = model;
      return this;
    }

    public Builder setTokens(String tokens) {
      this.tokens = tokens;
      return this;
    }

    public Builder setLexicon(String lexicon) {
      this.lexicon = lexicon;
      return this;
    }

    public Builder setDataDir(String dataDir) {
      this.dataDir = dataDir;
      return this;
    }

    public Builder setDictDir(String dictDir) {
      this.dictDir = dictDir;
      return this;
    }

    public Builder setNoiseScale(float noiseScale) {
      this.noiseScale = noiseScale;
      return this;
    }

    public Builder setNoiseScaleW(float noiseScaleW) {
      this.noiseScaleW = noiseScaleW;
      return this;
    }

    public Builder setLengthScale(float lengthScale) {
      this.lengthScale = lengthScale;
      return this;
    }
  }
}
-
请 注册 或 登录 后发表评论