Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-07-28 12:07:19 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-07-28 12:07:19 +0800
Commit
1f8e57513351feb7e5b0dc5fdbca9a9258ff359f
1f8e5751
1 parent
4e6aeff0
Add TTS example for Java API. (#1176)
It plays the generated audio as it is still generating.
隐藏空白字符变更
内嵌
并排对比
正在显示
6 个修改的文件
包含
262 行增加
和
0 行删除
java-api-examples/NonStreamingTtsPiperEnWithCallback.java
java-api-examples/README.md
java-api-examples/run-non-streaming-tts-piper-en-with-callback.sh
sherpa-onnx/java-api/Makefile
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTts.java
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsCallback.java
java-api-examples/NonStreamingTtsPiperEnWithCallback.java
0 → 100644
查看文件 @
1f8e575
// Copyright 2024 Xiaomi Corporation
//
// References
// https://www.baeldung.com/java-passing-method-parameter
// https://www.geeksforgeeks.org/how-to-create-a-thread-safe-queue-in-java/
// https://stackoverflow.com/questions/74077394/java-audio-how-to-continuously-write-bytes-to-an-audio-file-as-they-are-being-g
// This file shows how to use a piper VITS English TTS model
// to convert text to speech. You can pass a callback to the generation call,
// which is invoked each time max_num_sentences sentences have finished
// generating.
//
// The callback saves the generated samples into a queue, which are played
// by a separate thread.
import
com.k2fsa.sherpa.onnx.*
;
import
java.util.Queue
;
import
java.util.concurrent.*
;
import
java.util.concurrent.ConcurrentLinkedQueue
;
import
javax.sound.sampled.*
;
/**
 * Example: convert text to speech with a piper VITS English model and play the audio while it is
 * still being generated.
 *
 * <p>The generation callback converts each chunk of float samples to 16-bit PCM and puts it into a
 * queue. A separate thread takes chunks from the queue and writes them to the audio device. A
 * one-byte array is used as the end-of-stream marker, since real chunks always have an even
 * number of bytes.
 */
public class NonStreamingTtsPiperEn {
  public static void main(String[] args) {
    // please visit
    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // to download model files
    String model = "./vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx";
    String tokens = "./vits-piper-en_GB-cori-medium/tokens.txt";
    String dataDir = "./vits-piper-en_GB-cori-medium/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsVitsModelConfig vitsModelConfig =
        OfflineTtsVitsModelConfig.builder()
            .setModel(model)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setVits(vitsModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
    OfflineTts tts = new OfflineTts(config);

    // A blocking queue lets the play thread sleep until samples arrive
    // instead of spinning on isEmpty().
    BlockingQueue<byte[]> samplesQueue = new LinkedBlockingQueue<>();

    // Starts with no permits: the play thread opens the audio device only
    // after the first chunk (or the end-of-stream marker) has been queued.
    Semaphore canPlaySem = new Semaphore(0);

    Runnable playRunnable =
        () -> {
          try {
            canPlaySem.acquire();
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.out.println("Failed to get canPlay semaphore in the play thread");
            return;
          }

          // https://docs.oracle.com/javase/8/docs/api/javax/sound/sampled/AudioFormat.html
          AudioFormat format =
              new AudioFormat(
                  tts.getSampleRate(), // sampleRate
                  16, // sampleSizeInBits
                  1, // channels
                  true, // signed
                  false // bigEndian
                  );

          DataLine.Info info = new DataLine.Info(SourceDataLine.class, format);

          SourceDataLine line;
          try {
            line = (SourceDataLine) AudioSystem.getLine(info);
            // 16-bit mono: sampleRate bytes hold 0.5 seconds of audio
            int bufferSizeInBytes = tts.getSampleRate();
            line.open(format, bufferSizeInBytes);
          } catch (LineUnavailableException ex) {
            System.out.println("Failed to open a device for playing");
            return;
          }

          line.start();
          try {
            while (true) {
              byte[] samples = samplesQueue.take();
              if (samples.length == 1) {
                // end of the generating
                break;
              }
              line.write(samples, 0, samples.length);
            }
            // Wait until everything written so far has actually been played.
            line.drain();
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            System.out.println("Interrupted while waiting for samples in the play thread");
          } finally {
            line.close();
          }
        };

    Thread playThread = new Thread(playRunnable);
    playThread.start();

    int sid = 0;
    float speed = 1.0f;
    long start = System.currentTimeMillis();

    GeneratedAudio audio;
    try {
      audio =
          tts.generateWithCallback(
              text,
              sid,
              speed,
              (float[] samples) -> {
                // The samples array must not be kept after this callback returns,
                // so we queue a converted copy for the play thread.
                samplesQueue.add(toInt16LittleEndian(samples));
                canPlaySem.release();

                // return 1 to continue generation
                // return 0 to stop generation
                return 1;
              });
    } finally {
      // Since a sample always has two bytes. We put a single byte
      // into the queue to indicate that we have finished processing.
      samplesQueue.add(new byte[1]);

      // Wake the play thread even if the callback was never invoked or
      // generation failed; otherwise it would wait on the semaphore forever.
      canPlaySem.release();
    }

    long stop = System.currentTimeMillis();

    float timeElapsedSeconds = (stop - start) / 1000.0f;
    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
    float realTimeFactor = timeElapsedSeconds / audioDuration;

    try {
      playThread.join();
    } catch (InterruptedException ex) {
      Thread.currentThread().interrupt();
      System.out.println("Failed to join the play thread");
      return;
    }

    String waveFilename = "tts-piper-en.wav";
    audio.save(waveFilename);

    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
    System.out.printf("-- text: %s\n", text);
    System.out.printf("-- Saved to %s\n", waveFilename);

    tts.release();
  }

  /**
   * Converts float samples to signed 16-bit little-endian PCM bytes.
   *
   * <p>Values above 1 or below -1 are clipped to 1 and -1 before being scaled by 32767.
   *
   * @param samples float samples to convert
   * @return a new array with two bytes per input sample, low byte first
   */
  private static byte[] toInt16LittleEndian(float[] samples) {
    byte[] samplesInt16 = new byte[samples.length * 2];
    for (int i = 0; i < samples.length; ++i) {
      float s = samples[i];
      if (s > 1) {
        s = 1;
      }
      if (s < -1) {
        s = -1;
      }

      short t = (short) (s * 32767);

      // we use little endian
      samplesInt16[2 * i] = (byte) (t & 0xff);
      samplesInt16[2 * i + 1] = (byte) ((t & 0xff00) >> 8);
    }
    return samplesInt16;
  }
}
...
...
java-api-examples/README.md
查看文件 @
1f8e575
...
...
@@ -24,6 +24,7 @@ This directory contains examples for the JAVA API of sherpa-onnx.
./run-non-streaming-decode-file-nemo.sh
```
## Non-Streaming text-to-speech
```bash
...
...
@@ -32,6 +33,12 @@ This directory contains examples for the JAVA API of sherpa-onnx.
./run-non-streaming-tts-vits-zh.sh
```
## Non-Streaming text-to-speech (Play as it is generating)
```bash
./run-non-streaming-tts-piper-en-with-callback.sh
```
## Spoken language identification
```bash
...
...
java-api-examples/run-non-streaming-tts-piper-en-with-callback.sh
0 → 100755
查看文件 @
1f8e575
#!/usr/bin/env bash
#
# Builds the JNI library and the Java API jar if they are missing, downloads
# the piper English model if it is missing, and then runs
# NonStreamingTtsPiperEnWithCallback.java.
#
# All paths are relative to the current directory.

set -ex

# Build the native JNI library only when neither the macOS (.dylib) nor the
# Linux (.so) artifact exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it has not been built yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models
if [ ! -f ./vits-piper-en_GB-cori-medium/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
  tar xf vits-piper-en_GB-cori-medium.tar.bz2
  rm vits-piper-en_GB-cori-medium.tar.bz2
fi

# $PWD is quoted so that a checkout path containing spaces is passed to java
# as a single argument.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsPiperEnWithCallback.java
...
...
sherpa-onnx/java-api/Makefile
查看文件 @
1f8e575
...
...
@@ -38,6 +38,7 @@ java_files += OfflineTtsVitsModelConfig.java
# TTS and spoken-language-identification sources appended to java_files.
java_files += \
  OfflineTtsModelConfig.java \
  OfflineTtsConfig.java \
  GeneratedAudio.java \
  OfflineTtsCallback.java \
  OfflineTts.java \
  SpokenLanguageIdentificationWhisperConfig.java
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTts.java
查看文件 @
1f8e575
...
...
@@ -2,6 +2,7 @@
package
com
.
k2fsa
.
sherpa
.
onnx
;
public
class
OfflineTts
{
static
{
System
.
loadLibrary
(
"sherpa-onnx-jni"
);
...
...
@@ -13,6 +14,10 @@ public class OfflineTts {
ptr
=
newFromFile
(
config
);
}
/**
 * Returns the sample rate obtained from {@code getSampleRate(ptr)} for this instance's handle.
 *
 * @return the sample rate as an int
 */
public int getSampleRate() {
  final int sampleRate = getSampleRate(ptr);
  return sampleRate;
}
/**
 * Generates audio for {@code text} by delegating to the three-argument {@code generate} overload
 * with {@code 0} and {@code 1.0f} as the second and third arguments.
 *
 * @param text the text to synthesize
 * @return the generated audio
 */
public GeneratedAudio generate(String text) {
  final int defaultSid = 0;
  final float defaultSpeed = 1.0f;
  return generate(text, defaultSid, defaultSpeed);
}
...
...
@@ -28,6 +33,21 @@ public class OfflineTts {
return
new
GeneratedAudio
(
samples
,
sampleRate
);
}
/**
 * Generates audio for {@code text} with {@code sid = 0} and {@code speed = 1.0f}, passing
 * {@code callback} through to the four-argument overload.
 *
 * @param text the text to synthesize
 * @param callback callback forwarded to the generation call
 * @return the generated audio
 */
public GeneratedAudio generateWithCallback(String text, OfflineTtsCallback callback) {
  final int defaultSid = 0;
  final float defaultSpeed = 1.0f;
  return generateWithCallback(text, defaultSid, defaultSpeed, callback);
}
/**
 * Generates audio for {@code text} with the given {@code sid} and {@code speed = 1.0f}, passing
 * {@code callback} through to the four-argument overload.
 *
 * @param text the text to synthesize
 * @param sid value forwarded as {@code sid} to the four-argument overload
 * @param callback callback forwarded to the generation call
 * @return the generated audio
 */
public GeneratedAudio generateWithCallback(String text, int sid, OfflineTtsCallback callback) {
  final float defaultSpeed = 1.0f;
  return generateWithCallback(text, sid, defaultSpeed, callback);
}
/**
 * Generates audio for {@code text} via {@code generateWithCallbackImpl} and wraps the result.
 *
 * <p>The native call returns an {@code Object[]} whose element 0 is cast to {@code float[]}
 * (samples) and whose element 1 is cast to {@code int} (sample rate).
 *
 * @param text the text to synthesize
 * @param sid value forwarded as {@code sid} to the native call
 * @param speed value forwarded as {@code speed} to the native call
 * @param callback callback forwarded to the native call
 * @return a {@code GeneratedAudio} built from the returned samples and sample rate
 */
public GeneratedAudio generateWithCallback(
    String text, int sid, float speed, OfflineTtsCallback callback) {
  final Object[] result = generateWithCallbackImpl(ptr, text, sid, speed, callback);
  return new GeneratedAudio((float[]) result[0], (int) result[1]);
}
@Override
protected
void
finalize
()
throws
Throwable
{
release
();
...
...
@@ -49,5 +69,7 @@ public class OfflineTts {
// Native generation call taking the handle, text, sid and speed.
// NOTE(review): presumably returns {float[] samples, int sampleRate} like
// generateWithCallbackImpl -- confirm against the native implementation.
private
native
Object
[]
generateImpl
(
long
ptr
,
String
text
,
int
sid
,
float
speed
);
// Native generation call that additionally receives an OfflineTtsCallback.
// The caller in this class casts element 0 of the returned array to float[]
// (samples) and element 1 to int (sample rate).
private
native
Object
[]
generateWithCallbackImpl
(
long
ptr
,
String
text
,
int
sid
,
float
speed
,
OfflineTtsCallback
callback
);
// Native constructor helper: the constructor stores its return value in ptr,
// which is then passed as the first argument of the other native calls.
private
native
long
newFromFile
(
OfflineTtsConfig
config
);
}
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsCallback.java
0 → 100644
查看文件 @
1f8e575
package
com
.
k2fsa
.
sherpa
.
onnx
;
/**
 * Callback that can be passed to {@code OfflineTts.generateWithCallback} to receive generated
 * samples.
 */
@FunctionalInterface
public interface OfflineTtsCallback {
  /**
   * Receives a chunk of generated samples.
   *
   * @param samples the generated samples for this invocation
   * @return a status value. NOTE(review): the Java example returns 1 to continue generation and
   *     documents 0 as "stop generation"; confirm against the native implementation
   */
  Integer invoke(float[] samples);
}
...
...
请
注册
或
登录
后发表评论