Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-01-02 15:15:30 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-01-02 15:15:30 +0800
Commit
a00d3b482123e65e3544e40ec0360e3d66d47faa
a00d3b48
1 parent
f457baea
Add Java API for Matcha-TTS models. (#1673)
隐藏空白字符变更
内嵌
并排对比
正在显示
11 个修改的文件
包含
359 行增加
和
2 行删除
.github/workflows/run-java-test.yaml
.gitignore
java-api-examples/NonStreamingTtsMatchaEn.java
java-api-examples/NonStreamingTtsMatchaZh.java
java-api-examples/run-non-streaming-tts-matcha-en.sh
java-api-examples/run-non-streaming-tts-matcha-zh.sh
sherpa-onnx/csrc/piper-phonemize-lexicon.cc
sherpa-onnx/java-api/Makefile
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsMatchaModelConfig.java
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsModelConfig.java
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig.java
.github/workflows/run-java-test.yaml
查看文件 @
a00d3b4
...
...
@@ -235,6 +235,13 @@ jobs:
shell
:
bash
run
:
|
cd ./java-api-examples
./run-non-streaming-tts-matcha-zh.sh
./run-non-streaming-tts-matcha-en.sh
rm -rf matcha-icefall-*
rm hifigan_v2.onnx
./run-non-streaming-tts-piper-en.sh
rm -rf vits-piper-*
...
...
.gitignore
查看文件 @
a00d3b4
...
...
@@ -126,3 +126,4 @@ sherpa-onnx-moonshine-base-en-int8
harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md
matcha-icefall-zh-baker
matcha-icefall-en_US-ljspeech
...
...
java-api-examples/NonStreamingTtsMatchaEn.java
0 → 100644
查看文件 @
a00d3b4
// Copyright 2025 Xiaomi Corporation
// This file shows how to use a matcha English model
// to convert text to speech
import
com.k2fsa.sherpa.onnx.*
;
/**
 * Example program: converts a fixed English sentence to speech with a Matcha
 * acoustic model plus a vocoder, saves the result to a wave file and prints
 * timing statistics (elapsed time, audio duration and real-time factor).
 */
public class NonStreamingTtsMatchaEn {
  /**
   * Runs the example. All model paths are relative to the current working
   * directory.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
    // to download model files
    String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
    String vocoder = "./hifigan_v2.onnx";
    String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
    String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
    String text =
        "Today as always, men fall into two groups: slaves and free men. Whoever does not have"
            + " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
            + " businessman, an official, or a scholar.";

    OfflineTtsMatchaModelConfig matchaModelConfig =
        OfflineTtsMatchaModelConfig.builder()
            .setAcousticModel(acousticModel)
            .setVocoder(vocoder)
            .setTokens(tokens)
            .setDataDir(dataDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setMatcha(matchaModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();

    OfflineTts tts = new OfflineTts(config);
    try {
      int sid = 0;
      float speed = 1.0f;

      long start = System.currentTimeMillis();
      GeneratedAudio audio = tts.generate(text, sid, speed);
      long stop = System.currentTimeMillis();

      float timeElapsedSeconds = (stop - start) / 1000.0f;

      // Duration of the generated audio = number of samples / sample rate.
      float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
      float realTimeFactor = timeElapsedSeconds / audioDuration;

      String waveFilename = "tts-matcha-en.wav";
      audio.save(waveFilename);

      System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
      // Report the audio duration itself, not the elapsed time.
      System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
      System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
      System.out.printf("-- text: %s\n", text);
      System.out.printf("-- Saved to %s\n", waveFilename);
    } finally {
      // Always release the TTS object, even when generation or saving fails.
      tts.release();
    }
  }
}
...
...
java-api-examples/NonStreamingTtsMatchaZh.java
0 → 100644
查看文件 @
a00d3b4
// Copyright 2025 Xiaomi Corporation
// This file shows how to use a matcha Chinese TTS model
// to convert text to speech
import
com.k2fsa.sherpa.onnx.*
;
/**
 * Example program: converts a fixed Chinese sentence to speech with a Matcha
 * acoustic model plus a vocoder, saves the result to a wave file and prints
 * timing statistics (elapsed time, audio duration and real-time factor).
 */
public class NonStreamingTtsMatchaZh {
  /**
   * Runs the example. All model paths are relative to the current working
   * directory.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // please visit
    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
    // to download model files
    String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx";
    String vocoder = "./hifigan_v2.onnx";
    String tokens = "./matcha-icefall-zh-baker/tokens.txt";
    String lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
    String dictDir = "./matcha-icefall-zh-baker/dict";
    // Comma-separated list of rule FST files passed to the TTS config.
    String ruleFsts =
        "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";
    String text =
        "某某银行的副行长和一些行政领导表示,他们去过长江"
            + "和长白山; 经济不断增长。"
            + "2024年12月31号,拨打110或者18920240511。"
            + "123456块钱。";

    OfflineTtsMatchaModelConfig matchaModelConfig =
        OfflineTtsMatchaModelConfig.builder()
            .setAcousticModel(acousticModel)
            .setVocoder(vocoder)
            .setTokens(tokens)
            .setLexicon(lexicon)
            .setDictDir(dictDir)
            .build();

    OfflineTtsModelConfig modelConfig =
        OfflineTtsModelConfig.builder()
            .setMatcha(matchaModelConfig)
            .setNumThreads(1)
            .setDebug(true)
            .build();

    OfflineTtsConfig config =
        OfflineTtsConfig.builder().setModel(modelConfig).setRuleFsts(ruleFsts).build();

    OfflineTts tts = new OfflineTts(config);
    try {
      int sid = 0;
      float speed = 1.0f;

      long start = System.currentTimeMillis();
      GeneratedAudio audio = tts.generate(text, sid, speed);
      long stop = System.currentTimeMillis();

      float timeElapsedSeconds = (stop - start) / 1000.0f;

      // Duration of the generated audio = number of samples / sample rate.
      float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
      float realTimeFactor = timeElapsedSeconds / audioDuration;

      String waveFilename = "tts-matcha-zh.wav";
      audio.save(waveFilename);

      System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
      // Report the audio duration itself, not the elapsed time.
      System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
      System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
      System.out.printf("-- text: %s\n", text);
      System.out.printf("-- Saved to %s\n", waveFilename);
    } finally {
      // Always release the TTS object, even when generation or saving fails.
      tts.release();
    }
  }
}
...
...
java-api-examples/run-non-streaming-tts-matcha-en.sh
0 → 100755
查看文件 @
a00d3b4
#!/usr/bin/env bash
#
# Runs the English Matcha TTS Java example.
#
# Steps, each skipped when its output already exists:
#   1. build the JNI shared library under ../build
#   2. build sherpa-onnx.jar under ../sherpa-onnx/java-api
#   3. download the acoustic model and the vocoder into the current directory
#
# All paths are relative, so run this script from the directory that contains
# NonStreamingTtsMatchaEn.java.

set -ex

# Build the JNI library only if neither the macOS nor the Linux flavor exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# $PWD is quoted so that a checkout path containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsMatchaEn.java
...
...
java-api-examples/run-non-streaming-tts-matcha-zh.sh
0 → 100755
查看文件 @
a00d3b4
#!/usr/bin/env bash
#
# Runs the Chinese Matcha TTS Java example.
#
# Steps, each skipped when its output already exists:
#   1. build the JNI shared library under ../build
#   2. build sherpa-onnx.jar under ../sherpa-onnx/java-api
#   3. download the acoustic model and the vocoder into the current directory
#
# All paths are relative, so run this script from the directory that contains
# NonStreamingTtsMatchaZh.java.

set -ex

# Build the JNI library only if neither the macOS nor the Linux flavor exists.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# $PWD is quoted so that a checkout path containing spaces is not word-split.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsMatchaZh.java
...
...
sherpa-onnx/csrc/piper-phonemize-lexicon.cc
查看文件 @
a00d3b4
...
...
@@ -369,6 +369,11 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
template
PiperPhonemizeLexicon
::
PiperPhonemizeLexicon
(
NativeResourceManager
*
mgr
,
const
std
::
string
&
tokens
,
const
std
::
string
&
data_dir
,
const
OfflineTtsVitsModelMetaData
&
vits_meta_data
);
template
PiperPhonemizeLexicon
::
PiperPhonemizeLexicon
(
NativeResourceManager
*
mgr
,
const
std
::
string
&
tokens
,
const
std
::
string
&
data_dir
,
const
OfflineTtsMatchaModelMetaData
&
matcha_meta_data
);
#endif
...
...
sherpa-onnx/java-api/Makefile
查看文件 @
a00d3b4
...
...
@@ -35,6 +35,7 @@ java_files += OfflineRecognizerResult.java
java_files
+=
OfflineStream.java
java_files
+=
OfflineRecognizer.java
java_files
+=
OfflineTtsMatchaModelConfig.java
java_files
+=
OfflineTtsVitsModelConfig.java
java_files
+=
OfflineTtsModelConfig.java
java_files
+=
OfflineTtsConfig.java
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsMatchaModelConfig.java
0 → 100644
查看文件 @
a00d3b4
// Copyright 2025 Xiaomi Corporation
package
com
.
k2fsa
.
sherpa
.
onnx
;
/**
 * Immutable settings for a Matcha TTS model.
 *
 * <p>Instances are created through {@link #builder()}; every value is copied
 * from the {@link Builder} at {@link Builder#build()} time.
 */
public class OfflineTtsMatchaModelConfig {
  // NOTE(review): field names are kept unchanged in case native code looks them
  // up by name - confirm before renaming any of them.
  private final String acousticModel;
  private final String vocoder;
  private final String lexicon;
  private final String tokens;
  private final String dataDir;
  private final String dictDir;
  private final float noiseScale;
  private final float lengthScale;

  /** Copies a snapshot of the builder's current values. */
  private OfflineTtsMatchaModelConfig(Builder builder) {
    acousticModel = builder.acousticModel;
    vocoder = builder.vocoder;
    lexicon = builder.lexicon;
    tokens = builder.tokens;
    dataDir = builder.dataDir;
    dictDir = builder.dictDir;
    noiseScale = builder.noiseScale;
    lengthScale = builder.lengthScale;
  }

  /**
   * Creates a builder preloaded with the default values.
   *
   * @return a new {@link Builder}
   */
  public static Builder builder() {
    return new Builder();
  }

  /** @return the configured acoustic model value */
  public String getAcousticModel() {
    return acousticModel;
  }

  /** @return the configured vocoder value */
  public String getVocoder() {
    return vocoder;
  }

  /** @return the configured lexicon value */
  public String getLexicon() {
    return lexicon;
  }

  /** @return the configured tokens value */
  public String getTokens() {
    return tokens;
  }

  /** @return the configured data directory value */
  public String getDataDir() {
    return dataDir;
  }

  /** @return the configured dict directory value */
  public String getDictDir() {
    return dictDir;
  }

  /** @return the configured noise scale */
  public float getNoiseScale() {
    return noiseScale;
  }

  /** @return the configured length scale */
  public float getLengthScale() {
    return lengthScale;
  }

  /**
   * Fluent builder for {@link OfflineTtsMatchaModelConfig}.
   *
   * <p>String options default to the empty string; both scales default to
   * {@code 1.0f}. Every setter returns this builder so calls can be chained.
   */
  public static class Builder {
    private String acousticModel = "";
    private String vocoder = "";
    private String lexicon = "";
    private String tokens = "";
    private String dataDir = "";
    private String dictDir = "";
    private float noiseScale = 1.0f;
    private float lengthScale = 1.0f;

    /** Sets the acoustic model value and returns this builder. */
    public Builder setAcousticModel(String acousticModel) {
      this.acousticModel = acousticModel;
      return this;
    }

    /** Sets the vocoder value and returns this builder. */
    public Builder setVocoder(String vocoder) {
      this.vocoder = vocoder;
      return this;
    }

    /** Sets the lexicon value and returns this builder. */
    public Builder setLexicon(String lexicon) {
      this.lexicon = lexicon;
      return this;
    }

    /** Sets the tokens value and returns this builder. */
    public Builder setTokens(String tokens) {
      this.tokens = tokens;
      return this;
    }

    /** Sets the data directory value and returns this builder. */
    public Builder setDataDir(String dataDir) {
      this.dataDir = dataDir;
      return this;
    }

    /** Sets the dict directory value and returns this builder. */
    public Builder setDictDir(String dictDir) {
      this.dictDir = dictDir;
      return this;
    }

    /** Sets the noise scale and returns this builder. */
    public Builder setNoiseScale(float noiseScale) {
      this.noiseScale = noiseScale;
      return this;
    }

    /** Sets the length scale and returns this builder. */
    public Builder setLengthScale(float lengthScale) {
      this.lengthScale = lengthScale;
      return this;
    }

    /**
     * Creates a config holding this builder's current values.
     *
     * @return a new {@link OfflineTtsMatchaModelConfig}
     */
    public OfflineTtsMatchaModelConfig build() {
      return new OfflineTtsMatchaModelConfig(this);
    }
  }
}
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsModelConfig.java
查看文件 @
a00d3b4
...
...
@@ -4,12 +4,14 @@ package com.k2fsa.sherpa.onnx;
public
class
OfflineTtsModelConfig
{
private
final
OfflineTtsVitsModelConfig
vits
;
private
final
OfflineTtsMatchaModelConfig
matcha
;
private
final
int
numThreads
;
private
final
boolean
debug
;
private
final
String
provider
;
private
OfflineTtsModelConfig
(
Builder
builder
)
{
this
.
vits
=
builder
.
vits
;
this
.
matcha
=
builder
.
matcha
;
this
.
numThreads
=
builder
.
numThreads
;
this
.
debug
=
builder
.
debug
;
this
.
provider
=
builder
.
provider
;
...
...
@@ -23,8 +25,13 @@ public class OfflineTtsModelConfig {
return
vits
;
}
public
OfflineTtsMatchaModelConfig
getMatcha
()
{
return
matcha
;
}
public
static
class
Builder
{
private
OfflineTtsVitsModelConfig
vits
=
OfflineTtsVitsModelConfig
.
builder
().
build
();
private
OfflineTtsMatchaModelConfig
matcha
=
OfflineTtsMatchaModelConfig
.
builder
().
build
();
private
int
numThreads
=
1
;
private
boolean
debug
=
true
;
private
String
provider
=
"cpu"
;
...
...
@@ -38,6 +45,11 @@ public class OfflineTtsModelConfig {
return
this
;
}
public
Builder
setMatcha
(
OfflineTtsMatchaModelConfig
matcha
)
{
this
.
matcha
=
matcha
;
return
this
;
}
public
Builder
setNumThreads
(
int
numThreads
)
{
this
.
numThreads
=
numThreads
;
return
this
;
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig.java
查看文件 @
a00d3b4
...
...
@@ -60,9 +60,9 @@ public class OfflineTtsVitsModelConfig {
}
public
static
class
Builder
{
private
String
model
;
private
String
model
=
""
;
private
String
lexicon
=
""
;
private
String
tokens
;
private
String
tokens
=
""
;
private
String
dataDir
=
""
;
private
String
dictDir
=
""
;
private
float
noiseScale
=
0.667f
;
...
...
请
注册
或
登录
后发表评论