test_tts.kt
6.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
package com.k2fsa.sherpa.onnx
/**
 * Entry point: runs every TTS example in this file, one after another.
 *
 * The examples run in the order VITS, Matcha, Kokoro (English),
 * Kokoro (Chinese + English), Kitten (English).
 */
fun main() {
    val examples = listOf(
        ::testVits,
        ::testMatcha,
        ::testKokoroEn,
        ::testKokoroZhEn,
        ::testKittenEn,
    )
    // Invoke sequentially; an exception in one example stops the remaining ones.
    examples.forEach { runExample -> runExample() }
}
/**
 * Synthesizes a mixed Chinese/English sentence with the Kokoro model files under
 * `./kokoro-multi-lang-v1_0` and saves the result to `test-kokoro-zh-en.wav`.
 *
 * All paths are relative to the current working directory.
 * [callback] is passed to the engine as the generation callback.
 */
fun testKokoroZhEn() {
    // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    val config = OfflineTtsConfig(
        model = OfflineTtsModelConfig(
            kokoro = OfflineTtsKokoroModelConfig(
                model = "./kokoro-multi-lang-v1_0/model.onnx",
                voices = "./kokoro-multi-lang-v1_0/voices.bin",
                tokens = "./kokoro-multi-lang-v1_0/tokens.txt",
                dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data",
                dictDir = "./kokoro-multi-lang-v1_0/dict",
                // Two lexicon files (English and Chinese) passed as one comma-separated string.
                lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt",
            ),
            numThreads = 2,
            debug = true,
        ),
    )

    val tts = OfflineTts(config = config)
    try {
        val audio = tts.generateWithCallback(
            text = "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?",
            callback = ::callback,
        )
        audio.save(filename = "test-kokoro-zh-en.wav")
    } finally {
        // Release the engine even if synthesis or saving throws.
        tts.release()
    }
    println("Saved to test-kokoro-zh-en.wav")
}
/**
 * Synthesizes a short English sentence with the Kokoro model files under
 * `./kokoro-en-v0_19` and saves the result to `test-kokoro-en.wav`.
 *
 * All paths are relative to the current working directory.
 * [callback] is passed to the engine as the generation callback.
 */
fun testKokoroEn() {
    // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    val config = OfflineTtsConfig(
        model = OfflineTtsModelConfig(
            kokoro = OfflineTtsKokoroModelConfig(
                model = "./kokoro-en-v0_19/model.onnx",
                voices = "./kokoro-en-v0_19/voices.bin",
                tokens = "./kokoro-en-v0_19/tokens.txt",
                dataDir = "./kokoro-en-v0_19/espeak-ng-data",
            ),
            numThreads = 2,
            debug = true,
        ),
    )

    val tts = OfflineTts(config = config)
    try {
        val audio = tts.generateWithCallback(
            text = "How are you doing today?",
            callback = ::callback,
        )
        audio.save(filename = "test-kokoro-en.wav")
    } finally {
        // Release the engine even if synthesis or saving throws.
        tts.release()
    }
    println("Saved to test-kokoro-en.wav")
}
/**
 * Synthesizes a Chinese sentence with the Matcha acoustic model under
 * `./matcha-icefall-zh-baker` plus the `./vocos-22khz-univ.onnx` vocoder and
 * saves the result to `test-matcha-zh.wav`.
 *
 * All paths are relative to the current working directory.
 * [callback] is passed to the engine as the generation callback.
 */
fun testMatcha() {
    // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
    val config = OfflineTtsConfig(
        model = OfflineTtsModelConfig(
            matcha = OfflineTtsMatchaModelConfig(
                acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx",
                vocoder = "./vocos-22khz-univ.onnx",
                tokens = "./matcha-icefall-zh-baker/tokens.txt",
                lexicon = "./matcha-icefall-zh-baker/lexicon.txt",
                dictDir = "./matcha-icefall-zh-baker/dict",
            ),
            numThreads = 1,
            debug = true,
        ),
        // Three rule FST files (phone, date, number) passed as one comma-separated string.
        ruleFsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst",
    )

    val tts = OfflineTts(config = config)
    try {
        val audio = tts.generateWithCallback(
            text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。",
            callback = ::callback,
        )
        audio.save(filename = "test-matcha-zh.wav")
    } finally {
        // Release the engine even if synthesis or saving throws.
        tts.release()
    }
    println("Saved to test-matcha-zh.wav")
}
/**
 * Synthesizes an English quotation with the VITS model files under
 * `./vits-piper-en_US-amy-low` and saves the result to `test-en.wav`.
 *
 * All paths are relative to the current working directory.
 * [callback] is passed to the engine as the generation callback.
 */
fun testVits() {
    // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
    val config = OfflineTtsConfig(
        model = OfflineTtsModelConfig(
            vits = OfflineTtsVitsModelConfig(
                model = "./vits-piper-en_US-amy-low/en_US-amy-low.onnx",
                tokens = "./vits-piper-en_US-amy-low/tokens.txt",
                dataDir = "./vits-piper-en_US-amy-low/espeak-ng-data",
            ),
            numThreads = 1,
            debug = true,
        ),
    )

    val tts = OfflineTts(config = config)
    try {
        val audio = tts.generateWithCallback(
            text = "“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”",
            callback = ::callback,
        )
        audio.save(filename = "test-en.wav")
    } finally {
        // Release the engine even if synthesis or saving throws.
        tts.release()
    }
    println("Saved to test-en.wav")
}
/**
 * Synthesizes a short English sentence with the Kitten model files under
 * `./kitten-nano-en-v0_1-fp16`, passing `sid = 7`, and saves the result to
 * `test-kitten-en.wav`.
 *
 * All paths are relative to the current working directory.
 * [callback] is passed to the engine as the generation callback.
 */
fun testKittenEn() {
    // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
    val config = OfflineTtsConfig(
        model = OfflineTtsModelConfig(
            kitten = OfflineTtsKittenModelConfig(
                model = "./kitten-nano-en-v0_1-fp16/model.fp16.onnx",
                voices = "./kitten-nano-en-v0_1-fp16/voices.bin",
                tokens = "./kitten-nano-en-v0_1-fp16/tokens.txt",
                dataDir = "./kitten-nano-en-v0_1-fp16/espeak-ng-data",
            ),
            numThreads = 2,
            debug = true,
        ),
    )

    val tts = OfflineTts(config = config)
    try {
        val audio = tts.generateWithCallback(
            text = "How are you doing today?",
            sid = 7,
            callback = ::callback,
        )
        audio.save(filename = "test-kitten-en.wav")
    } finally {
        // Release the engine even if synthesis or saving throws.
        tts.release()
    }
    println("Saved to test-kitten-en.wav")
}
/*
1. Unzip test_tts.jar
2.
javap ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class
3. It prints:
Compiled from "test_tts.kt"
final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
public final java.lang.Integer invoke(float[]);
public java.lang.Object invoke(java.lang.Object);
static {};
}
4.
javap -s ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class
5. It prints:
Compiled from "test_tts.kt"
final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
descriptor: Lcom/k2fsa/sherpa/onnx/Test_ttsKt$testTts$audio$1;
com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
descriptor: ()V
public final java.lang.Integer invoke(float[]);
descriptor: ([F)Ljava/lang/Integer;
public java.lang.Object invoke(java.lang.Object);
descriptor: (Ljava/lang/Object;)Ljava/lang/Object;
static {};
descriptor: ()V
}
*/
/**
 * Generation callback used by the TTS examples in this file.
 *
 * Prints how many samples it was handed and always asks the caller to continue.
 *
 * @param samples the samples delivered for this invocation
 * @return 1 to continue, 0 to stop; this implementation always returns 1
 */
fun callback(samples: FloatArray): Int {
    println("callback got called with ${samples.size} samples")

    // 1 means to continue
    // 0 means to stop
    val continueGeneration = 1
    return continueGeneration
}