Fangjun Kuang
Committed by GitHub

Refactor TTS Android code to support jieba for Chinese TTS models (#800)

正在显示 40 个修改的文件,包含 352 行增加、285 行删除
@@ -93,3 +93,5 @@ sr-data @@ -93,3 +93,5 @@ sr-data
93 vits-icefall-* 93 vits-icefall-*
94 sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 94 sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
95 spoken-language-identification-test-wavs 95 spoken-language-identification-test-wavs
  96 +my-release-key*
  97 +vits-zh-hf-fanchen-C
@@ -158,6 +158,7 @@ class MainActivity : AppCompatActivity() { @@ -158,6 +158,7 @@ class MainActivity : AppCompatActivity() {
158 var ruleFars: String? 158 var ruleFars: String?
159 var lexicon: String? 159 var lexicon: String?
160 var dataDir: String? 160 var dataDir: String?
  161 + var dictDir: String?
161 var assets: AssetManager? = application.assets 162 var assets: AssetManager? = application.assets
162 163
163 // The purpose of such a design is to make the CI test easier 164 // The purpose of such a design is to make the CI test easier
@@ -169,6 +170,7 @@ class MainActivity : AppCompatActivity() { @@ -169,6 +170,7 @@ class MainActivity : AppCompatActivity() {
169 ruleFars = null 170 ruleFars = null
170 lexicon = null 171 lexicon = null
171 dataDir = null 172 dataDir = null
  173 + dictDir = null
172 174
173 // Example 1: 175 // Example 1:
174 // modelDir = "vits-vctk" 176 // modelDir = "vits-vctk"
@@ -191,21 +193,36 @@ class MainActivity : AppCompatActivity() { @@ -191,21 +193,36 @@ class MainActivity : AppCompatActivity() {
191 // lexicon = "lexicon.txt" 193 // lexicon = "lexicon.txt"
192 194
193 // Example 4: 195 // Example 4:
  196 + // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
  197 + // modelDir = "vits-zh-hf-fanchen-C"
  198 + // modelName = "vits-zh-hf-fanchen-C.onnx"
  199 + // lexicon = "lexicon.txt"
  200 + // dictDir = "vits-zh-hf-fanchen-C/dict"
  201 +
  202 + // Example 5:
194 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2 203 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
195 // modelDir = "vits-coqui-de-css10" 204 // modelDir = "vits-coqui-de-css10"
196 // modelName = "model.onnx" 205 // modelName = "model.onnx"
197 - // lang = "deu"  
198 206
199 if (dataDir != null) { 207 if (dataDir != null) {
200 - val newDir = copyDataDir(modelDir) 208 + val newDir = copyDataDir(modelDir!!)
201 modelDir = newDir + "/" + modelDir 209 modelDir = newDir + "/" + modelDir
202 dataDir = newDir + "/" + dataDir 210 dataDir = newDir + "/" + dataDir
203 assets = null 211 assets = null
204 } 212 }
205 213
  214 + if (dictDir != null) {
  215 + val newDir = copyDataDir( modelDir!!)
  216 + modelDir = newDir + "/" + modelDir
  217 + dictDir = modelDir + "/" + "dict"
  218 + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
  219 + assets = null
  220 + }
  221 +
206 val config = getOfflineTtsConfig( 222 val config = getOfflineTtsConfig(
207 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", 223 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
208 dataDir = dataDir ?: "", 224 dataDir = dataDir ?: "",
  225 + dictDir = dictDir ?: "",
209 ruleFsts = ruleFsts ?: "", 226 ruleFsts = ruleFsts ?: "",
210 ruleFars = ruleFars ?: "", 227 ruleFars = ruleFars ?: "",
211 )!! 228 )!!
@@ -8,6 +8,7 @@ data class OfflineTtsVitsModelConfig( @@ -8,6 +8,7 @@ data class OfflineTtsVitsModelConfig(
8 var lexicon: String = "", 8 var lexicon: String = "",
9 var tokens: String, 9 var tokens: String,
10 var dataDir: String = "", 10 var dataDir: String = "",
  11 + var dictDir: String = "",
11 var noiseScale: Float = 0.667f, 12 var noiseScale: Float = 0.667f,
12 var noiseScaleW: Float = 0.8f, 13 var noiseScaleW: Float = 0.8f,
13 var lengthScale: Float = 1.0f, 14 var lengthScale: Float = 1.0f,
@@ -49,7 +50,7 @@ class OfflineTts( @@ -49,7 +50,7 @@ class OfflineTts(
49 50
50 init { 51 init {
51 if (assetManager != null) { 52 if (assetManager != null) {
52 - ptr = new(assetManager, config) 53 + ptr = newFromAsset(assetManager, config)
53 } else { 54 } else {
54 ptr = newFromFile(config) 55 ptr = newFromFile(config)
55 } 56 }
@@ -87,7 +88,7 @@ class OfflineTts( @@ -87,7 +88,7 @@ class OfflineTts(
87 fun allocate(assetManager: AssetManager? = null) { 88 fun allocate(assetManager: AssetManager? = null) {
88 if (ptr == 0L) { 89 if (ptr == 0L) {
89 if (assetManager != null) { 90 if (assetManager != null) {
90 - ptr = new(assetManager, config) 91 + ptr = newFromAsset(assetManager, config)
91 } else { 92 } else {
92 ptr = newFromFile(config) 93 ptr = newFromFile(config)
93 } 94 }
@@ -105,7 +106,7 @@ class OfflineTts( @@ -105,7 +106,7 @@ class OfflineTts(
105 delete(ptr) 106 delete(ptr)
106 } 107 }
107 108
108 - private external fun new( 109 + private external fun newFromAsset(
109 assetManager: AssetManager, 110 assetManager: AssetManager,
110 config: OfflineTtsConfig, 111 config: OfflineTtsConfig,
111 ): Long 112 ): Long
@@ -152,6 +153,7 @@ fun getOfflineTtsConfig( @@ -152,6 +153,7 @@ fun getOfflineTtsConfig(
152 modelName: String, 153 modelName: String,
153 lexicon: String, 154 lexicon: String,
154 dataDir: String, 155 dataDir: String,
  156 + dictDir: String,
155 ruleFsts: String, 157 ruleFsts: String,
156 ruleFars: String 158 ruleFars: String
157 ): OfflineTtsConfig? { 159 ): OfflineTtsConfig? {
@@ -161,7 +163,8 @@ fun getOfflineTtsConfig( @@ -161,7 +163,8 @@ fun getOfflineTtsConfig(
161 model = "$modelDir/$modelName", 163 model = "$modelDir/$modelName",
162 lexicon = "$modelDir/$lexicon", 164 lexicon = "$modelDir/$lexicon",
163 tokens = "$modelDir/tokens.txt", 165 tokens = "$modelDir/tokens.txt",
164 - dataDir = "$dataDir" 166 + dataDir = dataDir,
  167 + dictDir = dictDir,
165 ), 168 ),
166 numThreads = 2, 169 numThreads = 2,
167 debug = true, 170 debug = true,
@@ -42,6 +42,7 @@ object TtsEngine { @@ -42,6 +42,7 @@ object TtsEngine {
42 private var ruleFars: String? = null 42 private var ruleFars: String? = null
43 private var lexicon: String? = null 43 private var lexicon: String? = null
44 private var dataDir: String? = null 44 private var dataDir: String? = null
  45 + private var dictDir: String? = null
45 private var assets: AssetManager? = null 46 private var assets: AssetManager? = null
46 47
47 init { 48 init {
@@ -54,6 +55,7 @@ object TtsEngine { @@ -54,6 +55,7 @@ object TtsEngine {
54 ruleFars = null 55 ruleFars = null
55 lexicon = null 56 lexicon = null
56 dataDir = null 57 dataDir = null
  58 + dictDir = null
57 lang = null 59 lang = null
58 60
59 // Please enable one and only one of the examples below 61 // Please enable one and only one of the examples below
@@ -83,6 +85,14 @@ object TtsEngine { @@ -83,6 +85,14 @@ object TtsEngine {
83 // lang = "zho" 85 // lang = "zho"
84 86
85 // Example 4: 87 // Example 4:
  88 + // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
  89 + // modelDir = "vits-zh-hf-fanchen-C"
  90 + // modelName = "vits-zh-hf-fanchen-C.onnx"
  91 + // lexicon = "lexicon.txt"
  92 + // dictDir = "vits-zh-hf-fanchen-C/dict"
  93 + // lang = "zho"
  94 +
  95 + // Example 5:
86 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2 96 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
87 // This model does not need lexicon or dataDir 97 // This model does not need lexicon or dataDir
88 // modelDir = "vits-coqui-de-css10" 98 // modelDir = "vits-coqui-de-css10"
@@ -108,9 +118,18 @@ object TtsEngine { @@ -108,9 +118,18 @@ object TtsEngine {
108 assets = null 118 assets = null
109 } 119 }
110 120
  121 + if (dictDir != null) {
  122 + val newDir = copyDataDir(context, modelDir!!)
  123 + modelDir = newDir + "/" + modelDir
  124 + dictDir = modelDir + "/" + "dict"
  125 + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
  126 + assets = null
  127 + }
  128 +
111 val config = getOfflineTtsConfig( 129 val config = getOfflineTtsConfig(
112 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", 130 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
113 dataDir = dataDir ?: "", 131 dataDir = dataDir ?: "",
  132 + dictDir = dictDir ?: "",
114 ruleFsts = ruleFsts ?: "", 133 ruleFsts = ruleFsts ?: "",
115 ruleFars = ruleFars ?: "" 134 ruleFars = ruleFars ?: ""
116 )!! 135 )!!
@@ -47,7 +47,7 @@ onnxruntime_version=1.17.1 @@ -47,7 +47,7 @@ onnxruntime_version=1.17.1
47 if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then 47 if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
48 mkdir -p $onnxruntime_version 48 mkdir -p $onnxruntime_version
49 pushd $onnxruntime_version 49 pushd $onnxruntime_version
50 - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip 50 + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
51 unzip onnxruntime-android-${onnxruntime_version}.zip 51 unzip onnxruntime-android-${onnxruntime_version}.zip
52 rm onnxruntime-android-${onnxruntime_version}.zip 52 rm onnxruntime-android-${onnxruntime_version}.zip
53 popd 53 popd
@@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1
48 if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then 48 if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
49 mkdir -p $onnxruntime_version 49 mkdir -p $onnxruntime_version
50 pushd $onnxruntime_version 50 pushd $onnxruntime_version
51 - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip 51 + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
52 unzip onnxruntime-android-${onnxruntime_version}.zip 52 unzip onnxruntime-android-${onnxruntime_version}.zip
53 rm onnxruntime-android-${onnxruntime_version}.zip 53 rm onnxruntime-android-${onnxruntime_version}.zip
54 popd 54 popd
@@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1
48 if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then 48 if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
49 mkdir -p $onnxruntime_version 49 mkdir -p $onnxruntime_version
50 pushd $onnxruntime_version 50 pushd $onnxruntime_version
51 - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip 51 + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
52 unzip onnxruntime-android-${onnxruntime_version}.zip 52 unzip onnxruntime-android-${onnxruntime_version}.zip
53 rm onnxruntime-android-${onnxruntime_version}.zip 53 rm onnxruntime-android-${onnxruntime_version}.zip
54 popd 54 popd
@@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1
48 if [ ! -f $onnxruntime_version/jni/x86/libonnxruntime.so ]; then 48 if [ ! -f $onnxruntime_version/jni/x86/libonnxruntime.so ]; then
49 mkdir -p $onnxruntime_version 49 mkdir -p $onnxruntime_version
50 pushd $onnxruntime_version 50 pushd $onnxruntime_version
51 - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip 51 + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip
52 unzip onnxruntime-android-${onnxruntime_version}.zip 52 unzip onnxruntime-android-${onnxruntime_version}.zip
53 rm onnxruntime-android-${onnxruntime_version}.zip 53 rm onnxruntime-android-${onnxruntime_version}.zip
54 popd 54 popd
@@ -61,6 +61,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt @@ -61,6 +61,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
61 sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt 61 sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt
62 {% endif %} 62 {% endif %}
63 63
  64 +{% if tts_model.dict_dir %}
  65 + dict_dir={{ tts_model.dict_dir }}
  66 + sed -i.bak s%"dictDir = null"%"dictDir = \"$dict_dir\""% ./TtsEngine.kt
  67 +{% endif %}
  68 +
64 {% if tts_model.data_dir %} 69 {% if tts_model.data_dir %}
65 data_dir={{ tts_model.data_dir }} 70 data_dir={{ tts_model.data_dir }}
66 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt 71 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt
@@ -59,6 +59,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt @@ -59,6 +59,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
59 sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt 59 sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt
60 {% endif %} 60 {% endif %}
61 61
  62 +{% if tts_model.dict_dir %}
  63 + dict_dir={{ tts_model.dict_dir }}
  64 + sed -i.bak s%"dictDir = null"%"dictDir = \"$dict_dir\""% ./MainActivity.kt
  65 +{% endif %}
  66 +
62 {% if tts_model.data_dir %} 67 {% if tts_model.data_dir %}
63 data_dir={{ tts_model.data_dir }} 68 data_dir={{ tts_model.data_dir }}
64 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt 69 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt
@@ -35,6 +35,7 @@ class TtsModel: @@ -35,6 +35,7 @@ class TtsModel:
35 rule_fsts: Optional[List[str]] = None 35 rule_fsts: Optional[List[str]] = None
36 rule_fars: Optional[List[str]] = None 36 rule_fars: Optional[List[str]] = None
37 data_dir: Optional[str] = None 37 data_dir: Optional[str] = None
  38 + dict_dir: Optional[str] = None
38 is_char: bool = False 39 is_char: bool = False
39 lang_iso_639_3: str = "" 40 lang_iso_639_3: str = ""
40 41
@@ -326,8 +327,14 @@ def get_vits_models() -> List[TtsModel]: @@ -326,8 +327,14 @@ def get_vits_models() -> List[TtsModel]:
326 rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"] 327 rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
327 for m in chinese_models: 328 for m in chinese_models:
328 s = [f"{m.model_dir}/{r}" for r in rule_fsts] 329 s = [f"{m.model_dir}/{r}" for r in rule_fsts]
  330 + if "vits-zh-hf" in m.model_dir:
  331 + s = s[:-1]
  332 + m.dict_dir = m.model_dir + "/dict"
  333 +
329 m.rule_fsts = ",".join(s) 334 m.rule_fsts = ",".join(s)
330 - m.rule_fars = f"{m.model_dir}/rule.far" 335 +
  336 + if "vits-zh-hf" not in m.model_dir:
  337 + m.rule_fars = f"{m.model_dir}/rule.far"
331 338
332 all_models = chinese_models + [ 339 all_models = chinese_models + [
333 TtsModel( 340 TtsModel(
@@ -32,7 +32,7 @@ bool AudioTaggingModelConfig::Validate() const { @@ -32,7 +32,7 @@ bool AudioTaggingModelConfig::Validate() const {
32 } 32 }
33 33
34 if (!ced.empty() && !FileExists(ced)) { 34 if (!ced.empty() && !FileExists(ced)) {
35 - SHERPA_ONNX_LOGE("CED model file %s does not exist", ced.c_str()); 35 + SHERPA_ONNX_LOGE("CED model file '%s' does not exist", ced.c_str());
36 return false; 36 return false;
37 } 37 }
38 38
@@ -48,7 +48,7 @@ bool AudioTaggingConfig::Validate() const { @@ -48,7 +48,7 @@ bool AudioTaggingConfig::Validate() const {
48 } 48 }
49 49
50 if (!FileExists(labels)) { 50 if (!FileExists(labels)) {
51 - SHERPA_ONNX_LOGE("--labels %s does not exist", labels.c_str()); 51 + SHERPA_ONNX_LOGE("--labels '%s' does not exist", labels.c_str());
52 return false; 52 return false;
53 } 53 }
54 54
@@ -7,7 +7,7 @@ @@ -7,7 +7,7 @@
7 #include <fstream> 7 #include <fstream>
8 #include <string> 8 #include <string>
9 9
10 -#include "sherpa-onnx/csrc/log.h" 10 +#include "sherpa-onnx/csrc/macros.h"
11 11
12 namespace sherpa_onnx { 12 namespace sherpa_onnx {
13 13
@@ -17,7 +17,7 @@ bool FileExists(const std::string &filename) { @@ -17,7 +17,7 @@ bool FileExists(const std::string &filename) {
17 17
18 void AssertFileExists(const std::string &filename) { 18 void AssertFileExists(const std::string &filename) {
19 if (!FileExists(filename)) { 19 if (!FileExists(filename)) {
20 - SHERPA_ONNX_LOG(FATAL) << filename << " does not exist!"; 20 + SHERPA_ONNX_LOGE("filename '%s' does not exist", filename.c_str());
21 exit(-1); 21 exit(-1);
22 } 22 }
23 } 23 }
@@ -146,6 +146,14 @@ class JiebaLexicon::Impl { @@ -146,6 +146,14 @@ class JiebaLexicon::Impl {
146 if (token2id_.count(p.first) && !token2id_.count(p.second)) { 146 if (token2id_.count(p.first) && !token2id_.count(p.second)) {
147 token2id_[p.second] = token2id_[p.first]; 147 token2id_[p.second] = token2id_[p.first];
148 } 148 }
  149 +
  150 + if (!token2id_.count(p.first) && token2id_.count(p.second)) {
  151 + token2id_[p.first] = token2id_[p.second];
  152 + }
  153 + }
  154 +
  155 + if (!token2id_.count("、") && token2id_.count(",")) {
  156 + token2id_["、"] = token2id_[","];
149 } 157 }
150 } 158 }
151 159
@@ -101,7 +101,8 @@ bool KeywordSpotterConfig::Validate() const { @@ -101,7 +101,8 @@ bool KeywordSpotterConfig::Validate() const {
101 // Solution: take keyword_file variable is directly 101 // Solution: take keyword_file variable is directly
102 // parsed as a string of keywords 102 // parsed as a string of keywords
103 if (!std::ifstream(keywords_file.c_str()).good()) { 103 if (!std::ifstream(keywords_file.c_str()).good()) {
104 - SHERPA_ONNX_LOGE("Keywords file %s does not exist.", keywords_file.c_str()); 104 + SHERPA_ONNX_LOGE("Keywords file '%s' does not exist.",
  105 + keywords_file.c_str());
105 return false; 106 return false;
106 } 107 }
107 #endif 108 #endif
@@ -34,7 +34,7 @@ void OfflineCtcFstDecoderConfig::Register(ParseOptions *po) { @@ -34,7 +34,7 @@ void OfflineCtcFstDecoderConfig::Register(ParseOptions *po) {
34 34
35 bool OfflineCtcFstDecoderConfig::Validate() const { 35 bool OfflineCtcFstDecoderConfig::Validate() const {
36 if (!graph.empty() && !FileExists(graph)) { 36 if (!graph.empty() && !FileExists(graph)) {
37 - SHERPA_ONNX_LOGE("graph: %s does not exist", graph.c_str()); 37 + SHERPA_ONNX_LOGE("graph: '%s' does not exist", graph.c_str());
38 return false; 38 return false;
39 } 39 }
40 return true; 40 return true;
@@ -22,7 +22,7 @@ void OfflineLMConfig::Register(ParseOptions *po) { @@ -22,7 +22,7 @@ void OfflineLMConfig::Register(ParseOptions *po) {
22 22
23 bool OfflineLMConfig::Validate() const { 23 bool OfflineLMConfig::Validate() const {
24 if (!FileExists(model)) { 24 if (!FileExists(model)) {
25 - SHERPA_ONNX_LOGE("%s does not exist", model.c_str()); 25 + SHERPA_ONNX_LOGE("'%s' does not exist", model.c_str());
26 return false; 26 return false;
27 } 27 }
28 28
@@ -16,7 +16,7 @@ void OfflineNemoEncDecCtcModelConfig::Register(ParseOptions *po) { @@ -16,7 +16,7 @@ void OfflineNemoEncDecCtcModelConfig::Register(ParseOptions *po) {
16 16
17 bool OfflineNemoEncDecCtcModelConfig::Validate() const { 17 bool OfflineNemoEncDecCtcModelConfig::Validate() const {
18 if (!FileExists(model)) { 18 if (!FileExists(model)) {
19 - SHERPA_ONNX_LOGE("NeMo model: %s does not exist", model.c_str()); 19 + SHERPA_ONNX_LOGE("NeMo model: '%s' does not exist", model.c_str());
20 return false; 20 return false;
21 } 21 }
22 22
@@ -15,7 +15,7 @@ void OfflineParaformerModelConfig::Register(ParseOptions *po) { @@ -15,7 +15,7 @@ void OfflineParaformerModelConfig::Register(ParseOptions *po) {
15 15
16 bool OfflineParaformerModelConfig::Validate() const { 16 bool OfflineParaformerModelConfig::Validate() const {
17 if (!FileExists(model)) { 17 if (!FileExists(model)) {
18 - SHERPA_ONNX_LOGE("Paraformer model %s does not exist", model.c_str()); 18 + SHERPA_ONNX_LOGE("Paraformer model '%s' does not exist", model.c_str());
19 return false; 19 return false;
20 } 20 }
21 21
@@ -18,19 +18,19 @@ void OfflineTransducerModelConfig::Register(ParseOptions *po) { @@ -18,19 +18,19 @@ void OfflineTransducerModelConfig::Register(ParseOptions *po) {
18 18
19 bool OfflineTransducerModelConfig::Validate() const { 19 bool OfflineTransducerModelConfig::Validate() const {
20 if (!FileExists(encoder_filename)) { 20 if (!FileExists(encoder_filename)) {
21 - SHERPA_ONNX_LOGE("transducer encoder: %s does not exist", 21 + SHERPA_ONNX_LOGE("transducer encoder: '%s' does not exist",
22 encoder_filename.c_str()); 22 encoder_filename.c_str());
23 return false; 23 return false;
24 } 24 }
25 25
26 if (!FileExists(decoder_filename)) { 26 if (!FileExists(decoder_filename)) {
27 - SHERPA_ONNX_LOGE("transducer decoder: %s does not exist", 27 + SHERPA_ONNX_LOGE("transducer decoder: '%s' does not exist",
28 decoder_filename.c_str()); 28 decoder_filename.c_str());
29 return false; 29 return false;
30 } 30 }
31 31
32 if (!FileExists(joiner_filename)) { 32 if (!FileExists(joiner_filename)) {
33 - SHERPA_ONNX_LOGE("transducer joiner: %s does not exist", 33 + SHERPA_ONNX_LOGE("transducer joiner: '%s' does not exist",
34 joiner_filename.c_str()); 34 joiner_filename.c_str());
35 return false; 35 return false;
36 } 36 }
@@ -35,7 +35,7 @@ bool OfflineTtsVitsModelConfig::Validate() const { @@ -35,7 +35,7 @@ bool OfflineTtsVitsModelConfig::Validate() const {
35 } 35 }
36 36
37 if (!FileExists(model)) { 37 if (!FileExists(model)) {
38 - SHERPA_ONNX_LOGE("--vits-model: %s does not exist", model.c_str()); 38 + SHERPA_ONNX_LOGE("--vits-model: '%s' does not exist", model.c_str());
39 return false; 39 return false;
40 } 40 }
41 41
@@ -45,31 +45,31 @@ bool OfflineTtsVitsModelConfig::Validate() const { @@ -45,31 +45,31 @@ bool OfflineTtsVitsModelConfig::Validate() const {
45 } 45 }
46 46
47 if (!FileExists(tokens)) { 47 if (!FileExists(tokens)) {
48 - SHERPA_ONNX_LOGE("--vits-tokens: %s does not exist", tokens.c_str()); 48 + SHERPA_ONNX_LOGE("--vits-tokens: '%s' does not exist", tokens.c_str());
49 return false; 49 return false;
50 } 50 }
51 51
52 if (!data_dir.empty()) { 52 if (!data_dir.empty()) {
53 if (!FileExists(data_dir + "/phontab")) { 53 if (!FileExists(data_dir + "/phontab")) {
54 - SHERPA_ONNX_LOGE("%s/phontab does not exist. Skipping test", 54 + SHERPA_ONNX_LOGE("'%s/phontab' does not exist. Skipping test",
55 data_dir.c_str()); 55 data_dir.c_str());
56 return false; 56 return false;
57 } 57 }
58 58
59 if (!FileExists(data_dir + "/phonindex")) { 59 if (!FileExists(data_dir + "/phonindex")) {
60 - SHERPA_ONNX_LOGE("%s/phonindex does not exist. Skipping test", 60 + SHERPA_ONNX_LOGE("'%s/phonindex' does not exist. Skipping test",
61 data_dir.c_str()); 61 data_dir.c_str());
62 return false; 62 return false;
63 } 63 }
64 64
65 if (!FileExists(data_dir + "/phondata")) { 65 if (!FileExists(data_dir + "/phondata")) {
66 - SHERPA_ONNX_LOGE("%s/phondata does not exist. Skipping test", 66 + SHERPA_ONNX_LOGE("'%s/phondata' does not exist. Skipping test",
67 data_dir.c_str()); 67 data_dir.c_str());
68 return false; 68 return false;
69 } 69 }
70 70
71 if (!FileExists(data_dir + "/intonations")) { 71 if (!FileExists(data_dir + "/intonations")) {
72 - SHERPA_ONNX_LOGE("%s/intonations does not exist.", data_dir.c_str()); 72 + SHERPA_ONNX_LOGE("'%s/intonations' does not exist.", data_dir.c_str());
73 return false; 73 return false;
74 } 74 }
75 } 75 }
@@ -82,7 +82,8 @@ bool OfflineTtsVitsModelConfig::Validate() const { @@ -82,7 +82,8 @@ bool OfflineTtsVitsModelConfig::Validate() const {
82 82
83 for (const auto &f : required_files) { 83 for (const auto &f : required_files) {
84 if (!FileExists(dict_dir + "/" + f)) { 84 if (!FileExists(dict_dir + "/" + f)) {
85 - SHERPA_ONNX_LOGE("%s/%s does not exist.", data_dir.c_str(), f.c_str()); 85 + SHERPA_ONNX_LOGE("'%s/%s' does not exist.", data_dir.c_str(),
  86 + f.c_str());
86 return false; 87 return false;
87 } 88 }
88 } 89 }
@@ -42,7 +42,7 @@ bool OfflineTtsConfig::Validate() const { @@ -42,7 +42,7 @@ bool OfflineTtsConfig::Validate() const {
42 SplitStringToVector(rule_fsts, ",", false, &files); 42 SplitStringToVector(rule_fsts, ",", false, &files);
43 for (const auto &f : files) { 43 for (const auto &f : files) {
44 if (!FileExists(f)) { 44 if (!FileExists(f)) {
45 - SHERPA_ONNX_LOGE("Rule fst %s does not exist. ", f.c_str()); 45 + SHERPA_ONNX_LOGE("Rule fst '%s' does not exist. ", f.c_str());
46 return false; 46 return false;
47 } 47 }
48 } 48 }
@@ -53,7 +53,7 @@ bool OfflineTtsConfig::Validate() const { @@ -53,7 +53,7 @@ bool OfflineTtsConfig::Validate() const {
53 SplitStringToVector(rule_fars, ",", false, &files); 53 SplitStringToVector(rule_fars, ",", false, &files);
54 for (const auto &f : files) { 54 for (const auto &f : files) {
55 if (!FileExists(f)) { 55 if (!FileExists(f)) {
56 - SHERPA_ONNX_LOGE("Rule far %s does not exist. ", f.c_str()); 56 + SHERPA_ONNX_LOGE("Rule far '%s' does not exist. ", f.c_str());
57 return false; 57 return false;
58 } 58 }
59 } 59 }
@@ -18,7 +18,7 @@ void OfflineWenetCtcModelConfig::Register(ParseOptions *po) { @@ -18,7 +18,7 @@ void OfflineWenetCtcModelConfig::Register(ParseOptions *po) {
18 18
19 bool OfflineWenetCtcModelConfig::Validate() const { 19 bool OfflineWenetCtcModelConfig::Validate() const {
20 if (!FileExists(model)) { 20 if (!FileExists(model)) {
21 - SHERPA_ONNX_LOGE("WeNet model: %s does not exist", model.c_str()); 21 + SHERPA_ONNX_LOGE("WeNet model: '%s' does not exist", model.c_str());
22 return false; 22 return false;
23 } 23 }
24 24
@@ -48,7 +48,8 @@ bool OfflineWhisperModelConfig::Validate() const { @@ -48,7 +48,8 @@ bool OfflineWhisperModelConfig::Validate() const {
48 } 48 }
49 49
50 if (!FileExists(encoder)) { 50 if (!FileExists(encoder)) {
51 - SHERPA_ONNX_LOGE("whisper encoder file %s does not exist", encoder.c_str()); 51 + SHERPA_ONNX_LOGE("whisper encoder file '%s' does not exist",
  52 + encoder.c_str());
52 return false; 53 return false;
53 } 54 }
54 55
@@ -58,7 +59,8 @@ bool OfflineWhisperModelConfig::Validate() const { @@ -58,7 +59,8 @@ bool OfflineWhisperModelConfig::Validate() const {
58 } 59 }
59 60
60 if (!FileExists(decoder)) { 61 if (!FileExists(decoder)) {
61 - SHERPA_ONNX_LOGE("whisper decoder file %s does not exist", decoder.c_str()); 62 + SHERPA_ONNX_LOGE("whisper decoder file '%s' does not exist",
  63 + decoder.c_str());
62 return false; 64 return false;
63 } 65 }
64 66
@@ -21,7 +21,7 @@ bool OfflineZipformerAudioTaggingModelConfig::Validate() const { @@ -21,7 +21,7 @@ bool OfflineZipformerAudioTaggingModelConfig::Validate() const {
21 } 21 }
22 22
23 if (!FileExists(model)) { 23 if (!FileExists(model)) {
24 - SHERPA_ONNX_LOGE("--zipformer-model: %s does not exist", model.c_str()); 24 + SHERPA_ONNX_LOGE("--zipformer-model: '%s' does not exist", model.c_str());
25 return false; 25 return false;
26 } 26 }
27 27
@@ -15,7 +15,7 @@ void OfflineZipformerCtcModelConfig::Register(ParseOptions *po) { @@ -15,7 +15,7 @@ void OfflineZipformerCtcModelConfig::Register(ParseOptions *po) {
15 15
16 bool OfflineZipformerCtcModelConfig::Validate() const { 16 bool OfflineZipformerCtcModelConfig::Validate() const {
17 if (!FileExists(model)) { 17 if (!FileExists(model)) {
18 - SHERPA_ONNX_LOGE("zipformer CTC model file %s does not exist", 18 + SHERPA_ONNX_LOGE("zipformer CTC model file '%s' does not exist",
19 model.c_str()); 19 model.c_str());
20 return false; 20 return false;
21 } 21 }
@@ -31,7 +31,7 @@ void OnlineCtcFstDecoderConfig::Register(ParseOptions *po) { @@ -31,7 +31,7 @@ void OnlineCtcFstDecoderConfig::Register(ParseOptions *po) {
31 31
32 bool OnlineCtcFstDecoderConfig::Validate() const { 32 bool OnlineCtcFstDecoderConfig::Validate() const {
33 if (!graph.empty() && !FileExists(graph)) { 33 if (!graph.empty() && !FileExists(graph)) {
34 - SHERPA_ONNX_LOGE("graph: %s does not exist", graph.c_str()); 34 + SHERPA_ONNX_LOGE("graph: '%s' does not exist", graph.c_str());
35 return false; 35 return false;
36 } 36 }
37 return true; 37 return true;
@@ -22,7 +22,7 @@ void OnlineLMConfig::Register(ParseOptions *po) { @@ -22,7 +22,7 @@ void OnlineLMConfig::Register(ParseOptions *po) {
22 22
23 bool OnlineLMConfig::Validate() const { 23 bool OnlineLMConfig::Validate() const {
24 if (!FileExists(model)) { 24 if (!FileExists(model)) {
25 - SHERPA_ONNX_LOGE("%s does not exist", model.c_str()); 25 + SHERPA_ONNX_LOGE("'%s' does not exist", model.c_str());
26 return false; 26 return false;
27 } 27 }
28 28
@@ -45,7 +45,7 @@ bool OnlineModelConfig::Validate() const { @@ -45,7 +45,7 @@ bool OnlineModelConfig::Validate() const {
45 } 45 }
46 46
47 if (!FileExists(tokens)) { 47 if (!FileExists(tokens)) {
48 - SHERPA_ONNX_LOGE("tokens: %s does not exist", tokens.c_str()); 48 + SHERPA_ONNX_LOGE("tokens: '%s' does not exist", tokens.c_str());
49 return false; 49 return false;
50 } 50 }
51 51
@@ -18,12 +18,12 @@ void OnlineParaformerModelConfig::Register(ParseOptions *po) { @@ -18,12 +18,12 @@ void OnlineParaformerModelConfig::Register(ParseOptions *po) {
18 18
19 bool OnlineParaformerModelConfig::Validate() const { 19 bool OnlineParaformerModelConfig::Validate() const {
20 if (!FileExists(encoder)) { 20 if (!FileExists(encoder)) {
21 - SHERPA_ONNX_LOGE("Paraformer encoder %s does not exist", encoder.c_str()); 21 + SHERPA_ONNX_LOGE("Paraformer encoder '%s' does not exist", encoder.c_str());
22 return false; 22 return false;
23 } 23 }
24 24
25 if (!FileExists(decoder)) { 25 if (!FileExists(decoder)) {
26 - SHERPA_ONNX_LOGE("Paraformer decoder %s does not exist", decoder.c_str()); 26 + SHERPA_ONNX_LOGE("Paraformer decoder '%s' does not exist", decoder.c_str());
27 return false; 27 return false;
28 } 28 }
29 29
@@ -18,17 +18,19 @@ void OnlineTransducerModelConfig::Register(ParseOptions *po) { @@ -18,17 +18,19 @@ void OnlineTransducerModelConfig::Register(ParseOptions *po) {
18 18
19 bool OnlineTransducerModelConfig::Validate() const { 19 bool OnlineTransducerModelConfig::Validate() const {
20 if (!FileExists(encoder)) { 20 if (!FileExists(encoder)) {
21 - SHERPA_ONNX_LOGE("transducer encoder: %s does not exist", encoder.c_str()); 21 + SHERPA_ONNX_LOGE("transducer encoder: '%s' does not exist",
  22 + encoder.c_str());
22 return false; 23 return false;
23 } 24 }
24 25
25 if (!FileExists(decoder)) { 26 if (!FileExists(decoder)) {
26 - SHERPA_ONNX_LOGE("transducer decoder: %s does not exist", decoder.c_str()); 27 + SHERPA_ONNX_LOGE("transducer decoder: '%s' does not exist",
  28 + decoder.c_str());
27 return false; 29 return false;
28 } 30 }
29 31
30 if (!FileExists(joiner)) { 32 if (!FileExists(joiner)) {
31 - SHERPA_ONNX_LOGE("joiner: %s does not exist", joiner.c_str()); 33 + SHERPA_ONNX_LOGE("joiner: '%s' does not exist", joiner.c_str());
32 return false; 34 return false;
33 } 35 }
34 36
@@ -21,7 +21,7 @@ void OnlineWenetCtcModelConfig::Register(ParseOptions *po) { @@ -21,7 +21,7 @@ void OnlineWenetCtcModelConfig::Register(ParseOptions *po) {
21 21
22 bool OnlineWenetCtcModelConfig::Validate() const { 22 bool OnlineWenetCtcModelConfig::Validate() const {
23 if (!FileExists(model)) { 23 if (!FileExists(model)) {
24 - SHERPA_ONNX_LOGE("WeNet CTC model %s does not exist", model.c_str()); 24 + SHERPA_ONNX_LOGE("WeNet CTC model '%s' does not exist", model.c_str());
25 return false; 25 return false;
26 } 26 }
27 27
@@ -22,7 +22,8 @@ bool OnlineZipformer2CtcModelConfig::Validate() const { @@ -22,7 +22,8 @@ bool OnlineZipformer2CtcModelConfig::Validate() const {
22 } 22 }
23 23
24 if (!FileExists(model)) { 24 if (!FileExists(model)) {
25 - SHERPA_ONNX_LOGE("--zipformer2-ctc-model %s does not exist", model.c_str()); 25 + SHERPA_ONNX_LOGE("--zipformer2-ctc-model '%s' does not exist",
  26 + model.c_str());
26 return false; 27 return false;
27 } 28 }
28 29
@@ -44,7 +44,8 @@ bool SileroVadModelConfig::Validate() const { @@ -44,7 +44,8 @@ bool SileroVadModelConfig::Validate() const {
44 } 44 }
45 45
46 if (!FileExists(model)) { 46 if (!FileExists(model)) {
47 - SHERPA_ONNX_LOGE("Silero vad model file %s does not exist", model.c_str()); 47 + SHERPA_ONNX_LOGE("Silero vad model file '%s' does not exist",
  48 + model.c_str());
48 return false; 49 return false;
49 } 50 }
50 51
@@ -31,7 +31,7 @@ bool SpeakerEmbeddingExtractorConfig::Validate() const { @@ -31,7 +31,7 @@ bool SpeakerEmbeddingExtractorConfig::Validate() const {
31 } 31 }
32 32
33 if (!FileExists(model)) { 33 if (!FileExists(model)) {
34 - SHERPA_ONNX_LOGE("--speaker-embedding-model: %s does not exist", 34 + SHERPA_ONNX_LOGE("--speaker-embedding-model: '%s' does not exist",
35 model.c_str()); 35 model.c_str());
36 return false; 36 return false;
37 } 37 }
@@ -43,7 +43,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const { @@ -43,7 +43,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const {
43 } 43 }
44 44
45 if (!FileExists(encoder)) { 45 if (!FileExists(encoder)) {
46 - SHERPA_ONNX_LOGE("whisper encoder file %s does not exist", encoder.c_str()); 46 + SHERPA_ONNX_LOGE("whisper encoder file '%s' does not exist",
  47 + encoder.c_str());
47 return false; 48 return false;
48 } 49 }
49 50
@@ -53,7 +54,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const { @@ -53,7 +54,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const {
53 } 54 }
54 55
55 if (!FileExists(decoder)) { 56 if (!FileExists(decoder)) {
56 - SHERPA_ONNX_LOGE("whisper decoder file %s does not exist", decoder.c_str()); 57 + SHERPA_ONNX_LOGE("whisper decoder file '%s' does not exist",
  58 + decoder.c_str());
57 return false; 59 return false;
58 } 60 }
59 61
@@ -9,11 +9,20 @@ if(NOT DEFINED ANDROID_ABI) @@ -9,11 +9,20 @@ if(NOT DEFINED ANDROID_ABI)
9 include_directories($ENV{JAVA_HOME}/include/darwin) 9 include_directories($ENV{JAVA_HOME}/include/darwin)
10 endif() 10 endif()
11 11
12 -add_library(sherpa-onnx-jni 12 +set(sources
13 audio-tagging.cc 13 audio-tagging.cc
14 jni.cc 14 jni.cc
15 offline-stream.cc 15 offline-stream.cc
16 spoken-language-identification.cc 16 spoken-language-identification.cc
17 ) 17 )
  18 +
  19 +if(SHERPA_ONNX_ENABLE_TTS)
  20 + list(APPEND sources
  21 + offline-tts.cc
  22 + )
  23 +endif()
  24 +
  25 +add_library(sherpa-onnx-jni ${sources})
  26 +
18 target_link_libraries(sherpa-onnx-jni sherpa-onnx-core) 27 target_link_libraries(sherpa-onnx-jni sherpa-onnx-core)
19 install(TARGETS sherpa-onnx-jni DESTINATION lib) 28 install(TARGETS sherpa-onnx-jni DESTINATION lib)
@@ -24,10 +24,6 @@ @@ -24,10 +24,6 @@
24 #include "sherpa-onnx/csrc/wave-writer.h" 24 #include "sherpa-onnx/csrc/wave-writer.h"
25 #include "sherpa-onnx/jni/common.h" 25 #include "sherpa-onnx/jni/common.h"
26 26
27 -#if SHERPA_ONNX_ENABLE_TTS == 1  
28 -#include "sherpa-onnx/csrc/offline-tts.h"  
29 -#endif  
30 -  
31 namespace sherpa_onnx { 27 namespace sherpa_onnx {
32 28
33 class SherpaOnnx { 29 class SherpaOnnx {
@@ -775,113 +771,6 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { @@ -775,113 +771,6 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) {
775 return ans; 771 return ans;
776 } 772 }
777 773
778 -#if SHERPA_ONNX_ENABLE_TTS == 1  
779 -class SherpaOnnxOfflineTts {  
780 - public:  
781 -#if __ANDROID_API__ >= 9  
782 - SherpaOnnxOfflineTts(AAssetManager *mgr, const OfflineTtsConfig &config)  
783 - : tts_(mgr, config) {}  
784 -#endif  
785 - explicit SherpaOnnxOfflineTts(const OfflineTtsConfig &config)  
786 - : tts_(config) {}  
787 -  
788 - GeneratedAudio Generate(const std::string &text, int64_t sid = 0,  
789 - float speed = 1.0,  
790 - std::function<void(const float *, int32_t, float)>  
791 - callback = nullptr) const {  
792 - return tts_.Generate(text, sid, speed, callback);  
793 - }  
794 -  
795 - int32_t SampleRate() const { return tts_.SampleRate(); }  
796 -  
797 - int32_t NumSpeakers() const { return tts_.NumSpeakers(); }  
798 -  
799 - private:  
800 - OfflineTts tts_;  
801 -};  
802 -  
803 -static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {  
804 - OfflineTtsConfig ans;  
805 -  
806 - jclass cls = env->GetObjectClass(config);  
807 - jfieldID fid;  
808 -  
809 - fid = env->GetFieldID(cls, "model",  
810 - "Lcom/k2fsa/sherpa/onnx/OfflineTtsModelConfig;");  
811 - jobject model = env->GetObjectField(config, fid);  
812 - jclass model_config_cls = env->GetObjectClass(model);  
813 -  
814 - fid = env->GetFieldID(model_config_cls, "vits",  
815 - "Lcom/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig;");  
816 - jobject vits = env->GetObjectField(model, fid);  
817 - jclass vits_cls = env->GetObjectClass(vits);  
818 -  
819 - fid = env->GetFieldID(vits_cls, "model", "Ljava/lang/String;");  
820 - jstring s = (jstring)env->GetObjectField(vits, fid);  
821 - const char *p = env->GetStringUTFChars(s, nullptr);  
822 - ans.model.vits.model = p;  
823 - env->ReleaseStringUTFChars(s, p);  
824 -  
825 - fid = env->GetFieldID(vits_cls, "lexicon", "Ljava/lang/String;");  
826 - s = (jstring)env->GetObjectField(vits, fid);  
827 - p = env->GetStringUTFChars(s, nullptr);  
828 - ans.model.vits.lexicon = p;  
829 - env->ReleaseStringUTFChars(s, p);  
830 -  
831 - fid = env->GetFieldID(vits_cls, "tokens", "Ljava/lang/String;");  
832 - s = (jstring)env->GetObjectField(vits, fid);  
833 - p = env->GetStringUTFChars(s, nullptr);  
834 - ans.model.vits.tokens = p;  
835 - env->ReleaseStringUTFChars(s, p);  
836 -  
837 - fid = env->GetFieldID(vits_cls, "dataDir", "Ljava/lang/String;");  
838 - s = (jstring)env->GetObjectField(vits, fid);  
839 - p = env->GetStringUTFChars(s, nullptr);  
840 - ans.model.vits.data_dir = p;  
841 - env->ReleaseStringUTFChars(s, p);  
842 -  
843 - fid = env->GetFieldID(vits_cls, "noiseScale", "F");  
844 - ans.model.vits.noise_scale = env->GetFloatField(vits, fid);  
845 -  
846 - fid = env->GetFieldID(vits_cls, "noiseScaleW", "F");  
847 - ans.model.vits.noise_scale_w = env->GetFloatField(vits, fid);  
848 -  
849 - fid = env->GetFieldID(vits_cls, "lengthScale", "F");  
850 - ans.model.vits.length_scale = env->GetFloatField(vits, fid);  
851 -  
852 - fid = env->GetFieldID(model_config_cls, "numThreads", "I");  
853 - ans.model.num_threads = env->GetIntField(model, fid);  
854 -  
855 - fid = env->GetFieldID(model_config_cls, "debug", "Z");  
856 - ans.model.debug = env->GetBooleanField(model, fid);  
857 -  
858 - fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;");  
859 - s = (jstring)env->GetObjectField(model, fid);  
860 - p = env->GetStringUTFChars(s, nullptr);  
861 - ans.model.provider = p;  
862 - env->ReleaseStringUTFChars(s, p);  
863 -  
864 - // for ruleFsts  
865 - fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;");  
866 - s = (jstring)env->GetObjectField(config, fid);  
867 - p = env->GetStringUTFChars(s, nullptr);  
868 - ans.rule_fsts = p;  
869 - env->ReleaseStringUTFChars(s, p);  
870 -  
871 - // for ruleFars  
872 - fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");  
873 - s = (jstring)env->GetObjectField(config, fid);  
874 - p = env->GetStringUTFChars(s, nullptr);  
875 - ans.rule_fars = p;  
876 - env->ReleaseStringUTFChars(s, p);  
877 -  
878 - fid = env->GetFieldID(cls, "maxNumSentences", "I");  
879 - ans.max_num_sentences = env->GetIntField(config, fid);  
880 -  
881 - return ans;  
882 -}  
883 -#endif  
884 -  
885 } // namespace sherpa_onnx 774 } // namespace sherpa_onnx
886 775
887 SHERPA_ONNX_EXTERN_C 776 SHERPA_ONNX_EXTERN_C
@@ -1226,128 +1115,6 @@ jobject NewFloat(JNIEnv *env, float value) { @@ -1226,128 +1115,6 @@ jobject NewFloat(JNIEnv *env, float value) {
1226 return env->NewObject(cls, constructor, value); 1115 return env->NewObject(cls, constructor, value);
1227 } 1116 }
1228 1117
1229 -#if SHERPA_ONNX_ENABLE_TTS == 1  
1230 -SHERPA_ONNX_EXTERN_C  
1231 -JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new(  
1232 - JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {  
1233 -#if __ANDROID_API__ >= 9  
1234 - AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);  
1235 - if (!mgr) {  
1236 - SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);  
1237 - }  
1238 -#endif  
1239 - auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);  
1240 - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());  
1241 -  
1242 - auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(  
1243 -#if __ANDROID_API__ >= 9  
1244 - mgr,  
1245 -#endif  
1246 - config);  
1247 -  
1248 - return (jlong)tts;  
1249 -}  
1250 -  
1251 -SHERPA_ONNX_EXTERN_C  
1252 -JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile(  
1253 - JNIEnv *env, jobject /*obj*/, jobject _config) {  
1254 - auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);  
1255 - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());  
1256 -  
1257 - if (!config.Validate()) {  
1258 - SHERPA_ONNX_LOGE("Errors found in config!");  
1259 - }  
1260 -  
1261 - auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config);  
1262 -  
1263 - return (jlong)tts;  
1264 -}  
1265 -  
1266 -SHERPA_ONNX_EXTERN_C  
1267 -JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_delete(  
1268 - JNIEnv *env, jobject /*obj*/, jlong ptr) {  
1269 - delete reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr);  
1270 -}  
1271 -  
1272 -SHERPA_ONNX_EXTERN_C  
1273 -JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getSampleRate(  
1274 - JNIEnv *env, jobject /*obj*/, jlong ptr) {  
1275 - return reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr)  
1276 - ->SampleRate();  
1277 -}  
1278 -  
1279 -SHERPA_ONNX_EXTERN_C  
1280 -JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers(  
1281 - JNIEnv *env, jobject /*obj*/, jlong ptr) {  
1282 - return reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr)  
1283 - ->NumSpeakers();  
1284 -}  
1285 -  
1286 -SHERPA_ONNX_EXTERN_C  
1287 -JNIEXPORT jobjectArray JNICALL  
1288 -Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/,  
1289 - jlong ptr, jstring text,  
1290 - jint sid, jfloat speed) {  
1291 - const char *p_text = env->GetStringUTFChars(text, nullptr);  
1292 - SHERPA_ONNX_LOGE("string is: %s", p_text);  
1293 -  
1294 - auto audio =  
1295 - reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr)->Generate(  
1296 - p_text, sid, speed);  
1297 -  
1298 - jfloatArray samples_arr = env->NewFloatArray(audio.samples.size());  
1299 - env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(),  
1300 - audio.samples.data());  
1301 -  
1302 - jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(  
1303 - 2, env->FindClass("java/lang/Object"), nullptr);  
1304 -  
1305 - env->SetObjectArrayElement(obj_arr, 0, samples_arr);  
1306 - env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate));  
1307 -  
1308 - env->ReleaseStringUTFChars(text, p_text);  
1309 -  
1310 - return obj_arr;  
1311 -}  
1312 -  
1313 -SHERPA_ONNX_EXTERN_C  
1314 -JNIEXPORT jobjectArray JNICALL  
1315 -Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl(  
1316 - JNIEnv *env, jobject /*obj*/, jlong ptr, jstring text, jint sid,  
1317 - jfloat speed, jobject callback) {  
1318 - const char *p_text = env->GetStringUTFChars(text, nullptr);  
1319 - SHERPA_ONNX_LOGE("string is: %s", p_text);  
1320 -  
1321 - std::function<void(const float *, int32_t, float)> callback_wrapper =  
1322 - [env, callback](const float *samples, int32_t n, float /*p*/) {  
1323 - jclass cls = env->GetObjectClass(callback);  
1324 - jmethodID mid = env->GetMethodID(cls, "invoke", "([F)V");  
1325 -  
1326 - jfloatArray samples_arr = env->NewFloatArray(n);  
1327 - env->SetFloatArrayRegion(samples_arr, 0, n, samples);  
1328 - env->CallVoidMethod(callback, mid, samples_arr);  
1329 - };  
1330 -  
1331 - auto audio =  
1332 - reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr)->Generate(  
1333 - p_text, sid, speed, callback_wrapper);  
1334 -  
1335 - jfloatArray samples_arr = env->NewFloatArray(audio.samples.size());  
1336 - env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(),  
1337 - audio.samples.data());  
1338 -  
1339 - jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(  
1340 - 2, env->FindClass("java/lang/Object"), nullptr);  
1341 -  
1342 - env->SetObjectArrayElement(obj_arr, 0, samples_arr);  
1343 - env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate));  
1344 -  
1345 - env->ReleaseStringUTFChars(text, p_text);  
1346 -  
1347 - return obj_arr;  
1348 -}  
1349 -#endif  
1350 -  
1351 SHERPA_ONNX_EXTERN_C 1118 SHERPA_ONNX_EXTERN_C
1352 JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( 1119 JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl(
1353 JNIEnv *env, jobject /*obj*/, jstring filename, jfloatArray samples, 1120 JNIEnv *env, jobject /*obj*/, jstring filename, jfloatArray samples,
  1 +// sherpa-onnx/jni/offline-tts.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +
  5 +#include "sherpa-onnx/csrc/offline-tts.h"
  6 +
  7 +#include "sherpa-onnx/csrc/macros.h"
  8 +#include "sherpa-onnx/jni/common.h"
  9 +
  10 +namespace sherpa_onnx {
  11 +
  12 +static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
  13 + OfflineTtsConfig ans;
  14 +
  15 + jclass cls = env->GetObjectClass(config);
  16 + jfieldID fid;
  17 +
  18 + fid = env->GetFieldID(cls, "model",
  19 + "Lcom/k2fsa/sherpa/onnx/OfflineTtsModelConfig;");
  20 + jobject model = env->GetObjectField(config, fid);
  21 + jclass model_config_cls = env->GetObjectClass(model);
  22 +
  23 + fid = env->GetFieldID(model_config_cls, "vits",
  24 + "Lcom/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig;");
  25 + jobject vits = env->GetObjectField(model, fid);
  26 + jclass vits_cls = env->GetObjectClass(vits);
  27 +
  28 + fid = env->GetFieldID(vits_cls, "model", "Ljava/lang/String;");
  29 + jstring s = (jstring)env->GetObjectField(vits, fid);
  30 + const char *p = env->GetStringUTFChars(s, nullptr);
  31 + ans.model.vits.model = p;
  32 + env->ReleaseStringUTFChars(s, p);
  33 +
  34 + fid = env->GetFieldID(vits_cls, "lexicon", "Ljava/lang/String;");
  35 + s = (jstring)env->GetObjectField(vits, fid);
  36 + p = env->GetStringUTFChars(s, nullptr);
  37 + ans.model.vits.lexicon = p;
  38 + env->ReleaseStringUTFChars(s, p);
  39 +
  40 + fid = env->GetFieldID(vits_cls, "tokens", "Ljava/lang/String;");
  41 + s = (jstring)env->GetObjectField(vits, fid);
  42 + p = env->GetStringUTFChars(s, nullptr);
  43 + ans.model.vits.tokens = p;
  44 + env->ReleaseStringUTFChars(s, p);
  45 +
  46 + fid = env->GetFieldID(vits_cls, "dataDir", "Ljava/lang/String;");
  47 + s = (jstring)env->GetObjectField(vits, fid);
  48 + p = env->GetStringUTFChars(s, nullptr);
  49 + ans.model.vits.data_dir = p;
  50 + env->ReleaseStringUTFChars(s, p);
  51 +
  52 + fid = env->GetFieldID(vits_cls, "dictDir", "Ljava/lang/String;");
  53 + s = (jstring)env->GetObjectField(vits, fid);
  54 + p = env->GetStringUTFChars(s, nullptr);
  55 + ans.model.vits.dict_dir = p;
  56 + env->ReleaseStringUTFChars(s, p);
  57 +
  58 + fid = env->GetFieldID(vits_cls, "noiseScale", "F");
  59 + ans.model.vits.noise_scale = env->GetFloatField(vits, fid);
  60 +
  61 + fid = env->GetFieldID(vits_cls, "noiseScaleW", "F");
  62 + ans.model.vits.noise_scale_w = env->GetFloatField(vits, fid);
  63 +
  64 + fid = env->GetFieldID(vits_cls, "lengthScale", "F");
  65 + ans.model.vits.length_scale = env->GetFloatField(vits, fid);
  66 +
  67 + fid = env->GetFieldID(model_config_cls, "numThreads", "I");
  68 + ans.model.num_threads = env->GetIntField(model, fid);
  69 +
  70 + fid = env->GetFieldID(model_config_cls, "debug", "Z");
  71 + ans.model.debug = env->GetBooleanField(model, fid);
  72 +
  73 + fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;");
  74 + s = (jstring)env->GetObjectField(model, fid);
  75 + p = env->GetStringUTFChars(s, nullptr);
  76 + ans.model.provider = p;
  77 + env->ReleaseStringUTFChars(s, p);
  78 +
  79 + // for ruleFsts
  80 + fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;");
  81 + s = (jstring)env->GetObjectField(config, fid);
  82 + p = env->GetStringUTFChars(s, nullptr);
  83 + ans.rule_fsts = p;
  84 + env->ReleaseStringUTFChars(s, p);
  85 +
  86 + // for ruleFars
  87 + fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");
  88 + s = (jstring)env->GetObjectField(config, fid);
  89 + p = env->GetStringUTFChars(s, nullptr);
  90 + ans.rule_fars = p;
  91 + env->ReleaseStringUTFChars(s, p);
  92 +
  93 + fid = env->GetFieldID(cls, "maxNumSentences", "I");
  94 + ans.max_num_sentences = env->GetIntField(config, fid);
  95 +
  96 + return ans;
  97 +}
  98 +
  99 +} // namespace sherpa_onnx
  100 +
  101 +SHERPA_ONNX_EXTERN_C
  102 +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newForAsset(
  103 + JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
  104 +#if __ANDROID_API__ >= 9
  105 + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  106 + if (!mgr) {
  107 + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
  108 + }
  109 +#endif
  110 + auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
  111 + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
  112 +
  113 + auto tts = new sherpa_onnx::OfflineTts(
  114 +#if __ANDROID_API__ >= 9
  115 + mgr,
  116 +#endif
  117 + config);
  118 +
  119 + return (jlong)tts;
  120 +}
  121 +
  122 +SHERPA_ONNX_EXTERN_C
  123 +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile(
  124 + JNIEnv *env, jobject /*obj*/, jobject _config) {
  125 + auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
  126 + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
  127 +
  128 + if (!config.Validate()) {
  129 + SHERPA_ONNX_LOGE("Errors found in config!");
  130 + }
  131 +
  132 + auto tts = new sherpa_onnx::OfflineTts(config);
  133 +
  134 + return (jlong)tts;
  135 +}
  136 +
  137 +SHERPA_ONNX_EXTERN_C
  138 +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_delete(
  139 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  140 + delete reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr);
  141 +}
  142 +
  143 +SHERPA_ONNX_EXTERN_C
  144 +JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getSampleRate(
  145 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  146 + return reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->SampleRate();
  147 +}
  148 +
  149 +SHERPA_ONNX_EXTERN_C
  150 +JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers(
  151 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  152 + return reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->NumSpeakers();
  153 +}
  154 +
  155 +SHERPA_ONNX_EXTERN_C
  156 +JNIEXPORT jobjectArray JNICALL
  157 +Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/,
  158 + jlong ptr, jstring text,
  159 + jint sid, jfloat speed) {
  160 + const char *p_text = env->GetStringUTFChars(text, nullptr);
  161 + SHERPA_ONNX_LOGE("string is: %s", p_text);
  162 +
  163 + auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate(
  164 + p_text, sid, speed);
  165 +
  166 + jfloatArray samples_arr = env->NewFloatArray(audio.samples.size());
  167 + env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(),
  168 + audio.samples.data());
  169 +
  170 + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
  171 + 2, env->FindClass("java/lang/Object"), nullptr);
  172 +
  173 + env->SetObjectArrayElement(obj_arr, 0, samples_arr);
  174 + env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate));
  175 +
  176 + env->ReleaseStringUTFChars(text, p_text);
  177 +
  178 + return obj_arr;
  179 +}
  180 +
  181 +SHERPA_ONNX_EXTERN_C
  182 +JNIEXPORT jobjectArray JNICALL
  183 +Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl(
  184 + JNIEnv *env, jobject /*obj*/, jlong ptr, jstring text, jint sid,
  185 + jfloat speed, jobject callback) {
  186 + const char *p_text = env->GetStringUTFChars(text, nullptr);
  187 + SHERPA_ONNX_LOGE("string is: %s", p_text);
  188 +
  189 + std::function<void(const float *, int32_t, float)> callback_wrapper =
  190 + [env, callback](const float *samples, int32_t n, float /*progress*/) {
  191 + jclass cls = env->GetObjectClass(callback);
  192 + jmethodID mid = env->GetMethodID(cls, "invoke", "([F)V");
  193 +
  194 + jfloatArray samples_arr = env->NewFloatArray(n);
  195 + env->SetFloatArrayRegion(samples_arr, 0, n, samples);
  196 + env->CallVoidMethod(callback, mid, samples_arr);
  197 + };
  198 +
  199 + auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate(
  200 + p_text, sid, speed, callback_wrapper);
  201 +
  202 + jfloatArray samples_arr = env->NewFloatArray(audio.samples.size());
  203 + env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(),
  204 + audio.samples.data());
  205 +
  206 + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
  207 + 2, env->FindClass("java/lang/Object"), nullptr);
  208 +
  209 + env->SetObjectArrayElement(obj_arr, 0, samples_arr);
  210 + env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate));
  211 +
  212 + env->ReleaseStringUTFChars(text, p_text);
  213 +
  214 + return obj_arr;
  215 +}