Committed by
GitHub
Refactor TTS Android code to support jieba for Chinese TTS models (#800)
正在显示 40 个修改的文件,包含 352 行增加和 285 行删除
| @@ -93,3 +93,5 @@ sr-data | @@ -93,3 +93,5 @@ sr-data | ||
| 93 | vits-icefall-* | 93 | vits-icefall-* |
| 94 | sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 | 94 | sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 |
| 95 | spoken-language-identification-test-wavs | 95 | spoken-language-identification-test-wavs |
| 96 | +my-release-key* | ||
| 97 | +vits-zh-hf-fanchen-C |
| @@ -158,6 +158,7 @@ class MainActivity : AppCompatActivity() { | @@ -158,6 +158,7 @@ class MainActivity : AppCompatActivity() { | ||
| 158 | var ruleFars: String? | 158 | var ruleFars: String? |
| 159 | var lexicon: String? | 159 | var lexicon: String? |
| 160 | var dataDir: String? | 160 | var dataDir: String? |
| 161 | + var dictDir: String? | ||
| 161 | var assets: AssetManager? = application.assets | 162 | var assets: AssetManager? = application.assets |
| 162 | 163 | ||
| 163 | // The purpose of such a design is to make the CI test easier | 164 | // The purpose of such a design is to make the CI test easier |
| @@ -169,6 +170,7 @@ class MainActivity : AppCompatActivity() { | @@ -169,6 +170,7 @@ class MainActivity : AppCompatActivity() { | ||
| 169 | ruleFars = null | 170 | ruleFars = null |
| 170 | lexicon = null | 171 | lexicon = null |
| 171 | dataDir = null | 172 | dataDir = null |
| 173 | + dictDir = null | ||
| 172 | 174 | ||
| 173 | // Example 1: | 175 | // Example 1: |
| 174 | // modelDir = "vits-vctk" | 176 | // modelDir = "vits-vctk" |
| @@ -191,21 +193,36 @@ class MainActivity : AppCompatActivity() { | @@ -191,21 +193,36 @@ class MainActivity : AppCompatActivity() { | ||
| 191 | // lexicon = "lexicon.txt" | 193 | // lexicon = "lexicon.txt" |
| 192 | 194 | ||
| 193 | // Example 4: | 195 | // Example 4: |
| 196 | + // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers | ||
| 197 | + // modelDir = "vits-zh-hf-fanchen-C" | ||
| 198 | + // modelName = "vits-zh-hf-fanchen-C.onnx" | ||
| 199 | + // lexicon = "lexicon.txt" | ||
| 200 | + // dictDir = "vits-zh-hf-fanchen-C/dict" | ||
| 201 | + | ||
| 202 | + // Example 5: | ||
| 194 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2 | 203 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2 |
| 195 | // modelDir = "vits-coqui-de-css10" | 204 | // modelDir = "vits-coqui-de-css10" |
| 196 | // modelName = "model.onnx" | 205 | // modelName = "model.onnx" |
| 197 | - // lang = "deu" | ||
| 198 | 206 | ||
| 199 | if (dataDir != null) { | 207 | if (dataDir != null) { |
| 200 | - val newDir = copyDataDir(modelDir) | 208 | + val newDir = copyDataDir(modelDir!!) |
| 201 | modelDir = newDir + "/" + modelDir | 209 | modelDir = newDir + "/" + modelDir |
| 202 | dataDir = newDir + "/" + dataDir | 210 | dataDir = newDir + "/" + dataDir |
| 203 | assets = null | 211 | assets = null |
| 204 | } | 212 | } |
| 205 | 213 | ||
| 214 | + if (dictDir != null) { | ||
| 215 | + val newDir = copyDataDir( modelDir!!) | ||
| 216 | + modelDir = newDir + "/" + modelDir | ||
| 217 | + dictDir = modelDir + "/" + "dict" | ||
| 218 | + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | ||
| 219 | + assets = null | ||
| 220 | + } | ||
| 221 | + | ||
| 206 | val config = getOfflineTtsConfig( | 222 | val config = getOfflineTtsConfig( |
| 207 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", | 223 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", |
| 208 | dataDir = dataDir ?: "", | 224 | dataDir = dataDir ?: "", |
| 225 | + dictDir = dictDir ?: "", | ||
| 209 | ruleFsts = ruleFsts ?: "", | 226 | ruleFsts = ruleFsts ?: "", |
| 210 | ruleFars = ruleFars ?: "", | 227 | ruleFars = ruleFars ?: "", |
| 211 | )!! | 228 | )!! |
| @@ -8,6 +8,7 @@ data class OfflineTtsVitsModelConfig( | @@ -8,6 +8,7 @@ data class OfflineTtsVitsModelConfig( | ||
| 8 | var lexicon: String = "", | 8 | var lexicon: String = "", |
| 9 | var tokens: String, | 9 | var tokens: String, |
| 10 | var dataDir: String = "", | 10 | var dataDir: String = "", |
| 11 | + var dictDir: String = "", | ||
| 11 | var noiseScale: Float = 0.667f, | 12 | var noiseScale: Float = 0.667f, |
| 12 | var noiseScaleW: Float = 0.8f, | 13 | var noiseScaleW: Float = 0.8f, |
| 13 | var lengthScale: Float = 1.0f, | 14 | var lengthScale: Float = 1.0f, |
| @@ -49,7 +50,7 @@ class OfflineTts( | @@ -49,7 +50,7 @@ class OfflineTts( | ||
| 49 | 50 | ||
| 50 | init { | 51 | init { |
| 51 | if (assetManager != null) { | 52 | if (assetManager != null) { |
| 52 | - ptr = new(assetManager, config) | 53 | + ptr = newFromAsset(assetManager, config) |
| 53 | } else { | 54 | } else { |
| 54 | ptr = newFromFile(config) | 55 | ptr = newFromFile(config) |
| 55 | } | 56 | } |
| @@ -87,7 +88,7 @@ class OfflineTts( | @@ -87,7 +88,7 @@ class OfflineTts( | ||
| 87 | fun allocate(assetManager: AssetManager? = null) { | 88 | fun allocate(assetManager: AssetManager? = null) { |
| 88 | if (ptr == 0L) { | 89 | if (ptr == 0L) { |
| 89 | if (assetManager != null) { | 90 | if (assetManager != null) { |
| 90 | - ptr = new(assetManager, config) | 91 | + ptr = newFromAsset(assetManager, config) |
| 91 | } else { | 92 | } else { |
| 92 | ptr = newFromFile(config) | 93 | ptr = newFromFile(config) |
| 93 | } | 94 | } |
| @@ -105,7 +106,7 @@ class OfflineTts( | @@ -105,7 +106,7 @@ class OfflineTts( | ||
| 105 | delete(ptr) | 106 | delete(ptr) |
| 106 | } | 107 | } |
| 107 | 108 | ||
| 108 | - private external fun new( | 109 | + private external fun newFromAsset( |
| 109 | assetManager: AssetManager, | 110 | assetManager: AssetManager, |
| 110 | config: OfflineTtsConfig, | 111 | config: OfflineTtsConfig, |
| 111 | ): Long | 112 | ): Long |
| @@ -152,6 +153,7 @@ fun getOfflineTtsConfig( | @@ -152,6 +153,7 @@ fun getOfflineTtsConfig( | ||
| 152 | modelName: String, | 153 | modelName: String, |
| 153 | lexicon: String, | 154 | lexicon: String, |
| 154 | dataDir: String, | 155 | dataDir: String, |
| 156 | + dictDir: String, | ||
| 155 | ruleFsts: String, | 157 | ruleFsts: String, |
| 156 | ruleFars: String | 158 | ruleFars: String |
| 157 | ): OfflineTtsConfig? { | 159 | ): OfflineTtsConfig? { |
| @@ -161,7 +163,8 @@ fun getOfflineTtsConfig( | @@ -161,7 +163,8 @@ fun getOfflineTtsConfig( | ||
| 161 | model = "$modelDir/$modelName", | 163 | model = "$modelDir/$modelName", |
| 162 | lexicon = "$modelDir/$lexicon", | 164 | lexicon = "$modelDir/$lexicon", |
| 163 | tokens = "$modelDir/tokens.txt", | 165 | tokens = "$modelDir/tokens.txt", |
| 164 | - dataDir = "$dataDir" | 166 | + dataDir = dataDir, |
| 167 | + dictDir = dictDir, | ||
| 165 | ), | 168 | ), |
| 166 | numThreads = 2, | 169 | numThreads = 2, |
| 167 | debug = true, | 170 | debug = true, |
| @@ -42,6 +42,7 @@ object TtsEngine { | @@ -42,6 +42,7 @@ object TtsEngine { | ||
| 42 | private var ruleFars: String? = null | 42 | private var ruleFars: String? = null |
| 43 | private var lexicon: String? = null | 43 | private var lexicon: String? = null |
| 44 | private var dataDir: String? = null | 44 | private var dataDir: String? = null |
| 45 | + private var dictDir: String? = null | ||
| 45 | private var assets: AssetManager? = null | 46 | private var assets: AssetManager? = null |
| 46 | 47 | ||
| 47 | init { | 48 | init { |
| @@ -54,6 +55,7 @@ object TtsEngine { | @@ -54,6 +55,7 @@ object TtsEngine { | ||
| 54 | ruleFars = null | 55 | ruleFars = null |
| 55 | lexicon = null | 56 | lexicon = null |
| 56 | dataDir = null | 57 | dataDir = null |
| 58 | + dictDir = null | ||
| 57 | lang = null | 59 | lang = null |
| 58 | 60 | ||
| 59 | // Please enable one and only one of the examples below | 61 | // Please enable one and only one of the examples below |
| @@ -83,6 +85,14 @@ object TtsEngine { | @@ -83,6 +85,14 @@ object TtsEngine { | ||
| 83 | // lang = "zho" | 85 | // lang = "zho" |
| 84 | 86 | ||
| 85 | // Example 4: | 87 | // Example 4: |
| 88 | + // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers | ||
| 89 | + // modelDir = "vits-zh-hf-fanchen-C" | ||
| 90 | + // modelName = "vits-zh-hf-fanchen-C.onnx" | ||
| 91 | + // lexicon = "lexicon.txt" | ||
| 92 | + // dictDir = "vits-zh-hf-fanchen-C/dict" | ||
| 93 | + // lang = "zho" | ||
| 94 | + | ||
| 95 | + // Example 5: | ||
| 86 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2 | 96 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2 |
| 87 | // This model does not need lexicon or dataDir | 97 | // This model does not need lexicon or dataDir |
| 88 | // modelDir = "vits-coqui-de-css10" | 98 | // modelDir = "vits-coqui-de-css10" |
| @@ -108,9 +118,18 @@ object TtsEngine { | @@ -108,9 +118,18 @@ object TtsEngine { | ||
| 108 | assets = null | 118 | assets = null |
| 109 | } | 119 | } |
| 110 | 120 | ||
| 121 | + if (dictDir != null) { | ||
| 122 | + val newDir = copyDataDir(context, modelDir!!) | ||
| 123 | + modelDir = newDir + "/" + modelDir | ||
| 124 | + dictDir = modelDir + "/" + "dict" | ||
| 125 | + ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | ||
| 126 | + assets = null | ||
| 127 | + } | ||
| 128 | + | ||
| 111 | val config = getOfflineTtsConfig( | 129 | val config = getOfflineTtsConfig( |
| 112 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", | 130 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", |
| 113 | dataDir = dataDir ?: "", | 131 | dataDir = dataDir ?: "", |
| 132 | + dictDir = dictDir ?: "", | ||
| 114 | ruleFsts = ruleFsts ?: "", | 133 | ruleFsts = ruleFsts ?: "", |
| 115 | ruleFars = ruleFars ?: "" | 134 | ruleFars = ruleFars ?: "" |
| 116 | )!! | 135 | )!! |
| @@ -47,7 +47,7 @@ onnxruntime_version=1.17.1 | @@ -47,7 +47,7 @@ onnxruntime_version=1.17.1 | ||
| 47 | if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then | 47 | if [ ! -f $onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then |
| 48 | mkdir -p $onnxruntime_version | 48 | mkdir -p $onnxruntime_version |
| 49 | pushd $onnxruntime_version | 49 | pushd $onnxruntime_version |
| 50 | - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip | 50 | + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip |
| 51 | unzip onnxruntime-android-${onnxruntime_version}.zip | 51 | unzip onnxruntime-android-${onnxruntime_version}.zip |
| 52 | rm onnxruntime-android-${onnxruntime_version}.zip | 52 | rm onnxruntime-android-${onnxruntime_version}.zip |
| 53 | popd | 53 | popd |
| @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 | @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 | ||
| 48 | if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then | 48 | if [ ! -f $onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then |
| 49 | mkdir -p $onnxruntime_version | 49 | mkdir -p $onnxruntime_version |
| 50 | pushd $onnxruntime_version | 50 | pushd $onnxruntime_version |
| 51 | - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip | 51 | + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip |
| 52 | unzip onnxruntime-android-${onnxruntime_version}.zip | 52 | unzip onnxruntime-android-${onnxruntime_version}.zip |
| 53 | rm onnxruntime-android-${onnxruntime_version}.zip | 53 | rm onnxruntime-android-${onnxruntime_version}.zip |
| 54 | popd | 54 | popd |
| @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 | @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 | ||
| 48 | if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then | 48 | if [ ! -f $onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then |
| 49 | mkdir -p $onnxruntime_version | 49 | mkdir -p $onnxruntime_version |
| 50 | pushd $onnxruntime_version | 50 | pushd $onnxruntime_version |
| 51 | - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip | 51 | + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip |
| 52 | unzip onnxruntime-android-${onnxruntime_version}.zip | 52 | unzip onnxruntime-android-${onnxruntime_version}.zip |
| 53 | rm onnxruntime-android-${onnxruntime_version}.zip | 53 | rm onnxruntime-android-${onnxruntime_version}.zip |
| 54 | popd | 54 | popd |
| @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 | @@ -48,7 +48,7 @@ onnxruntime_version=1.17.1 | ||
| 48 | if [ ! -f $onnxruntime_version/jni/x86/libonnxruntime.so ]; then | 48 | if [ ! -f $onnxruntime_version/jni/x86/libonnxruntime.so ]; then |
| 49 | mkdir -p $onnxruntime_version | 49 | mkdir -p $onnxruntime_version |
| 50 | pushd $onnxruntime_version | 50 | pushd $onnxruntime_version |
| 51 | - wget -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip | 51 | + wget -c -q https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${onnxruntime_version}/onnxruntime-android-${onnxruntime_version}.zip |
| 52 | unzip onnxruntime-android-${onnxruntime_version}.zip | 52 | unzip onnxruntime-android-${onnxruntime_version}.zip |
| 53 | rm onnxruntime-android-${onnxruntime_version}.zip | 53 | rm onnxruntime-android-${onnxruntime_version}.zip |
| 54 | popd | 54 | popd |
| @@ -61,6 +61,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | @@ -61,6 +61,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | ||
| 61 | sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt | 61 | sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt |
| 62 | {% endif %} | 62 | {% endif %} |
| 63 | 63 | ||
| 64 | +{% if tts_model.dict_dir %} | ||
| 65 | + dict_dir={{ tts_model.dict_dir }} | ||
| 66 | + sed -i.bak s%"dictDir = null"%"dictDir = \"$dict_dir\""% ./TtsEngine.kt | ||
| 67 | +{% endif %} | ||
| 68 | + | ||
| 64 | {% if tts_model.data_dir %} | 69 | {% if tts_model.data_dir %} |
| 65 | data_dir={{ tts_model.data_dir }} | 70 | data_dir={{ tts_model.data_dir }} |
| 66 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt | 71 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt |
| @@ -59,6 +59,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt | @@ -59,6 +59,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt | ||
| 59 | sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt | 59 | sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt |
| 60 | {% endif %} | 60 | {% endif %} |
| 61 | 61 | ||
| 62 | +{% if tts_model.dict_dir %} | ||
| 63 | + dict_dir={{ tts_model.dict_dir }} | ||
| 64 | + sed -i.bak s%"dictDir = null"%"dictDir = \"$dict_dir\""% ./MainActivity.kt | ||
| 65 | +{% endif %} | ||
| 66 | + | ||
| 62 | {% if tts_model.data_dir %} | 67 | {% if tts_model.data_dir %} |
| 63 | data_dir={{ tts_model.data_dir }} | 68 | data_dir={{ tts_model.data_dir }} |
| 64 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt | 69 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt |
| @@ -35,6 +35,7 @@ class TtsModel: | @@ -35,6 +35,7 @@ class TtsModel: | ||
| 35 | rule_fsts: Optional[List[str]] = None | 35 | rule_fsts: Optional[List[str]] = None |
| 36 | rule_fars: Optional[List[str]] = None | 36 | rule_fars: Optional[List[str]] = None |
| 37 | data_dir: Optional[str] = None | 37 | data_dir: Optional[str] = None |
| 38 | + dict_dir: Optional[str] = None | ||
| 38 | is_char: bool = False | 39 | is_char: bool = False |
| 39 | lang_iso_639_3: str = "" | 40 | lang_iso_639_3: str = "" |
| 40 | 41 | ||
| @@ -326,8 +327,14 @@ def get_vits_models() -> List[TtsModel]: | @@ -326,8 +327,14 @@ def get_vits_models() -> List[TtsModel]: | ||
| 326 | rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"] | 327 | rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"] |
| 327 | for m in chinese_models: | 328 | for m in chinese_models: |
| 328 | s = [f"{m.model_dir}/{r}" for r in rule_fsts] | 329 | s = [f"{m.model_dir}/{r}" for r in rule_fsts] |
| 330 | + if "vits-zh-hf" in m.model_dir: | ||
| 331 | + s = s[:-1] | ||
| 332 | + m.dict_dir = m.model_dir + "/dict" | ||
| 333 | + | ||
| 329 | m.rule_fsts = ",".join(s) | 334 | m.rule_fsts = ",".join(s) |
| 330 | - m.rule_fars = f"{m.model_dir}/rule.far" | 335 | + |
| 336 | + if "vits-zh-hf" not in m.model_dir: | ||
| 337 | + m.rule_fars = f"{m.model_dir}/rule.far" | ||
| 331 | 338 | ||
| 332 | all_models = chinese_models + [ | 339 | all_models = chinese_models + [ |
| 333 | TtsModel( | 340 | TtsModel( |
| @@ -32,7 +32,7 @@ bool AudioTaggingModelConfig::Validate() const { | @@ -32,7 +32,7 @@ bool AudioTaggingModelConfig::Validate() const { | ||
| 32 | } | 32 | } |
| 33 | 33 | ||
| 34 | if (!ced.empty() && !FileExists(ced)) { | 34 | if (!ced.empty() && !FileExists(ced)) { |
| 35 | - SHERPA_ONNX_LOGE("CED model file %s does not exist", ced.c_str()); | 35 | + SHERPA_ONNX_LOGE("CED model file '%s' does not exist", ced.c_str()); |
| 36 | return false; | 36 | return false; |
| 37 | } | 37 | } |
| 38 | 38 |
| @@ -48,7 +48,7 @@ bool AudioTaggingConfig::Validate() const { | @@ -48,7 +48,7 @@ bool AudioTaggingConfig::Validate() const { | ||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | if (!FileExists(labels)) { | 50 | if (!FileExists(labels)) { |
| 51 | - SHERPA_ONNX_LOGE("--labels %s does not exist", labels.c_str()); | 51 | + SHERPA_ONNX_LOGE("--labels '%s' does not exist", labels.c_str()); |
| 52 | return false; | 52 | return false; |
| 53 | } | 53 | } |
| 54 | 54 |
| @@ -7,7 +7,7 @@ | @@ -7,7 +7,7 @@ | ||
| 7 | #include <fstream> | 7 | #include <fstream> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | 9 | ||
| 10 | -#include "sherpa-onnx/csrc/log.h" | 10 | +#include "sherpa-onnx/csrc/macros.h" |
| 11 | 11 | ||
| 12 | namespace sherpa_onnx { | 12 | namespace sherpa_onnx { |
| 13 | 13 | ||
| @@ -17,7 +17,7 @@ bool FileExists(const std::string &filename) { | @@ -17,7 +17,7 @@ bool FileExists(const std::string &filename) { | ||
| 17 | 17 | ||
| 18 | void AssertFileExists(const std::string &filename) { | 18 | void AssertFileExists(const std::string &filename) { |
| 19 | if (!FileExists(filename)) { | 19 | if (!FileExists(filename)) { |
| 20 | - SHERPA_ONNX_LOG(FATAL) << filename << " does not exist!"; | 20 | + SHERPA_ONNX_LOGE("filename '%s' does not exist", filename.c_str()); |
| 21 | exit(-1); | 21 | exit(-1); |
| 22 | } | 22 | } |
| 23 | } | 23 | } |
| @@ -146,6 +146,14 @@ class JiebaLexicon::Impl { | @@ -146,6 +146,14 @@ class JiebaLexicon::Impl { | ||
| 146 | if (token2id_.count(p.first) && !token2id_.count(p.second)) { | 146 | if (token2id_.count(p.first) && !token2id_.count(p.second)) { |
| 147 | token2id_[p.second] = token2id_[p.first]; | 147 | token2id_[p.second] = token2id_[p.first]; |
| 148 | } | 148 | } |
| 149 | + | ||
| 150 | + if (!token2id_.count(p.first) && token2id_.count(p.second)) { | ||
| 151 | + token2id_[p.first] = token2id_[p.second]; | ||
| 152 | + } | ||
| 153 | + } | ||
| 154 | + | ||
| 155 | + if (!token2id_.count("、") && token2id_.count(",")) { | ||
| 156 | + token2id_["、"] = token2id_[","]; | ||
| 149 | } | 157 | } |
| 150 | } | 158 | } |
| 151 | 159 |
| @@ -101,7 +101,8 @@ bool KeywordSpotterConfig::Validate() const { | @@ -101,7 +101,8 @@ bool KeywordSpotterConfig::Validate() const { | ||
| 101 | // Solution: the keywords_file variable is directly | 101 | // Solution: the keywords_file variable is directly |
| 102 | // parsed as a string of keywords | 102 | // parsed as a string of keywords |
| 103 | if (!std::ifstream(keywords_file.c_str()).good()) { | 103 | if (!std::ifstream(keywords_file.c_str()).good()) { |
| 104 | - SHERPA_ONNX_LOGE("Keywords file %s does not exist.", keywords_file.c_str()); | 104 | + SHERPA_ONNX_LOGE("Keywords file '%s' does not exist.", |
| 105 | + keywords_file.c_str()); | ||
| 105 | return false; | 106 | return false; |
| 106 | } | 107 | } |
| 107 | #endif | 108 | #endif |
| @@ -34,7 +34,7 @@ void OfflineCtcFstDecoderConfig::Register(ParseOptions *po) { | @@ -34,7 +34,7 @@ void OfflineCtcFstDecoderConfig::Register(ParseOptions *po) { | ||
| 34 | 34 | ||
| 35 | bool OfflineCtcFstDecoderConfig::Validate() const { | 35 | bool OfflineCtcFstDecoderConfig::Validate() const { |
| 36 | if (!graph.empty() && !FileExists(graph)) { | 36 | if (!graph.empty() && !FileExists(graph)) { |
| 37 | - SHERPA_ONNX_LOGE("graph: %s does not exist", graph.c_str()); | 37 | + SHERPA_ONNX_LOGE("graph: '%s' does not exist", graph.c_str()); |
| 38 | return false; | 38 | return false; |
| 39 | } | 39 | } |
| 40 | return true; | 40 | return true; |
| @@ -22,7 +22,7 @@ void OfflineLMConfig::Register(ParseOptions *po) { | @@ -22,7 +22,7 @@ void OfflineLMConfig::Register(ParseOptions *po) { | ||
| 22 | 22 | ||
| 23 | bool OfflineLMConfig::Validate() const { | 23 | bool OfflineLMConfig::Validate() const { |
| 24 | if (!FileExists(model)) { | 24 | if (!FileExists(model)) { |
| 25 | - SHERPA_ONNX_LOGE("%s does not exist", model.c_str()); | 25 | + SHERPA_ONNX_LOGE("'%s' does not exist", model.c_str()); |
| 26 | return false; | 26 | return false; |
| 27 | } | 27 | } |
| 28 | 28 |
| @@ -16,7 +16,7 @@ void OfflineNemoEncDecCtcModelConfig::Register(ParseOptions *po) { | @@ -16,7 +16,7 @@ void OfflineNemoEncDecCtcModelConfig::Register(ParseOptions *po) { | ||
| 16 | 16 | ||
| 17 | bool OfflineNemoEncDecCtcModelConfig::Validate() const { | 17 | bool OfflineNemoEncDecCtcModelConfig::Validate() const { |
| 18 | if (!FileExists(model)) { | 18 | if (!FileExists(model)) { |
| 19 | - SHERPA_ONNX_LOGE("NeMo model: %s does not exist", model.c_str()); | 19 | + SHERPA_ONNX_LOGE("NeMo model: '%s' does not exist", model.c_str()); |
| 20 | return false; | 20 | return false; |
| 21 | } | 21 | } |
| 22 | 22 |
| @@ -15,7 +15,7 @@ void OfflineParaformerModelConfig::Register(ParseOptions *po) { | @@ -15,7 +15,7 @@ void OfflineParaformerModelConfig::Register(ParseOptions *po) { | ||
| 15 | 15 | ||
| 16 | bool OfflineParaformerModelConfig::Validate() const { | 16 | bool OfflineParaformerModelConfig::Validate() const { |
| 17 | if (!FileExists(model)) { | 17 | if (!FileExists(model)) { |
| 18 | - SHERPA_ONNX_LOGE("Paraformer model %s does not exist", model.c_str()); | 18 | + SHERPA_ONNX_LOGE("Paraformer model '%s' does not exist", model.c_str()); |
| 19 | return false; | 19 | return false; |
| 20 | } | 20 | } |
| 21 | 21 |
| @@ -18,19 +18,19 @@ void OfflineTransducerModelConfig::Register(ParseOptions *po) { | @@ -18,19 +18,19 @@ void OfflineTransducerModelConfig::Register(ParseOptions *po) { | ||
| 18 | 18 | ||
| 19 | bool OfflineTransducerModelConfig::Validate() const { | 19 | bool OfflineTransducerModelConfig::Validate() const { |
| 20 | if (!FileExists(encoder_filename)) { | 20 | if (!FileExists(encoder_filename)) { |
| 21 | - SHERPA_ONNX_LOGE("transducer encoder: %s does not exist", | 21 | + SHERPA_ONNX_LOGE("transducer encoder: '%s' does not exist", |
| 22 | encoder_filename.c_str()); | 22 | encoder_filename.c_str()); |
| 23 | return false; | 23 | return false; |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | if (!FileExists(decoder_filename)) { | 26 | if (!FileExists(decoder_filename)) { |
| 27 | - SHERPA_ONNX_LOGE("transducer decoder: %s does not exist", | 27 | + SHERPA_ONNX_LOGE("transducer decoder: '%s' does not exist", |
| 28 | decoder_filename.c_str()); | 28 | decoder_filename.c_str()); |
| 29 | return false; | 29 | return false; |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | if (!FileExists(joiner_filename)) { | 32 | if (!FileExists(joiner_filename)) { |
| 33 | - SHERPA_ONNX_LOGE("transducer joiner: %s does not exist", | 33 | + SHERPA_ONNX_LOGE("transducer joiner: '%s' does not exist", |
| 34 | joiner_filename.c_str()); | 34 | joiner_filename.c_str()); |
| 35 | return false; | 35 | return false; |
| 36 | } | 36 | } |
| @@ -35,7 +35,7 @@ bool OfflineTtsVitsModelConfig::Validate() const { | @@ -35,7 +35,7 @@ bool OfflineTtsVitsModelConfig::Validate() const { | ||
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | if (!FileExists(model)) { | 37 | if (!FileExists(model)) { |
| 38 | - SHERPA_ONNX_LOGE("--vits-model: %s does not exist", model.c_str()); | 38 | + SHERPA_ONNX_LOGE("--vits-model: '%s' does not exist", model.c_str()); |
| 39 | return false; | 39 | return false; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| @@ -45,31 +45,31 @@ bool OfflineTtsVitsModelConfig::Validate() const { | @@ -45,31 +45,31 @@ bool OfflineTtsVitsModelConfig::Validate() const { | ||
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | if (!FileExists(tokens)) { | 47 | if (!FileExists(tokens)) { |
| 48 | - SHERPA_ONNX_LOGE("--vits-tokens: %s does not exist", tokens.c_str()); | 48 | + SHERPA_ONNX_LOGE("--vits-tokens: '%s' does not exist", tokens.c_str()); |
| 49 | return false; | 49 | return false; |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | if (!data_dir.empty()) { | 52 | if (!data_dir.empty()) { |
| 53 | if (!FileExists(data_dir + "/phontab")) { | 53 | if (!FileExists(data_dir + "/phontab")) { |
| 54 | - SHERPA_ONNX_LOGE("%s/phontab does not exist. Skipping test", | 54 | + SHERPA_ONNX_LOGE("'%s/phontab' does not exist. Skipping test", |
| 55 | data_dir.c_str()); | 55 | data_dir.c_str()); |
| 56 | return false; | 56 | return false; |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | if (!FileExists(data_dir + "/phonindex")) { | 59 | if (!FileExists(data_dir + "/phonindex")) { |
| 60 | - SHERPA_ONNX_LOGE("%s/phonindex does not exist. Skipping test", | 60 | + SHERPA_ONNX_LOGE("'%s/phonindex' does not exist. Skipping test", |
| 61 | data_dir.c_str()); | 61 | data_dir.c_str()); |
| 62 | return false; | 62 | return false; |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | if (!FileExists(data_dir + "/phondata")) { | 65 | if (!FileExists(data_dir + "/phondata")) { |
| 66 | - SHERPA_ONNX_LOGE("%s/phondata does not exist. Skipping test", | 66 | + SHERPA_ONNX_LOGE("'%s/phondata' does not exist. Skipping test", |
| 67 | data_dir.c_str()); | 67 | data_dir.c_str()); |
| 68 | return false; | 68 | return false; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | if (!FileExists(data_dir + "/intonations")) { | 71 | if (!FileExists(data_dir + "/intonations")) { |
| 72 | - SHERPA_ONNX_LOGE("%s/intonations does not exist.", data_dir.c_str()); | 72 | + SHERPA_ONNX_LOGE("'%s/intonations' does not exist.", data_dir.c_str()); |
| 73 | return false; | 73 | return false; |
| 74 | } | 74 | } |
| 75 | } | 75 | } |
| @@ -82,7 +82,8 @@ bool OfflineTtsVitsModelConfig::Validate() const { | @@ -82,7 +82,8 @@ bool OfflineTtsVitsModelConfig::Validate() const { | ||
| 82 | 82 | ||
| 83 | for (const auto &f : required_files) { | 83 | for (const auto &f : required_files) { |
| 84 | if (!FileExists(dict_dir + "/" + f)) { | 84 | if (!FileExists(dict_dir + "/" + f)) { |
| 85 | - SHERPA_ONNX_LOGE("%s/%s does not exist.", data_dir.c_str(), f.c_str()); | 85 | + SHERPA_ONNX_LOGE("'%s/%s' does not exist.", data_dir.c_str(), |
| 86 | + f.c_str()); | ||
| 86 | return false; | 87 | return false; |
| 87 | } | 88 | } |
| 88 | } | 89 | } |
| @@ -42,7 +42,7 @@ bool OfflineTtsConfig::Validate() const { | @@ -42,7 +42,7 @@ bool OfflineTtsConfig::Validate() const { | ||
| 42 | SplitStringToVector(rule_fsts, ",", false, &files); | 42 | SplitStringToVector(rule_fsts, ",", false, &files); |
| 43 | for (const auto &f : files) { | 43 | for (const auto &f : files) { |
| 44 | if (!FileExists(f)) { | 44 | if (!FileExists(f)) { |
| 45 | - SHERPA_ONNX_LOGE("Rule fst %s does not exist. ", f.c_str()); | 45 | + SHERPA_ONNX_LOGE("Rule fst '%s' does not exist. ", f.c_str()); |
| 46 | return false; | 46 | return false; |
| 47 | } | 47 | } |
| 48 | } | 48 | } |
| @@ -53,7 +53,7 @@ bool OfflineTtsConfig::Validate() const { | @@ -53,7 +53,7 @@ bool OfflineTtsConfig::Validate() const { | ||
| 53 | SplitStringToVector(rule_fars, ",", false, &files); | 53 | SplitStringToVector(rule_fars, ",", false, &files); |
| 54 | for (const auto &f : files) { | 54 | for (const auto &f : files) { |
| 55 | if (!FileExists(f)) { | 55 | if (!FileExists(f)) { |
| 56 | - SHERPA_ONNX_LOGE("Rule far %s does not exist. ", f.c_str()); | 56 | + SHERPA_ONNX_LOGE("Rule far '%s' does not exist. ", f.c_str()); |
| 57 | return false; | 57 | return false; |
| 58 | } | 58 | } |
| 59 | } | 59 | } |
| @@ -18,7 +18,7 @@ void OfflineWenetCtcModelConfig::Register(ParseOptions *po) { | @@ -18,7 +18,7 @@ void OfflineWenetCtcModelConfig::Register(ParseOptions *po) { | ||
| 18 | 18 | ||
| 19 | bool OfflineWenetCtcModelConfig::Validate() const { | 19 | bool OfflineWenetCtcModelConfig::Validate() const { |
| 20 | if (!FileExists(model)) { | 20 | if (!FileExists(model)) { |
| 21 | - SHERPA_ONNX_LOGE("WeNet model: %s does not exist", model.c_str()); | 21 | + SHERPA_ONNX_LOGE("WeNet model: '%s' does not exist", model.c_str()); |
| 22 | return false; | 22 | return false; |
| 23 | } | 23 | } |
| 24 | 24 |
| @@ -48,7 +48,8 @@ bool OfflineWhisperModelConfig::Validate() const { | @@ -48,7 +48,8 @@ bool OfflineWhisperModelConfig::Validate() const { | ||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | if (!FileExists(encoder)) { | 50 | if (!FileExists(encoder)) { |
| 51 | - SHERPA_ONNX_LOGE("whisper encoder file %s does not exist", encoder.c_str()); | 51 | + SHERPA_ONNX_LOGE("whisper encoder file '%s' does not exist", |
| 52 | + encoder.c_str()); | ||
| 52 | return false; | 53 | return false; |
| 53 | } | 54 | } |
| 54 | 55 | ||
| @@ -58,7 +59,8 @@ bool OfflineWhisperModelConfig::Validate() const { | @@ -58,7 +59,8 @@ bool OfflineWhisperModelConfig::Validate() const { | ||
| 58 | } | 59 | } |
| 59 | 60 | ||
| 60 | if (!FileExists(decoder)) { | 61 | if (!FileExists(decoder)) { |
| 61 | - SHERPA_ONNX_LOGE("whisper decoder file %s does not exist", decoder.c_str()); | 62 | + SHERPA_ONNX_LOGE("whisper decoder file '%s' does not exist", |
| 63 | + decoder.c_str()); | ||
| 62 | return false; | 64 | return false; |
| 63 | } | 65 | } |
| 64 | 66 |
| @@ -21,7 +21,7 @@ bool OfflineZipformerAudioTaggingModelConfig::Validate() const { | @@ -21,7 +21,7 @@ bool OfflineZipformerAudioTaggingModelConfig::Validate() const { | ||
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | if (!FileExists(model)) { | 23 | if (!FileExists(model)) { |
| 24 | - SHERPA_ONNX_LOGE("--zipformer-model: %s does not exist", model.c_str()); | 24 | + SHERPA_ONNX_LOGE("--zipformer-model: '%s' does not exist", model.c_str()); |
| 25 | return false; | 25 | return false; |
| 26 | } | 26 | } |
| 27 | 27 |
| @@ -15,7 +15,7 @@ void OfflineZipformerCtcModelConfig::Register(ParseOptions *po) { | @@ -15,7 +15,7 @@ void OfflineZipformerCtcModelConfig::Register(ParseOptions *po) { | ||
| 15 | 15 | ||
| 16 | bool OfflineZipformerCtcModelConfig::Validate() const { | 16 | bool OfflineZipformerCtcModelConfig::Validate() const { |
| 17 | if (!FileExists(model)) { | 17 | if (!FileExists(model)) { |
| 18 | - SHERPA_ONNX_LOGE("zipformer CTC model file %s does not exist", | 18 | + SHERPA_ONNX_LOGE("zipformer CTC model file '%s' does not exist", |
| 19 | model.c_str()); | 19 | model.c_str()); |
| 20 | return false; | 20 | return false; |
| 21 | } | 21 | } |
| @@ -31,7 +31,7 @@ void OnlineCtcFstDecoderConfig::Register(ParseOptions *po) { | @@ -31,7 +31,7 @@ void OnlineCtcFstDecoderConfig::Register(ParseOptions *po) { | ||
| 31 | 31 | ||
| 32 | bool OnlineCtcFstDecoderConfig::Validate() const { | 32 | bool OnlineCtcFstDecoderConfig::Validate() const { |
| 33 | if (!graph.empty() && !FileExists(graph)) { | 33 | if (!graph.empty() && !FileExists(graph)) { |
| 34 | - SHERPA_ONNX_LOGE("graph: %s does not exist", graph.c_str()); | 34 | + SHERPA_ONNX_LOGE("graph: '%s' does not exist", graph.c_str()); |
| 35 | return false; | 35 | return false; |
| 36 | } | 36 | } |
| 37 | return true; | 37 | return true; |
| @@ -22,7 +22,7 @@ void OnlineLMConfig::Register(ParseOptions *po) { | @@ -22,7 +22,7 @@ void OnlineLMConfig::Register(ParseOptions *po) { | ||
| 22 | 22 | ||
| 23 | bool OnlineLMConfig::Validate() const { | 23 | bool OnlineLMConfig::Validate() const { |
| 24 | if (!FileExists(model)) { | 24 | if (!FileExists(model)) { |
| 25 | - SHERPA_ONNX_LOGE("%s does not exist", model.c_str()); | 25 | + SHERPA_ONNX_LOGE("'%s' does not exist", model.c_str()); |
| 26 | return false; | 26 | return false; |
| 27 | } | 27 | } |
| 28 | 28 |
| @@ -45,7 +45,7 @@ bool OnlineModelConfig::Validate() const { | @@ -45,7 +45,7 @@ bool OnlineModelConfig::Validate() const { | ||
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | if (!FileExists(tokens)) { | 47 | if (!FileExists(tokens)) { |
| 48 | - SHERPA_ONNX_LOGE("tokens: %s does not exist", tokens.c_str()); | 48 | + SHERPA_ONNX_LOGE("tokens: '%s' does not exist", tokens.c_str()); |
| 49 | return false; | 49 | return false; |
| 50 | } | 50 | } |
| 51 | 51 |
| @@ -18,12 +18,12 @@ void OnlineParaformerModelConfig::Register(ParseOptions *po) { | @@ -18,12 +18,12 @@ void OnlineParaformerModelConfig::Register(ParseOptions *po) { | ||
| 18 | 18 | ||
| 19 | bool OnlineParaformerModelConfig::Validate() const { | 19 | bool OnlineParaformerModelConfig::Validate() const { |
| 20 | if (!FileExists(encoder)) { | 20 | if (!FileExists(encoder)) { |
| 21 | - SHERPA_ONNX_LOGE("Paraformer encoder %s does not exist", encoder.c_str()); | 21 | + SHERPA_ONNX_LOGE("Paraformer encoder '%s' does not exist", encoder.c_str()); |
| 22 | return false; | 22 | return false; |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | if (!FileExists(decoder)) { | 25 | if (!FileExists(decoder)) { |
| 26 | - SHERPA_ONNX_LOGE("Paraformer decoder %s does not exist", decoder.c_str()); | 26 | + SHERPA_ONNX_LOGE("Paraformer decoder '%s' does not exist", decoder.c_str()); |
| 27 | return false; | 27 | return false; |
| 28 | } | 28 | } |
| 29 | 29 |
| @@ -18,17 +18,19 @@ void OnlineTransducerModelConfig::Register(ParseOptions *po) { | @@ -18,17 +18,19 @@ void OnlineTransducerModelConfig::Register(ParseOptions *po) { | ||
| 18 | 18 | ||
| 19 | bool OnlineTransducerModelConfig::Validate() const { | 19 | bool OnlineTransducerModelConfig::Validate() const { |
| 20 | if (!FileExists(encoder)) { | 20 | if (!FileExists(encoder)) { |
| 21 | - SHERPA_ONNX_LOGE("transducer encoder: %s does not exist", encoder.c_str()); | 21 | + SHERPA_ONNX_LOGE("transducer encoder: '%s' does not exist", |
| 22 | + encoder.c_str()); | ||
| 22 | return false; | 23 | return false; |
| 23 | } | 24 | } |
| 24 | 25 | ||
| 25 | if (!FileExists(decoder)) { | 26 | if (!FileExists(decoder)) { |
| 26 | - SHERPA_ONNX_LOGE("transducer decoder: %s does not exist", decoder.c_str()); | 27 | + SHERPA_ONNX_LOGE("transducer decoder: '%s' does not exist", |
| 28 | + decoder.c_str()); | ||
| 27 | return false; | 29 | return false; |
| 28 | } | 30 | } |
| 29 | 31 | ||
| 30 | if (!FileExists(joiner)) { | 32 | if (!FileExists(joiner)) { |
| 31 | - SHERPA_ONNX_LOGE("joiner: %s does not exist", joiner.c_str()); | 33 | + SHERPA_ONNX_LOGE("joiner: '%s' does not exist", joiner.c_str()); |
| 32 | return false; | 34 | return false; |
| 33 | } | 35 | } |
| 34 | 36 |
| @@ -21,7 +21,7 @@ void OnlineWenetCtcModelConfig::Register(ParseOptions *po) { | @@ -21,7 +21,7 @@ void OnlineWenetCtcModelConfig::Register(ParseOptions *po) { | ||
| 21 | 21 | ||
| 22 | bool OnlineWenetCtcModelConfig::Validate() const { | 22 | bool OnlineWenetCtcModelConfig::Validate() const { |
| 23 | if (!FileExists(model)) { | 23 | if (!FileExists(model)) { |
| 24 | - SHERPA_ONNX_LOGE("WeNet CTC model %s does not exist", model.c_str()); | 24 | + SHERPA_ONNX_LOGE("WeNet CTC model '%s' does not exist", model.c_str()); |
| 25 | return false; | 25 | return false; |
| 26 | } | 26 | } |
| 27 | 27 |
| @@ -22,7 +22,8 @@ bool OnlineZipformer2CtcModelConfig::Validate() const { | @@ -22,7 +22,8 @@ bool OnlineZipformer2CtcModelConfig::Validate() const { | ||
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | if (!FileExists(model)) { | 24 | if (!FileExists(model)) { |
| 25 | - SHERPA_ONNX_LOGE("--zipformer2-ctc-model %s does not exist", model.c_str()); | 25 | + SHERPA_ONNX_LOGE("--zipformer2-ctc-model '%s' does not exist", |
| 26 | + model.c_str()); | ||
| 26 | return false; | 27 | return false; |
| 27 | } | 28 | } |
| 28 | 29 |
| @@ -44,7 +44,8 @@ bool SileroVadModelConfig::Validate() const { | @@ -44,7 +44,8 @@ bool SileroVadModelConfig::Validate() const { | ||
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | if (!FileExists(model)) { | 46 | if (!FileExists(model)) { |
| 47 | - SHERPA_ONNX_LOGE("Silero vad model file %s does not exist", model.c_str()); | 47 | + SHERPA_ONNX_LOGE("Silero vad model file '%s' does not exist", |
| 48 | + model.c_str()); | ||
| 48 | return false; | 49 | return false; |
| 49 | } | 50 | } |
| 50 | 51 |
| @@ -31,7 +31,7 @@ bool SpeakerEmbeddingExtractorConfig::Validate() const { | @@ -31,7 +31,7 @@ bool SpeakerEmbeddingExtractorConfig::Validate() const { | ||
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | if (!FileExists(model)) { | 33 | if (!FileExists(model)) { |
| 34 | - SHERPA_ONNX_LOGE("--speaker-embedding-model: %s does not exist", | 34 | + SHERPA_ONNX_LOGE("--speaker-embedding-model: '%s' does not exist", |
| 35 | model.c_str()); | 35 | model.c_str()); |
| 36 | return false; | 36 | return false; |
| 37 | } | 37 | } |
| @@ -43,7 +43,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const { | @@ -43,7 +43,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const { | ||
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | if (!FileExists(encoder)) { | 45 | if (!FileExists(encoder)) { |
| 46 | - SHERPA_ONNX_LOGE("whisper encoder file %s does not exist", encoder.c_str()); | 46 | + SHERPA_ONNX_LOGE("whisper encoder file '%s' does not exist", |
| 47 | + encoder.c_str()); | ||
| 47 | return false; | 48 | return false; |
| 48 | } | 49 | } |
| 49 | 50 | ||
| @@ -53,7 +54,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const { | @@ -53,7 +54,8 @@ bool SpokenLanguageIdentificationWhisperConfig::Validate() const { | ||
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | if (!FileExists(decoder)) { | 56 | if (!FileExists(decoder)) { |
| 56 | - SHERPA_ONNX_LOGE("whisper decoder file %s does not exist", decoder.c_str()); | 57 | + SHERPA_ONNX_LOGE("whisper decoder file '%s' does not exist", |
| 58 | + decoder.c_str()); | ||
| 57 | return false; | 59 | return false; |
| 58 | } | 60 | } |
| 59 | 61 |
| @@ -9,11 +9,20 @@ if(NOT DEFINED ANDROID_ABI) | @@ -9,11 +9,20 @@ if(NOT DEFINED ANDROID_ABI) | ||
| 9 | include_directories($ENV{JAVA_HOME}/include/darwin) | 9 | include_directories($ENV{JAVA_HOME}/include/darwin) |
| 10 | endif() | 10 | endif() |
| 11 | 11 | ||
| 12 | -add_library(sherpa-onnx-jni | 12 | +set(sources |
| 13 | audio-tagging.cc | 13 | audio-tagging.cc |
| 14 | jni.cc | 14 | jni.cc |
| 15 | offline-stream.cc | 15 | offline-stream.cc |
| 16 | spoken-language-identification.cc | 16 | spoken-language-identification.cc |
| 17 | ) | 17 | ) |
| 18 | + | ||
| 19 | +if(SHERPA_ONNX_ENABLE_TTS) | ||
| 20 | + list(APPEND sources | ||
| 21 | + offline-tts.cc | ||
| 22 | + ) | ||
| 23 | +endif() | ||
| 24 | + | ||
| 25 | +add_library(sherpa-onnx-jni ${sources}) | ||
| 26 | + | ||
| 18 | target_link_libraries(sherpa-onnx-jni sherpa-onnx-core) | 27 | target_link_libraries(sherpa-onnx-jni sherpa-onnx-core) |
| 19 | install(TARGETS sherpa-onnx-jni DESTINATION lib) | 28 | install(TARGETS sherpa-onnx-jni DESTINATION lib) |
| @@ -24,10 +24,6 @@ | @@ -24,10 +24,6 @@ | ||
| 24 | #include "sherpa-onnx/csrc/wave-writer.h" | 24 | #include "sherpa-onnx/csrc/wave-writer.h" |
| 25 | #include "sherpa-onnx/jni/common.h" | 25 | #include "sherpa-onnx/jni/common.h" |
| 26 | 26 | ||
| 27 | -#if SHERPA_ONNX_ENABLE_TTS == 1 | ||
| 28 | -#include "sherpa-onnx/csrc/offline-tts.h" | ||
| 29 | -#endif | ||
| 30 | - | ||
| 31 | namespace sherpa_onnx { | 27 | namespace sherpa_onnx { |
| 32 | 28 | ||
| 33 | class SherpaOnnx { | 29 | class SherpaOnnx { |
| @@ -775,113 +771,6 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { | @@ -775,113 +771,6 @@ static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) { | ||
| 775 | return ans; | 771 | return ans; |
| 776 | } | 772 | } |
| 777 | 773 | ||
| 778 | -#if SHERPA_ONNX_ENABLE_TTS == 1 | ||
| 779 | -class SherpaOnnxOfflineTts { | ||
| 780 | - public: | ||
| 781 | -#if __ANDROID_API__ >= 9 | ||
| 782 | - SherpaOnnxOfflineTts(AAssetManager *mgr, const OfflineTtsConfig &config) | ||
| 783 | - : tts_(mgr, config) {} | ||
| 784 | -#endif | ||
| 785 | - explicit SherpaOnnxOfflineTts(const OfflineTtsConfig &config) | ||
| 786 | - : tts_(config) {} | ||
| 787 | - | ||
| 788 | - GeneratedAudio Generate(const std::string &text, int64_t sid = 0, | ||
| 789 | - float speed = 1.0, | ||
| 790 | - std::function<void(const float *, int32_t, float)> | ||
| 791 | - callback = nullptr) const { | ||
| 792 | - return tts_.Generate(text, sid, speed, callback); | ||
| 793 | - } | ||
| 794 | - | ||
| 795 | - int32_t SampleRate() const { return tts_.SampleRate(); } | ||
| 796 | - | ||
| 797 | - int32_t NumSpeakers() const { return tts_.NumSpeakers(); } | ||
| 798 | - | ||
| 799 | - private: | ||
| 800 | - OfflineTts tts_; | ||
| 801 | -}; | ||
| 802 | - | ||
| 803 | -static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { | ||
| 804 | - OfflineTtsConfig ans; | ||
| 805 | - | ||
| 806 | - jclass cls = env->GetObjectClass(config); | ||
| 807 | - jfieldID fid; | ||
| 808 | - | ||
| 809 | - fid = env->GetFieldID(cls, "model", | ||
| 810 | - "Lcom/k2fsa/sherpa/onnx/OfflineTtsModelConfig;"); | ||
| 811 | - jobject model = env->GetObjectField(config, fid); | ||
| 812 | - jclass model_config_cls = env->GetObjectClass(model); | ||
| 813 | - | ||
| 814 | - fid = env->GetFieldID(model_config_cls, "vits", | ||
| 815 | - "Lcom/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig;"); | ||
| 816 | - jobject vits = env->GetObjectField(model, fid); | ||
| 817 | - jclass vits_cls = env->GetObjectClass(vits); | ||
| 818 | - | ||
| 819 | - fid = env->GetFieldID(vits_cls, "model", "Ljava/lang/String;"); | ||
| 820 | - jstring s = (jstring)env->GetObjectField(vits, fid); | ||
| 821 | - const char *p = env->GetStringUTFChars(s, nullptr); | ||
| 822 | - ans.model.vits.model = p; | ||
| 823 | - env->ReleaseStringUTFChars(s, p); | ||
| 824 | - | ||
| 825 | - fid = env->GetFieldID(vits_cls, "lexicon", "Ljava/lang/String;"); | ||
| 826 | - s = (jstring)env->GetObjectField(vits, fid); | ||
| 827 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 828 | - ans.model.vits.lexicon = p; | ||
| 829 | - env->ReleaseStringUTFChars(s, p); | ||
| 830 | - | ||
| 831 | - fid = env->GetFieldID(vits_cls, "tokens", "Ljava/lang/String;"); | ||
| 832 | - s = (jstring)env->GetObjectField(vits, fid); | ||
| 833 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 834 | - ans.model.vits.tokens = p; | ||
| 835 | - env->ReleaseStringUTFChars(s, p); | ||
| 836 | - | ||
| 837 | - fid = env->GetFieldID(vits_cls, "dataDir", "Ljava/lang/String;"); | ||
| 838 | - s = (jstring)env->GetObjectField(vits, fid); | ||
| 839 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 840 | - ans.model.vits.data_dir = p; | ||
| 841 | - env->ReleaseStringUTFChars(s, p); | ||
| 842 | - | ||
| 843 | - fid = env->GetFieldID(vits_cls, "noiseScale", "F"); | ||
| 844 | - ans.model.vits.noise_scale = env->GetFloatField(vits, fid); | ||
| 845 | - | ||
| 846 | - fid = env->GetFieldID(vits_cls, "noiseScaleW", "F"); | ||
| 847 | - ans.model.vits.noise_scale_w = env->GetFloatField(vits, fid); | ||
| 848 | - | ||
| 849 | - fid = env->GetFieldID(vits_cls, "lengthScale", "F"); | ||
| 850 | - ans.model.vits.length_scale = env->GetFloatField(vits, fid); | ||
| 851 | - | ||
| 852 | - fid = env->GetFieldID(model_config_cls, "numThreads", "I"); | ||
| 853 | - ans.model.num_threads = env->GetIntField(model, fid); | ||
| 854 | - | ||
| 855 | - fid = env->GetFieldID(model_config_cls, "debug", "Z"); | ||
| 856 | - ans.model.debug = env->GetBooleanField(model, fid); | ||
| 857 | - | ||
| 858 | - fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;"); | ||
| 859 | - s = (jstring)env->GetObjectField(model, fid); | ||
| 860 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 861 | - ans.model.provider = p; | ||
| 862 | - env->ReleaseStringUTFChars(s, p); | ||
| 863 | - | ||
| 864 | - // for ruleFsts | ||
| 865 | - fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;"); | ||
| 866 | - s = (jstring)env->GetObjectField(config, fid); | ||
| 867 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 868 | - ans.rule_fsts = p; | ||
| 869 | - env->ReleaseStringUTFChars(s, p); | ||
| 870 | - | ||
| 871 | - // for ruleFars | ||
| 872 | - fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;"); | ||
| 873 | - s = (jstring)env->GetObjectField(config, fid); | ||
| 874 | - p = env->GetStringUTFChars(s, nullptr); | ||
| 875 | - ans.rule_fars = p; | ||
| 876 | - env->ReleaseStringUTFChars(s, p); | ||
| 877 | - | ||
| 878 | - fid = env->GetFieldID(cls, "maxNumSentences", "I"); | ||
| 879 | - ans.max_num_sentences = env->GetIntField(config, fid); | ||
| 880 | - | ||
| 881 | - return ans; | ||
| 882 | -} | ||
| 883 | -#endif | ||
| 884 | - | ||
| 885 | } // namespace sherpa_onnx | 774 | } // namespace sherpa_onnx |
| 886 | 775 | ||
| 887 | SHERPA_ONNX_EXTERN_C | 776 | SHERPA_ONNX_EXTERN_C |
| @@ -1226,128 +1115,6 @@ jobject NewFloat(JNIEnv *env, float value) { | @@ -1226,128 +1115,6 @@ jobject NewFloat(JNIEnv *env, float value) { | ||
| 1226 | return env->NewObject(cls, constructor, value); | 1115 | return env->NewObject(cls, constructor, value); |
| 1227 | } | 1116 | } |
| 1228 | 1117 | ||
| 1229 | -#if SHERPA_ONNX_ENABLE_TTS == 1 | ||
| 1230 | -SHERPA_ONNX_EXTERN_C | ||
| 1231 | -JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( | ||
| 1232 | - JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { | ||
| 1233 | -#if __ANDROID_API__ >= 9 | ||
| 1234 | - AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); | ||
| 1235 | - if (!mgr) { | ||
| 1236 | - SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); | ||
| 1237 | - } | ||
| 1238 | -#endif | ||
| 1239 | - auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); | ||
| 1240 | - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | ||
| 1241 | - | ||
| 1242 | - auto tts = new sherpa_onnx::SherpaOnnxOfflineTts( | ||
| 1243 | -#if __ANDROID_API__ >= 9 | ||
| 1244 | - mgr, | ||
| 1245 | -#endif | ||
| 1246 | - config); | ||
| 1247 | - | ||
| 1248 | - return (jlong)tts; | ||
| 1249 | -} | ||
| 1250 | - | ||
| 1251 | -SHERPA_ONNX_EXTERN_C | ||
| 1252 | -JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile( | ||
| 1253 | - JNIEnv *env, jobject /*obj*/, jobject _config) { | ||
| 1254 | - auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); | ||
| 1255 | - SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | ||
| 1256 | - | ||
| 1257 | - if (!config.Validate()) { | ||
| 1258 | - SHERPA_ONNX_LOGE("Errors found in config!"); | ||
| 1259 | - } | ||
| 1260 | - | ||
| 1261 | - auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config); | ||
| 1262 | - | ||
| 1263 | - return (jlong)tts; | ||
| 1264 | -} | ||
| 1265 | - | ||
| 1266 | -SHERPA_ONNX_EXTERN_C | ||
| 1267 | -JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_delete( | ||
| 1268 | - JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 1269 | - delete reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr); | ||
| 1270 | -} | ||
| 1271 | - | ||
| 1272 | -SHERPA_ONNX_EXTERN_C | ||
| 1273 | -JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getSampleRate( | ||
| 1274 | - JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 1275 | - return reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr) | ||
| 1276 | - ->SampleRate(); | ||
| 1277 | -} | ||
| 1278 | - | ||
| 1279 | -SHERPA_ONNX_EXTERN_C | ||
| 1280 | -JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers( | ||
| 1281 | - JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 1282 | - return reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr) | ||
| 1283 | - ->NumSpeakers(); | ||
| 1284 | -} | ||
| 1285 | - | ||
| 1286 | -SHERPA_ONNX_EXTERN_C | ||
| 1287 | -JNIEXPORT jobjectArray JNICALL | ||
| 1288 | -Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/, | ||
| 1289 | - jlong ptr, jstring text, | ||
| 1290 | - jint sid, jfloat speed) { | ||
| 1291 | - const char *p_text = env->GetStringUTFChars(text, nullptr); | ||
| 1292 | - SHERPA_ONNX_LOGE("string is: %s", p_text); | ||
| 1293 | - | ||
| 1294 | - auto audio = | ||
| 1295 | - reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr)->Generate( | ||
| 1296 | - p_text, sid, speed); | ||
| 1297 | - | ||
| 1298 | - jfloatArray samples_arr = env->NewFloatArray(audio.samples.size()); | ||
| 1299 | - env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(), | ||
| 1300 | - audio.samples.data()); | ||
| 1301 | - | ||
| 1302 | - jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( | ||
| 1303 | - 2, env->FindClass("java/lang/Object"), nullptr); | ||
| 1304 | - | ||
| 1305 | - env->SetObjectArrayElement(obj_arr, 0, samples_arr); | ||
| 1306 | - env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate)); | ||
| 1307 | - | ||
| 1308 | - env->ReleaseStringUTFChars(text, p_text); | ||
| 1309 | - | ||
| 1310 | - return obj_arr; | ||
| 1311 | -} | ||
| 1312 | - | ||
| 1313 | -SHERPA_ONNX_EXTERN_C | ||
| 1314 | -JNIEXPORT jobjectArray JNICALL | ||
| 1315 | -Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl( | ||
| 1316 | - JNIEnv *env, jobject /*obj*/, jlong ptr, jstring text, jint sid, | ||
| 1317 | - jfloat speed, jobject callback) { | ||
| 1318 | - const char *p_text = env->GetStringUTFChars(text, nullptr); | ||
| 1319 | - SHERPA_ONNX_LOGE("string is: %s", p_text); | ||
| 1320 | - | ||
| 1321 | - std::function<void(const float *, int32_t, float)> callback_wrapper = | ||
| 1322 | - [env, callback](const float *samples, int32_t n, float /*p*/) { | ||
| 1323 | - jclass cls = env->GetObjectClass(callback); | ||
| 1324 | - jmethodID mid = env->GetMethodID(cls, "invoke", "([F)V"); | ||
| 1325 | - | ||
| 1326 | - jfloatArray samples_arr = env->NewFloatArray(n); | ||
| 1327 | - env->SetFloatArrayRegion(samples_arr, 0, n, samples); | ||
| 1328 | - env->CallVoidMethod(callback, mid, samples_arr); | ||
| 1329 | - }; | ||
| 1330 | - | ||
| 1331 | - auto audio = | ||
| 1332 | - reinterpret_cast<sherpa_onnx::SherpaOnnxOfflineTts *>(ptr)->Generate( | ||
| 1333 | - p_text, sid, speed, callback_wrapper); | ||
| 1334 | - | ||
| 1335 | - jfloatArray samples_arr = env->NewFloatArray(audio.samples.size()); | ||
| 1336 | - env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(), | ||
| 1337 | - audio.samples.data()); | ||
| 1338 | - | ||
| 1339 | - jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( | ||
| 1340 | - 2, env->FindClass("java/lang/Object"), nullptr); | ||
| 1341 | - | ||
| 1342 | - env->SetObjectArrayElement(obj_arr, 0, samples_arr); | ||
| 1343 | - env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate)); | ||
| 1344 | - | ||
| 1345 | - env->ReleaseStringUTFChars(text, p_text); | ||
| 1346 | - | ||
| 1347 | - return obj_arr; | ||
| 1348 | -} | ||
| 1349 | -#endif | ||
| 1350 | - | ||
| 1351 | SHERPA_ONNX_EXTERN_C | 1118 | SHERPA_ONNX_EXTERN_C |
| 1352 | JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( | 1119 | JNIEXPORT jboolean JNICALL Java_com_k2fsa_sherpa_onnx_GeneratedAudio_saveImpl( |
| 1353 | JNIEnv *env, jobject /*obj*/, jstring filename, jfloatArray samples, | 1120 | JNIEnv *env, jobject /*obj*/, jstring filename, jfloatArray samples, |
sherpa-onnx/jni/offline-tts.cc
0 → 100644
| 1 | +// sherpa-onnx/jni/offline-tts.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "sherpa-onnx/csrc/offline-tts.h" | ||
| 6 | + | ||
| 7 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 8 | +#include "sherpa-onnx/jni/common.h" | ||
| 9 | + | ||
| 10 | +namespace sherpa_onnx { | ||
| 11 | + | ||
| 12 | +static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { | ||
| 13 | + OfflineTtsConfig ans; | ||
| 14 | + | ||
| 15 | + jclass cls = env->GetObjectClass(config); | ||
| 16 | + jfieldID fid; | ||
| 17 | + | ||
| 18 | + fid = env->GetFieldID(cls, "model", | ||
| 19 | + "Lcom/k2fsa/sherpa/onnx/OfflineTtsModelConfig;"); | ||
| 20 | + jobject model = env->GetObjectField(config, fid); | ||
| 21 | + jclass model_config_cls = env->GetObjectClass(model); | ||
| 22 | + | ||
| 23 | + fid = env->GetFieldID(model_config_cls, "vits", | ||
| 24 | + "Lcom/k2fsa/sherpa/onnx/OfflineTtsVitsModelConfig;"); | ||
| 25 | + jobject vits = env->GetObjectField(model, fid); | ||
| 26 | + jclass vits_cls = env->GetObjectClass(vits); | ||
| 27 | + | ||
| 28 | + fid = env->GetFieldID(vits_cls, "model", "Ljava/lang/String;"); | ||
| 29 | + jstring s = (jstring)env->GetObjectField(vits, fid); | ||
| 30 | + const char *p = env->GetStringUTFChars(s, nullptr); | ||
| 31 | + ans.model.vits.model = p; | ||
| 32 | + env->ReleaseStringUTFChars(s, p); | ||
| 33 | + | ||
| 34 | + fid = env->GetFieldID(vits_cls, "lexicon", "Ljava/lang/String;"); | ||
| 35 | + s = (jstring)env->GetObjectField(vits, fid); | ||
| 36 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 37 | + ans.model.vits.lexicon = p; | ||
| 38 | + env->ReleaseStringUTFChars(s, p); | ||
| 39 | + | ||
| 40 | + fid = env->GetFieldID(vits_cls, "tokens", "Ljava/lang/String;"); | ||
| 41 | + s = (jstring)env->GetObjectField(vits, fid); | ||
| 42 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 43 | + ans.model.vits.tokens = p; | ||
| 44 | + env->ReleaseStringUTFChars(s, p); | ||
| 45 | + | ||
| 46 | + fid = env->GetFieldID(vits_cls, "dataDir", "Ljava/lang/String;"); | ||
| 47 | + s = (jstring)env->GetObjectField(vits, fid); | ||
| 48 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 49 | + ans.model.vits.data_dir = p; | ||
| 50 | + env->ReleaseStringUTFChars(s, p); | ||
| 51 | + | ||
| 52 | + fid = env->GetFieldID(vits_cls, "dictDir", "Ljava/lang/String;"); | ||
| 53 | + s = (jstring)env->GetObjectField(vits, fid); | ||
| 54 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 55 | + ans.model.vits.dict_dir = p; | ||
| 56 | + env->ReleaseStringUTFChars(s, p); | ||
| 57 | + | ||
| 58 | + fid = env->GetFieldID(vits_cls, "noiseScale", "F"); | ||
| 59 | + ans.model.vits.noise_scale = env->GetFloatField(vits, fid); | ||
| 60 | + | ||
| 61 | + fid = env->GetFieldID(vits_cls, "noiseScaleW", "F"); | ||
| 62 | + ans.model.vits.noise_scale_w = env->GetFloatField(vits, fid); | ||
| 63 | + | ||
| 64 | + fid = env->GetFieldID(vits_cls, "lengthScale", "F"); | ||
| 65 | + ans.model.vits.length_scale = env->GetFloatField(vits, fid); | ||
| 66 | + | ||
| 67 | + fid = env->GetFieldID(model_config_cls, "numThreads", "I"); | ||
| 68 | + ans.model.num_threads = env->GetIntField(model, fid); | ||
| 69 | + | ||
| 70 | + fid = env->GetFieldID(model_config_cls, "debug", "Z"); | ||
| 71 | + ans.model.debug = env->GetBooleanField(model, fid); | ||
| 72 | + | ||
| 73 | + fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;"); | ||
| 74 | + s = (jstring)env->GetObjectField(model, fid); | ||
| 75 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 76 | + ans.model.provider = p; | ||
| 77 | + env->ReleaseStringUTFChars(s, p); | ||
| 78 | + | ||
| 79 | + // for ruleFsts | ||
| 80 | + fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;"); | ||
| 81 | + s = (jstring)env->GetObjectField(config, fid); | ||
| 82 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 83 | + ans.rule_fsts = p; | ||
| 84 | + env->ReleaseStringUTFChars(s, p); | ||
| 85 | + | ||
| 86 | + // for ruleFars | ||
| 87 | + fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;"); | ||
| 88 | + s = (jstring)env->GetObjectField(config, fid); | ||
| 89 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 90 | + ans.rule_fars = p; | ||
| 91 | + env->ReleaseStringUTFChars(s, p); | ||
| 92 | + | ||
| 93 | + fid = env->GetFieldID(cls, "maxNumSentences", "I"); | ||
| 94 | + ans.max_num_sentences = env->GetIntField(config, fid); | ||
| 95 | + | ||
| 96 | + return ans; | ||
| 97 | +} | ||
| 98 | + | ||
| 99 | +} // namespace sherpa_onnx | ||
| 100 | + | ||
| 101 | +SHERPA_ONNX_EXTERN_C | ||
| 102 | +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newForAsset( | ||
| 103 | + JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { | ||
| 104 | +#if __ANDROID_API__ >= 9 | ||
| 105 | + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); | ||
| 106 | + if (!mgr) { | ||
| 107 | + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); | ||
| 108 | + } | ||
| 109 | +#endif | ||
| 110 | + auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); | ||
| 111 | + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | ||
| 112 | + | ||
| 113 | + auto tts = new sherpa_onnx::OfflineTts( | ||
| 114 | +#if __ANDROID_API__ >= 9 | ||
| 115 | + mgr, | ||
| 116 | +#endif | ||
| 117 | + config); | ||
| 118 | + | ||
| 119 | + return (jlong)tts; | ||
| 120 | +} | ||
| 121 | + | ||
| 122 | +SHERPA_ONNX_EXTERN_C | ||
| 123 | +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile( | ||
| 124 | + JNIEnv *env, jobject /*obj*/, jobject _config) { | ||
| 125 | + auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); | ||
| 126 | + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | ||
| 127 | + | ||
| 128 | + if (!config.Validate()) { | ||
| 129 | + SHERPA_ONNX_LOGE("Errors found in config!"); | ||
| 130 | + } | ||
| 131 | + | ||
| 132 | + auto tts = new sherpa_onnx::OfflineTts(config); | ||
| 133 | + | ||
| 134 | + return (jlong)tts; | ||
| 135 | +} | ||
| 136 | + | ||
| 137 | +SHERPA_ONNX_EXTERN_C | ||
| 138 | +JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_delete( | ||
| 139 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 140 | + delete reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr); | ||
| 141 | +} | ||
| 142 | + | ||
| 143 | +SHERPA_ONNX_EXTERN_C | ||
| 144 | +JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getSampleRate( | ||
| 145 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 146 | + return reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->SampleRate(); | ||
| 147 | +} | ||
| 148 | + | ||
| 149 | +SHERPA_ONNX_EXTERN_C | ||
| 150 | +JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_getNumSpeakers( | ||
| 151 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 152 | + return reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->NumSpeakers(); | ||
| 153 | +} | ||
| 154 | + | ||
| 155 | +SHERPA_ONNX_EXTERN_C | ||
| 156 | +JNIEXPORT jobjectArray JNICALL | ||
| 157 | +Java_com_k2fsa_sherpa_onnx_OfflineTts_generateImpl(JNIEnv *env, jobject /*obj*/, | ||
| 158 | + jlong ptr, jstring text, | ||
| 159 | + jint sid, jfloat speed) { | ||
| 160 | + const char *p_text = env->GetStringUTFChars(text, nullptr); | ||
| 161 | + SHERPA_ONNX_LOGE("string is: %s", p_text); | ||
| 162 | + | ||
| 163 | + auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate( | ||
| 164 | + p_text, sid, speed); | ||
| 165 | + | ||
| 166 | + jfloatArray samples_arr = env->NewFloatArray(audio.samples.size()); | ||
| 167 | + env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(), | ||
| 168 | + audio.samples.data()); | ||
| 169 | + | ||
| 170 | + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( | ||
| 171 | + 2, env->FindClass("java/lang/Object"), nullptr); | ||
| 172 | + | ||
| 173 | + env->SetObjectArrayElement(obj_arr, 0, samples_arr); | ||
| 174 | + env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate)); | ||
| 175 | + | ||
| 176 | + env->ReleaseStringUTFChars(text, p_text); | ||
| 177 | + | ||
| 178 | + return obj_arr; | ||
| 179 | +} | ||
| 180 | + | ||
| 181 | +SHERPA_ONNX_EXTERN_C | ||
| 182 | +JNIEXPORT jobjectArray JNICALL | ||
| 183 | +Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl( | ||
| 184 | + JNIEnv *env, jobject /*obj*/, jlong ptr, jstring text, jint sid, | ||
| 185 | + jfloat speed, jobject callback) { | ||
| 186 | + const char *p_text = env->GetStringUTFChars(text, nullptr); | ||
| 187 | + SHERPA_ONNX_LOGE("string is: %s", p_text); | ||
| 188 | + | ||
| 189 | + std::function<void(const float *, int32_t, float)> callback_wrapper = | ||
| 190 | + [env, callback](const float *samples, int32_t n, float /*progress*/) { | ||
| 191 | + jclass cls = env->GetObjectClass(callback); | ||
| 192 | + jmethodID mid = env->GetMethodID(cls, "invoke", "([F)V"); | ||
| 193 | + | ||
| 194 | + jfloatArray samples_arr = env->NewFloatArray(n); | ||
| 195 | + env->SetFloatArrayRegion(samples_arr, 0, n, samples); | ||
| 196 | + env->CallVoidMethod(callback, mid, samples_arr); | ||
| 197 | + }; | ||
| 198 | + | ||
| 199 | + auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate( | ||
| 200 | + p_text, sid, speed, callback_wrapper); | ||
| 201 | + | ||
| 202 | + jfloatArray samples_arr = env->NewFloatArray(audio.samples.size()); | ||
| 203 | + env->SetFloatArrayRegion(samples_arr, 0, audio.samples.size(), | ||
| 204 | + audio.samples.data()); | ||
| 205 | + | ||
| 206 | + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( | ||
| 207 | + 2, env->FindClass("java/lang/Object"), nullptr); | ||
| 208 | + | ||
| 209 | + env->SetObjectArrayElement(obj_arr, 0, samples_arr); | ||
| 210 | + env->SetObjectArrayElement(obj_arr, 1, NewInteger(env, audio.sample_rate)); | ||
| 211 | + | ||
| 212 | + env->ReleaseStringUTFChars(text, p_text); | ||
| 213 | + | ||
| 214 | + return obj_arr; | ||
| 215 | +} |
-
请 注册 或 登录 后发表评论