正在显示
49 个修改的文件
包含
308 行增加
和
143 行删除
| @@ -70,9 +70,9 @@ rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 | @@ -70,9 +70,9 @@ rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 | ||
| 70 | curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | 70 | curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 |
| 71 | tar xf vits-piper-en_US-amy-low.tar.bz2 | 71 | tar xf vits-piper-en_US-amy-low.tar.bz2 |
| 72 | node ./test-offline-tts-en.js | 72 | node ./test-offline-tts-en.js |
| 73 | -rm vits-piper-en_US-amy-low.tar.bz2 | 73 | +rm vits-piper-en_US-amy-low* |
| 74 | 74 | ||
| 75 | -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 | ||
| 76 | -tar xvf vits-zh-aishell3.tar.bz2 | 75 | +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 76 | +tar xvf vits-icefall-zh-aishell3.tar.bz2 | ||
| 77 | node ./test-offline-tts-zh.js | 77 | node ./test-offline-tts-zh.js |
| 78 | -rm vits-zh-aishell3.tar.bz2 | 78 | +rm vits-icefall-zh-aishell3* |
| @@ -173,6 +173,7 @@ jobs: | @@ -173,6 +173,7 @@ jobs: | ||
| 173 | rm -v $dst/lib/libasound.so | 173 | rm -v $dst/lib/libasound.so |
| 174 | rm -v $dst/lib/libonnxruntime.so | 174 | rm -v $dst/lib/libonnxruntime.so |
| 175 | rm -v $dst/lib/libsherpa-onnx-fst.so | 175 | rm -v $dst/lib/libsherpa-onnx-fst.so |
| 176 | + rm -v $dst/lib/libsherpa-onnx-fstfar.so | ||
| 176 | fi | 177 | fi |
| 177 | 178 | ||
| 178 | tree $dst | 179 | tree $dst |
| @@ -211,6 +211,7 @@ jobs: | @@ -211,6 +211,7 @@ jobs: | ||
| 211 | rm -fv $dst/lib/libasound.so | 211 | rm -fv $dst/lib/libasound.so |
| 212 | rm -fv $dst/lib/libonnxruntime.so | 212 | rm -fv $dst/lib/libonnxruntime.so |
| 213 | rm -fv $dst/lib/libsherpa-onnx-fst.so | 213 | rm -fv $dst/lib/libsherpa-onnx-fst.so |
| 214 | + rm -fv $dst/lib/libsherpa-onnx-fstfar.so | ||
| 214 | fi | 215 | fi |
| 215 | 216 | ||
| 216 | tree $dst | 217 | tree $dst |
| @@ -111,9 +111,11 @@ jobs: | @@ -111,9 +111,11 @@ jobs: | ||
| 111 | rm -rf vits-vctk | 111 | rm -rf vits-vctk |
| 112 | 112 | ||
| 113 | echo "Test vits-zh-aishell3" | 113 | echo "Test vits-zh-aishell3" |
| 114 | - git clone https://huggingface.co/csukuangfj/vits-zh-aishell3 | 114 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 115 | + tar xvf vits-icefall-zh-aishell3.tar.bz2 | ||
| 116 | + rm vits-icefall-zh-aishell3.tar.bz2 | ||
| 115 | ./run-vits-zh-aishell3.sh | 117 | ./run-vits-zh-aishell3.sh |
| 116 | - rm -rf vits-zh-aishell3 | 118 | + rm -rf vits-icefall-zh-aishell3 |
| 117 | 119 | ||
| 118 | echo "Test vits-piper-en_US-lessac-medium" | 120 | echo "Test vits-piper-en_US-lessac-medium" |
| 119 | git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium | 121 | git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium |
| @@ -155,6 +155,7 @@ class MainActivity : AppCompatActivity() { | @@ -155,6 +155,7 @@ class MainActivity : AppCompatActivity() { | ||
| 155 | var modelDir: String? | 155 | var modelDir: String? |
| 156 | var modelName: String? | 156 | var modelName: String? |
| 157 | var ruleFsts: String? | 157 | var ruleFsts: String? |
| 158 | + var ruleFars: String? | ||
| 158 | var lexicon: String? | 159 | var lexicon: String? |
| 159 | var dataDir: String? | 160 | var dataDir: String? |
| 160 | var assets: AssetManager? = application.assets | 161 | var assets: AssetManager? = application.assets |
| @@ -165,6 +166,7 @@ class MainActivity : AppCompatActivity() { | @@ -165,6 +166,7 @@ class MainActivity : AppCompatActivity() { | ||
| 165 | modelDir = null | 166 | modelDir = null |
| 166 | modelName = null | 167 | modelName = null |
| 167 | ruleFsts = null | 168 | ruleFsts = null |
| 169 | + ruleFars = null | ||
| 168 | lexicon = null | 170 | lexicon = null |
| 169 | dataDir = null | 171 | dataDir = null |
| 170 | 172 | ||
| @@ -181,9 +183,11 @@ class MainActivity : AppCompatActivity() { | @@ -181,9 +183,11 @@ class MainActivity : AppCompatActivity() { | ||
| 181 | // dataDir = "vits-piper-en_US-amy-low/espeak-ng-data" | 183 | // dataDir = "vits-piper-en_US-amy-low/espeak-ng-data" |
| 182 | 184 | ||
| 183 | // Example 3: | 185 | // Example 3: |
| 184 | - // modelDir = "vits-zh-aishell3" | ||
| 185 | - // modelName = "vits-aishell3.onnx" | ||
| 186 | - // ruleFsts = "vits-zh-aishell3/rule.fst" | 186 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 187 | + // modelDir = "vits-icefall-zh-aishell3" | ||
| 188 | + // modelName = "model.onnx" | ||
| 189 | + // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst," | ||
| 190 | + // ruleFars = "vits-icefall-zh-aishell3/rule.far" | ||
| 187 | // lexicon = "lexicon.txt" | 191 | // lexicon = "lexicon.txt" |
| 188 | 192 | ||
| 189 | // Example 4: | 193 | // Example 4: |
| @@ -202,7 +206,8 @@ class MainActivity : AppCompatActivity() { | @@ -202,7 +206,8 @@ class MainActivity : AppCompatActivity() { | ||
| 202 | val config = getOfflineTtsConfig( | 206 | val config = getOfflineTtsConfig( |
| 203 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", | 207 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", |
| 204 | dataDir = dataDir ?: "", | 208 | dataDir = dataDir ?: "", |
| 205 | - ruleFsts = ruleFsts ?: "" | 209 | + ruleFsts = ruleFsts ?: "", |
| 210 | + ruleFars = ruleFars ?: "", | ||
| 206 | )!! | 211 | )!! |
| 207 | 212 | ||
| 208 | tts = OfflineTts(assetManager = assets, config = config) | 213 | tts = OfflineTts(assetManager = assets, config = config) |
| @@ -23,6 +23,7 @@ data class OfflineTtsModelConfig( | @@ -23,6 +23,7 @@ data class OfflineTtsModelConfig( | ||
| 23 | data class OfflineTtsConfig( | 23 | data class OfflineTtsConfig( |
| 24 | var model: OfflineTtsModelConfig, | 24 | var model: OfflineTtsModelConfig, |
| 25 | var ruleFsts: String = "", | 25 | var ruleFsts: String = "", |
| 26 | + var ruleFars: String = "", | ||
| 26 | var maxNumSentences: Int = 1, | 27 | var maxNumSentences: Int = 1, |
| 27 | ) | 28 | ) |
| 28 | 29 | ||
| @@ -151,7 +152,8 @@ fun getOfflineTtsConfig( | @@ -151,7 +152,8 @@ fun getOfflineTtsConfig( | ||
| 151 | modelName: String, | 152 | modelName: String, |
| 152 | lexicon: String, | 153 | lexicon: String, |
| 153 | dataDir: String, | 154 | dataDir: String, |
| 154 | - ruleFsts: String | 155 | + ruleFsts: String, |
| 156 | + ruleFars: String | ||
| 155 | ): OfflineTtsConfig? { | 157 | ): OfflineTtsConfig? { |
| 156 | return OfflineTtsConfig( | 158 | return OfflineTtsConfig( |
| 157 | model = OfflineTtsModelConfig( | 159 | model = OfflineTtsModelConfig( |
| @@ -166,5 +168,6 @@ fun getOfflineTtsConfig( | @@ -166,5 +168,6 @@ fun getOfflineTtsConfig( | ||
| 166 | provider = "cpu", | 168 | provider = "cpu", |
| 167 | ), | 169 | ), |
| 168 | ruleFsts = ruleFsts, | 170 | ruleFsts = ruleFsts, |
| 171 | + ruleFars = ruleFars, | ||
| 169 | ) | 172 | ) |
| 170 | } | 173 | } |
| @@ -39,6 +39,7 @@ object TtsEngine { | @@ -39,6 +39,7 @@ object TtsEngine { | ||
| 39 | private var modelDir: String? = null | 39 | private var modelDir: String? = null |
| 40 | private var modelName: String? = null | 40 | private var modelName: String? = null |
| 41 | private var ruleFsts: String? = null | 41 | private var ruleFsts: String? = null |
| 42 | + private var ruleFars: String? = null | ||
| 42 | private var lexicon: String? = null | 43 | private var lexicon: String? = null |
| 43 | private var dataDir: String? = null | 44 | private var dataDir: String? = null |
| 44 | private var assets: AssetManager? = null | 45 | private var assets: AssetManager? = null |
| @@ -50,6 +51,7 @@ object TtsEngine { | @@ -50,6 +51,7 @@ object TtsEngine { | ||
| 50 | modelDir = null | 51 | modelDir = null |
| 51 | modelName = null | 52 | modelName = null |
| 52 | ruleFsts = null | 53 | ruleFsts = null |
| 54 | + ruleFars = null | ||
| 53 | lexicon = null | 55 | lexicon = null |
| 54 | dataDir = null | 56 | dataDir = null |
| 55 | lang = null | 57 | lang = null |
| @@ -73,9 +75,10 @@ object TtsEngine { | @@ -73,9 +75,10 @@ object TtsEngine { | ||
| 73 | 75 | ||
| 74 | // Example 3: | 76 | // Example 3: |
| 75 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 | 77 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 76 | - // modelDir = "vits-zh-aishell3" | ||
| 77 | - // modelName = "vits-aishell3.onnx" | ||
| 78 | - // ruleFsts = "vits-zh-aishell3/rule.fst" | 78 | + // modelDir = "vits-icefall-zh-aishell3" |
| 79 | + // modelName = "model.onnx" | ||
| 80 | + // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst" | ||
| 81 | + // ruleFars = "vits-icefall-zh-aishell3/rule.far" | ||
| 79 | // lexicon = "lexicon.txt" | 82 | // lexicon = "lexicon.txt" |
| 80 | // lang = "zho" | 83 | // lang = "zho" |
| 81 | 84 | ||
| @@ -108,7 +111,8 @@ object TtsEngine { | @@ -108,7 +111,8 @@ object TtsEngine { | ||
| 108 | val config = getOfflineTtsConfig( | 111 | val config = getOfflineTtsConfig( |
| 109 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", | 112 | modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", |
| 110 | dataDir = dataDir ?: "", | 113 | dataDir = dataDir ?: "", |
| 111 | - ruleFsts = ruleFsts ?: "" | 114 | + ruleFsts = ruleFsts ?: "", |
| 115 | + ruleFars = ruleFars ?: "" | ||
| 112 | )!! | 116 | )!! |
| 113 | 117 | ||
| 114 | tts = OfflineTts(assetManager = assets, config = config) | 118 | tts = OfflineTts(assetManager = assets, config = config) |
| @@ -124,6 +124,7 @@ echo "Generate xcframework" | @@ -124,6 +124,7 @@ echo "Generate xcframework" | ||
| 124 | 124 | ||
| 125 | mkdir -p "build/simulator/lib" | 125 | mkdir -p "build/simulator/lib" |
| 126 | for f in libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a \ | 126 | for f in libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a \ |
| 127 | + libsherpa-onnx-fstfar.a \ | ||
| 127 | libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a \ | 128 | libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a \ |
| 128 | libucd.a libpiper_phonemize.a libespeak-ng.a; do | 129 | libucd.a libpiper_phonemize.a libespeak-ng.a; do |
| 129 | lipo -create build/simulator_arm64/lib/${f} \ | 130 | lipo -create build/simulator_arm64/lib/${f} \ |
| @@ -137,6 +138,7 @@ libtool -static -o build/simulator/sherpa-onnx.a \ | @@ -137,6 +138,7 @@ libtool -static -o build/simulator/sherpa-onnx.a \ | ||
| 137 | build/simulator/lib/libkaldi-native-fbank-core.a \ | 138 | build/simulator/lib/libkaldi-native-fbank-core.a \ |
| 138 | build/simulator/lib/libsherpa-onnx-c-api.a \ | 139 | build/simulator/lib/libsherpa-onnx-c-api.a \ |
| 139 | build/simulator/lib/libsherpa-onnx-core.a \ | 140 | build/simulator/lib/libsherpa-onnx-core.a \ |
| 141 | + build/simulator/lib/libsherpa-onnx-fstfar.a \ | ||
| 140 | build/simulator/lib/libsherpa-onnx-fst.a \ | 142 | build/simulator/lib/libsherpa-onnx-fst.a \ |
| 141 | build/simulator/lib/libsherpa-onnx-kaldifst-core.a \ | 143 | build/simulator/lib/libsherpa-onnx-kaldifst-core.a \ |
| 142 | build/simulator/lib/libkaldi-decoder-core.a \ | 144 | build/simulator/lib/libkaldi-decoder-core.a \ |
| @@ -148,6 +150,7 @@ libtool -static -o build/os64/sherpa-onnx.a \ | @@ -148,6 +150,7 @@ libtool -static -o build/os64/sherpa-onnx.a \ | ||
| 148 | build/os64/lib/libkaldi-native-fbank-core.a \ | 150 | build/os64/lib/libkaldi-native-fbank-core.a \ |
| 149 | build/os64/lib/libsherpa-onnx-c-api.a \ | 151 | build/os64/lib/libsherpa-onnx-c-api.a \ |
| 150 | build/os64/lib/libsherpa-onnx-core.a \ | 152 | build/os64/lib/libsherpa-onnx-core.a \ |
| 153 | + build/os64/lib/libsherpa-onnx-fstfar.a \ | ||
| 151 | build/os64/lib/libsherpa-onnx-fst.a \ | 154 | build/os64/lib/libsherpa-onnx-fst.a \ |
| 152 | build/os64/lib/libsherpa-onnx-kaldifst-core.a \ | 155 | build/os64/lib/libsherpa-onnx-kaldifst-core.a \ |
| 153 | build/os64/lib/libkaldi-decoder-core.a \ | 156 | build/os64/lib/libkaldi-decoder-core.a \ |
| @@ -27,6 +27,7 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \ | @@ -27,6 +27,7 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \ | ||
| 27 | ./install/lib/libsherpa-onnx-c-api.a \ | 27 | ./install/lib/libsherpa-onnx-c-api.a \ |
| 28 | ./install/lib/libsherpa-onnx-core.a \ | 28 | ./install/lib/libsherpa-onnx-core.a \ |
| 29 | ./install/lib/libkaldi-native-fbank-core.a \ | 29 | ./install/lib/libkaldi-native-fbank-core.a \ |
| 30 | + ./install/lib/libsherpa-onnx-fstfar.a \ | ||
| 30 | ./install/lib/libsherpa-onnx-fst.a \ | 31 | ./install/lib/libsherpa-onnx-fst.a \ |
| 31 | ./install/lib/libsherpa-onnx-kaldifst-core.a \ | 32 | ./install/lib/libsherpa-onnx-kaldifst-core.a \ |
| 32 | ./install/lib/libkaldi-decoder-core.a \ | 33 | ./install/lib/libkaldi-decoder-core.a \ |
| @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd) | @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd) | ||
| 4 | CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ | 4 | CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ |
| 5 | LDFLAGS := -L ../build/lib | 5 | LDFLAGS := -L ../build/lib |
| 6 | LDFLAGS += -L ../build/_deps/onnxruntime-src/lib | 6 | LDFLAGS += -L ../build/_deps/onnxruntime-src/lib |
| 7 | -LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime | 7 | +LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime |
| 8 | LDFLAGS += -framework Foundation | 8 | LDFLAGS += -framework Foundation |
| 9 | LDFLAGS += -lc++ | 9 | LDFLAGS += -lc++ |
| 10 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib | 10 | LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib |
| @@ -78,6 +78,7 @@ def get_binaries(): | @@ -78,6 +78,7 @@ def get_binaries(): | ||
| 78 | "piper_phonemize.dll", | 78 | "piper_phonemize.dll", |
| 79 | "sherpa-onnx-c-api.dll", | 79 | "sherpa-onnx-c-api.dll", |
| 80 | "sherpa-onnx-core.dll", | 80 | "sherpa-onnx-core.dll", |
| 81 | + "sherpa-onnx-fstfar.lib", | ||
| 81 | "sherpa-onnx-fst.lib", | 82 | "sherpa-onnx-fst.lib", |
| 82 | "sherpa-onnx-kaldifst-core.lib", | 83 | "sherpa-onnx-kaldifst-core.lib", |
| 83 | "sherpa-onnx-portaudio.dll", | 84 | "sherpa-onnx-portaudio.dll", |
| @@ -64,12 +64,22 @@ function(download_kaldi_decoder) | @@ -64,12 +64,22 @@ function(download_kaldi_decoder) | ||
| 64 | kaldifst_core | 64 | kaldifst_core |
| 65 | fst | 65 | fst |
| 66 | DESTINATION ..) | 66 | DESTINATION ..) |
| 67 | + if(SHERPA_ONNX_ENABLE_TTS) | ||
| 68 | + install(TARGETS | ||
| 69 | + fstfar | ||
| 70 | + DESTINATION ..) | ||
| 71 | + endif() | ||
| 67 | else() | 72 | else() |
| 68 | install(TARGETS | 73 | install(TARGETS |
| 69 | kaldi-decoder-core | 74 | kaldi-decoder-core |
| 70 | kaldifst_core | 75 | kaldifst_core |
| 71 | fst | 76 | fst |
| 72 | DESTINATION lib) | 77 | DESTINATION lib) |
| 78 | + if(SHERPA_ONNX_ENABLE_TTS) | ||
| 79 | + install(TARGETS | ||
| 80 | + fstfar | ||
| 81 | + DESTINATION lib) | ||
| 82 | + endif() | ||
| 73 | endif() | 83 | endif() |
| 74 | 84 | ||
| 75 | if(WIN32 AND BUILD_SHARED_LIBS) | 85 | if(WIN32 AND BUILD_SHARED_LIBS) |
| @@ -78,6 +88,11 @@ function(download_kaldi_decoder) | @@ -78,6 +88,11 @@ function(download_kaldi_decoder) | ||
| 78 | kaldifst_core | 88 | kaldifst_core |
| 79 | fst | 89 | fst |
| 80 | DESTINATION bin) | 90 | DESTINATION bin) |
| 91 | + if(SHERPA_ONNX_ENABLE_TTS) | ||
| 92 | + install(TARGETS | ||
| 93 | + fstfar | ||
| 94 | + DESTINATION bin) | ||
| 95 | + endif() | ||
| 81 | endif() | 96 | endif() |
| 82 | endfunction() | 97 | endfunction() |
| 83 | 98 |
| @@ -50,13 +50,7 @@ function(download_kaldifst) | @@ -50,13 +50,7 @@ function(download_kaldifst) | ||
| 50 | ${kaldifst_SOURCE_DIR}/ | 50 | ${kaldifst_SOURCE_DIR}/ |
| 51 | ) | 51 | ) |
| 52 | 52 | ||
| 53 | - target_include_directories(fst | ||
| 54 | - PUBLIC | ||
| 55 | - ${openfst_SOURCE_DIR}/src/include | ||
| 56 | - ) | ||
| 57 | - | ||
| 58 | set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core") | 53 | set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core") |
| 59 | - set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst") | ||
| 60 | endfunction() | 54 | endfunction() |
| 61 | 55 | ||
| 62 | download_kaldifst() | 56 | download_kaldifst() |
| @@ -4,7 +4,7 @@ function(download_openfst) | @@ -4,7 +4,7 @@ function(download_openfst) | ||
| 4 | include(FetchContent) | 4 | include(FetchContent) |
| 5 | 5 | ||
| 6 | set(openfst_URL "https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz") | 6 | set(openfst_URL "https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz") |
| 7 | - set(openfst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/openfst-win-1.6.5.1.tar.gz") | 7 | + set(openfst_URL2 "https://hub.nuaa.cf/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz") |
| 8 | set(openfst_HASH "SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e") | 8 | set(openfst_HASH "SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e") |
| 9 | 9 | ||
| 10 | # If you don't have access to the Internet, | 10 | # If you don't have access to the Internet, |
| @@ -31,7 +31,7 @@ function(download_openfst) | @@ -31,7 +31,7 @@ function(download_openfst) | ||
| 31 | set(HAVE_COMPACT OFF CACHE BOOL "" FORCE) | 31 | set(HAVE_COMPACT OFF CACHE BOOL "" FORCE) |
| 32 | set(HAVE_COMPRESS OFF CACHE BOOL "" FORCE) | 32 | set(HAVE_COMPRESS OFF CACHE BOOL "" FORCE) |
| 33 | set(HAVE_CONST OFF CACHE BOOL "" FORCE) | 33 | set(HAVE_CONST OFF CACHE BOOL "" FORCE) |
| 34 | - set(HAVE_FAR OFF CACHE BOOL "" FORCE) | 34 | + set(HAVE_FAR ON CACHE BOOL "" FORCE) |
| 35 | set(HAVE_GRM OFF CACHE BOOL "" FORCE) | 35 | set(HAVE_GRM OFF CACHE BOOL "" FORCE) |
| 36 | set(HAVE_PDT OFF CACHE BOOL "" FORCE) | 36 | set(HAVE_PDT OFF CACHE BOOL "" FORCE) |
| 37 | set(HAVE_MPDT OFF CACHE BOOL "" FORCE) | 37 | set(HAVE_MPDT OFF CACHE BOOL "" FORCE) |
| @@ -70,20 +70,21 @@ function(download_openfst) | @@ -70,20 +70,21 @@ function(download_openfst) | ||
| 70 | add_subdirectory(${openfst_SOURCE_DIR} ${openfst_BINARY_DIR} EXCLUDE_FROM_ALL) | 70 | add_subdirectory(${openfst_SOURCE_DIR} ${openfst_BINARY_DIR} EXCLUDE_FROM_ALL) |
| 71 | set(openfst_SOURCE_DIR ${openfst_SOURCE_DIR} PARENT_SCOPE) | 71 | set(openfst_SOURCE_DIR ${openfst_SOURCE_DIR} PARENT_SCOPE) |
| 72 | 72 | ||
| 73 | - # Rename libfst.so.6 to libkaldifst_fst.so.6 to avoid potential conflicts | ||
| 74 | - # when kaldifst is installed. | ||
| 75 | - set_target_properties(fst PROPERTIES OUTPUT_NAME "kaldifst_fst") | 73 | + # Rename libfst.so.6 to libsherpa-onnx-fst.so.6 to avoid potential conflicts |
| 74 | + # when sherpa-onnx is installed. | ||
| 75 | + set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst") | ||
| 76 | + set_target_properties(fstfar PROPERTIES OUTPUT_NAME "sherpa-onnx-fstfar") | ||
| 76 | 77 | ||
| 77 | - install(TARGETS fst | ||
| 78 | - DESTINATION lib | 78 | + target_include_directories(fst |
| 79 | + PUBLIC | ||
| 80 | + ${openfst_SOURCE_DIR}/src/include | ||
| 79 | ) | 81 | ) |
| 80 | 82 | ||
| 81 | - if(KALDIFST_BUILD_PYTHON) | ||
| 82 | - set_target_properties(fstscript PROPERTIES OUTPUT_NAME "kaldifst_fstscript") | ||
| 83 | - install(TARGETS fstscript | ||
| 84 | - DESTINATION lib | ||
| 85 | - ) | ||
| 86 | - endif() | 83 | + target_include_directories(fstfar |
| 84 | + PUBLIC | ||
| 85 | + ${openfst_SOURCE_DIR}/src/include | ||
| 86 | + ) | ||
| 87 | + # installed in ./kaldi-decoder.cmake | ||
| 87 | endfunction() | 88 | endfunction() |
| 88 | 89 | ||
| 89 | download_openfst() | 90 | download_openfst() |
| @@ -13,4 +13,4 @@ Cflags: -I"${includedir}" | @@ -13,4 +13,4 @@ Cflags: -I"${includedir}" | ||
| 13 | # Note: -lcargs is required only for the following file | 13 | # Note: -lcargs is required only for the following file |
| 14 | # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c | 14 | # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c |
| 15 | # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c | 15 | # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c |
| 16 | -Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ | 16 | +Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ |
| @@ -20,6 +20,9 @@ class OfflineTtsDemo | @@ -20,6 +20,9 @@ class OfflineTtsDemo | ||
| 20 | [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] | 20 | [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] |
| 21 | public string RuleFsts { get; set; } | 21 | public string RuleFsts { get; set; } |
| 22 | 22 | ||
| 23 | + [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")] | ||
| 24 | + public string RuleFars { get; set; } | ||
| 25 | + | ||
| 23 | [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] | 26 | [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] |
| 24 | public string DataDir { get; set; } | 27 | public string DataDir { get; set; } |
| 25 | 28 | ||
| @@ -72,14 +75,15 @@ class OfflineTtsDemo | @@ -72,14 +75,15 @@ class OfflineTtsDemo | ||
| 72 | string usage = @" | 75 | string usage = @" |
| 73 | # vits-aishell3 | 76 | # vits-aishell3 |
| 74 | 77 | ||
| 75 | -wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 | ||
| 76 | -tar xf vits-zh-aishell3.tar.bz2 | 78 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 79 | +tar xvf vits-icefall-zh-aishell3.tar.bz2 | ||
| 77 | 80 | ||
| 78 | dotnet run \ | 81 | dotnet run \ |
| 79 | - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \ | ||
| 80 | - --vits-tokens=./vits-zh-aishell3/tokens.txt \ | ||
| 81 | - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \ | ||
| 82 | - --tts-rule-fsts=./vits-zh-aishell3/rule.fst \ | 82 | + --vits-model=./vits-icefall-zh-aishell3/model.onnx \ |
| 83 | + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \ | ||
| 84 | + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \ | ||
| 85 | + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \ | ||
| 86 | + --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \ | ||
| 83 | --sid=66 \ | 87 | --sid=66 \ |
| 84 | --debug=1 \ | 88 | --debug=1 \ |
| 85 | --output-filename=./aishell3-66.wav \ | 89 | --output-filename=./aishell3-66.wav \ |
| @@ -127,6 +131,7 @@ to download more models. | @@ -127,6 +131,7 @@ to download more models. | ||
| 127 | config.Model.Debug = options.Debug; | 131 | config.Model.Debug = options.Debug; |
| 128 | config.Model.Provider = "cpu"; | 132 | config.Model.Provider = "cpu"; |
| 129 | config.RuleFsts = options.RuleFsts; | 133 | config.RuleFsts = options.RuleFsts; |
| 134 | + config.RuleFars = options.RuleFars; | ||
| 130 | config.MaxNumSentences = options.MaxNumSentences; | 135 | config.MaxNumSentences = options.MaxNumSentences; |
| 131 | 136 | ||
| 132 | OfflineTts tts = new OfflineTts(config); | 137 | OfflineTts tts = new OfflineTts(config); |
| 1 | #!/usr/bin/env bash | 1 | #!/usr/bin/env bash |
| 2 | set -ex | 2 | set -ex |
| 3 | if [ ! -f ./vits-zh-aishell3/vits-aishell3.onnx ]; then | 3 | if [ ! -f ./vits-zh-aishell3/vits-aishell3.onnx ]; then |
| 4 | - # wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 | ||
| 5 | - curl -OL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 | ||
| 6 | - tar xf vits-zh-aishell3.tar.bz2 | ||
| 7 | - rm vits-zh-aishell3.tar.bz2 | 4 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 5 | + tar xvf vits-icefall-zh-aishell3.tar.bz2 | ||
| 6 | + rm vits-icefall-zh-aishell3.tar.bz2 | ||
| 8 | fi | 7 | fi |
| 9 | 8 | ||
| 10 | dotnet run \ | 9 | dotnet run \ |
| 11 | - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \ | ||
| 12 | - --vits-tokens=./vits-zh-aishell3/tokens.txt \ | ||
| 13 | - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \ | ||
| 14 | - --tts-rule-fsts=./vits-zh-aishell3/rule.fst \ | 10 | + --vits-model=./vits-icefall-zh-aishell3/model.onnx \ |
| 11 | + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \ | ||
| 12 | + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \ | ||
| 13 | + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \ | ||
| 14 | + --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \ | ||
| 15 | --sid=66 \ | 15 | --sid=66 \ |
| 16 | --debug=1 \ | 16 | --debug=1 \ |
| 17 | --output-filename=./aishell3-66.wav \ | 17 | --output-filename=./aishell3-66.wav \ |
| 18 | - --text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。" | 18 | + --text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。长沙长大,去过长白山和长安街。行行出状元。行行,银行行长,行业。" |
| @@ -26,6 +26,7 @@ func main() { | @@ -26,6 +26,7 @@ func main() { | ||
| 26 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") | 26 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") |
| 27 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") | 27 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") |
| 28 | flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst") | 28 | flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst") |
| 29 | + flag.StringVar(&config.RuleFars, "tts-rule-fars", "", "Path to rule.far") | ||
| 29 | flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size") | 30 | flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size") |
| 30 | 31 | ||
| 31 | flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models") | 32 | flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models") |
| @@ -6,21 +6,32 @@ | @@ -6,21 +6,32 @@ | ||
| 6 | 6 | ||
| 7 | for sid in 10 33 99; do | 7 | for sid in 10 33 99; do |
| 8 | ./non-streaming-tts \ | 8 | ./non-streaming-tts \ |
| 9 | - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \ | ||
| 10 | - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \ | ||
| 11 | - --vits-tokens=./vits-zh-aishell3/tokens.txt \ | 9 | + --vits-model=./vits-icefall-zh-aishell3/model.onnx \ |
| 10 | + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \ | ||
| 11 | + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \ | ||
| 12 | --sid=$sid \ | 12 | --sid=$sid \ |
| 13 | --debug=1 \ | 13 | --debug=1 \ |
| 14 | --output-filename=./liliana-$sid.wav \ | 14 | --output-filename=./liliana-$sid.wav \ |
| 15 | "林美丽最美丽、最漂亮、最可爱!" | 15 | "林美丽最美丽、最漂亮、最可爱!" |
| 16 | 16 | ||
| 17 | ./non-streaming-tts \ | 17 | ./non-streaming-tts \ |
| 18 | - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \ | ||
| 19 | - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \ | ||
| 20 | - --vits-tokens=./vits-zh-aishell3/tokens.txt \ | ||
| 21 | - --tts-rule-fsts=./vits-zh-aishell3/rule.fst \ | 18 | + --vits-model=./vits-icefall-zh-aishell3/model.onnx \ |
| 19 | + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \ | ||
| 20 | + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \ | ||
| 21 | + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \ | ||
| 22 | --sid=$sid \ | 22 | --sid=$sid \ |
| 23 | --debug=1 \ | 23 | --debug=1 \ |
| 24 | --output-filename=./numbers-$sid.wav \ | 24 | --output-filename=./numbers-$sid.wav \ |
| 25 | "数字12345.6789怎么念" | 25 | "数字12345.6789怎么念" |
| 26 | + | ||
| 27 | +./non-streaming-tts \ | ||
| 28 | + --vits-model=./vits-icefall-zh-aishell3/model.onnx \ | ||
| 29 | + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \ | ||
| 30 | + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \ | ||
| 31 | + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \ | ||
| 32 | + --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \ | ||
| 33 | + --sid=$sid \ | ||
| 34 | + --debug=1 \ | ||
| 35 | + --output-filename=./heteronym-$sid.wav \ | ||
| 36 | + "万古长存长沙长大长白山长孙长安街" | ||
| 26 | done | 37 | done |
| @@ -7,10 +7,9 @@ | @@ -7,10 +7,9 @@ | ||
| 7 | 7 | ||
| 8 | import Foundation | 8 | import Foundation |
| 9 | 9 | ||
| 10 | - | ||
| 11 | // used to get the path to espeak-ng-data | 10 | // used to get the path to espeak-ng-data |
| 12 | func resourceURL(to path: String) -> String { | 11 | func resourceURL(to path: String) -> String { |
| 13 | - return URL(string: path, relativeTo: Bundle.main.resourceURL)!.path | 12 | + return URL(string: path, relativeTo: Bundle.main.resourceURL)!.path |
| 14 | } | 13 | } |
| 15 | 14 | ||
| 16 | func getResource(_ forResource: String, _ ofType: String) -> String { | 15 | func getResource(_ forResource: String, _ ofType: String) -> String { |
| @@ -50,8 +49,7 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { | @@ -50,8 +49,7 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { | ||
| 50 | // See the following link | 49 | // See the following link |
| 51 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3 | 50 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3 |
| 52 | 51 | ||
| 53 | - // vits-vctk.onnx | ||
| 54 | - let model = getResource("vits-aishell3", "onnx") | 52 | + let model = getResource("model", "onnx") |
| 55 | 53 | ||
| 56 | // lexicon.txt | 54 | // lexicon.txt |
| 57 | let lexicon = getResource("lexicon", "txt") | 55 | let lexicon = getResource("lexicon", "txt") |
| @@ -59,9 +57,19 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { | @@ -59,9 +57,19 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { | ||
| 59 | // tokens.txt | 57 | // tokens.txt |
| 60 | let tokens = getResource("tokens", "txt") | 58 | let tokens = getResource("tokens", "txt") |
| 61 | 59 | ||
| 60 | + // rule.fst | ||
| 61 | + let ruleFsts = getResource("rule", "fst") | ||
| 62 | + | ||
| 63 | + // rule.far | ||
| 64 | + let ruleFars = getResource("rule", "far") | ||
| 65 | + | ||
| 62 | let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens) | 66 | let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens) |
| 63 | let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits) | 67 | let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits) |
| 64 | - var config = sherpaOnnxOfflineTtsConfig(model: modelConfig) | 68 | + var config = sherpaOnnxOfflineTtsConfig( |
| 69 | + model: modelConfig, | ||
| 70 | + ruleFsts: ruleFsts, | ||
| 71 | + ruleFars: ruleFars | ||
| 72 | + ) | ||
| 65 | return SherpaOnnxOfflineTtsWrapper(config: &config) | 73 | return SherpaOnnxOfflineTtsWrapper(config: &config) |
| 66 | } | 74 | } |
| 67 | 75 | ||
| @@ -69,7 +77,6 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { | @@ -69,7 +77,6 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { | ||
| 69 | func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper { | 77 | func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper { |
| 70 | // please see https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | 78 | // please see https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 |
| 71 | 79 | ||
| 72 | - // vits-vctk.onnx | ||
| 73 | let model = getResource("en_US-amy-low", "onnx") | 80 | let model = getResource("en_US-amy-low", "onnx") |
| 74 | 81 | ||
| 75 | // tokens.txt | 82 | // tokens.txt |
| @@ -78,7 +85,8 @@ func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper { | @@ -78,7 +85,8 @@ func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper { | ||
| 78 | // in this case, we don't need lexicon.txt | 85 | // in this case, we don't need lexicon.txt |
| 79 | let dataDir = resourceURL(to: "espeak-ng-data") | 86 | let dataDir = resourceURL(to: "espeak-ng-data") |
| 80 | 87 | ||
| 81 | - let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: "", tokens: tokens, dataDir: dataDir) | 88 | + let vits = sherpaOnnxOfflineTtsVitsModelConfig( |
| 89 | + model: model, lexicon: "", tokens: tokens, dataDir: dataDir) | ||
| 82 | let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits) | 90 | let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits) |
| 83 | var config = sherpaOnnxOfflineTtsConfig(model: modelConfig) | 91 | var config = sherpaOnnxOfflineTtsConfig(model: modelConfig) |
| 84 | 92 |
| @@ -11,6 +11,7 @@ | @@ -11,6 +11,7 @@ | ||
| 11 | sherpa-onnx-core.lib; | 11 | sherpa-onnx-core.lib; |
| 12 | kaldi-decoder-core.lib; | 12 | kaldi-decoder-core.lib; |
| 13 | sherpa-onnx-kaldifst-core.lib; | 13 | sherpa-onnx-kaldifst-core.lib; |
| 14 | + sherpa-onnx-fstfar.lib; | ||
| 14 | sherpa-onnx-fst.lib; | 15 | sherpa-onnx-fst.lib; |
| 15 | kaldi-native-fbank-core.lib; | 16 | kaldi-native-fbank-core.lib; |
| 16 | onnxruntime.lib; | 17 | onnxruntime.lib; |
| @@ -11,6 +11,7 @@ | @@ -11,6 +11,7 @@ | ||
| 11 | sherpa-onnx-core.lib; | 11 | sherpa-onnx-core.lib; |
| 12 | kaldi-decoder-core.lib; | 12 | kaldi-decoder-core.lib; |
| 13 | sherpa-onnx-kaldifst-core.lib; | 13 | sherpa-onnx-kaldifst-core.lib; |
| 14 | + sherpa-onnx-fstfar.lib; | ||
| 14 | sherpa-onnx-fst.lib; | 15 | sherpa-onnx-fst.lib; |
| 15 | kaldi-native-fbank-core.lib; | 16 | kaldi-native-fbank-core.lib; |
| 16 | onnxruntime.lib; | 17 | onnxruntime.lib; |
| @@ -11,6 +11,7 @@ | @@ -11,6 +11,7 @@ | ||
| 11 | sherpa-onnx-core.lib; | 11 | sherpa-onnx-core.lib; |
| 12 | kaldi-decoder-core.lib; | 12 | kaldi-decoder-core.lib; |
| 13 | sherpa-onnx-kaldifst-core.lib; | 13 | sherpa-onnx-kaldifst-core.lib; |
| 14 | + sherpa-onnx-fstfar.lib; | ||
| 14 | sherpa-onnx-fst.lib; | 15 | sherpa-onnx-fst.lib; |
| 15 | kaldi-native-fbank-core.lib; | 16 | kaldi-native-fbank-core.lib; |
| 16 | onnxruntime.lib; | 17 | onnxruntime.lib; |
| @@ -43,8 +43,8 @@ for text-to-speech. | @@ -43,8 +43,8 @@ for text-to-speech. | ||
| 43 | You can use the following command to run it: | 43 | You can use the following command to run it: |
| 44 | 44 | ||
| 45 | ```bash | 45 | ```bash |
| 46 | -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 | ||
| 47 | -tar xvf vits-zh-aishell3.tar.bz2 | 46 | +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 47 | +tar xvf vits-icefall-zh-aishell3.tar.bz2 | ||
| 48 | node ./test-offline-tts-zh.js | 48 | node ./test-offline-tts-zh.js |
| 49 | ``` | 49 | ``` |
| 50 | 50 |
| @@ -22,6 +22,7 @@ function createOfflineTts() { | @@ -22,6 +22,7 @@ function createOfflineTts() { | ||
| 22 | let offlineTtsConfig = { | 22 | let offlineTtsConfig = { |
| 23 | offlineTtsModelConfig: offlineTtsModelConfig, | 23 | offlineTtsModelConfig: offlineTtsModelConfig, |
| 24 | ruleFsts: '', | 24 | ruleFsts: '', |
| 25 | + ruleFars: '', | ||
| 25 | maxNumSentences: 1, | 26 | maxNumSentences: 1, |
| 26 | }; | 27 | }; |
| 27 | 28 |
| @@ -4,9 +4,9 @@ const sherpa_onnx = require('sherpa-onnx'); | @@ -4,9 +4,9 @@ const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | 4 | ||
| 5 | function createOfflineTts() { | 5 | function createOfflineTts() { |
| 6 | let offlineTtsVitsModelConfig = { | 6 | let offlineTtsVitsModelConfig = { |
| 7 | - model: './vits-zh-aishell3/vits-aishell3.onnx', | ||
| 8 | - lexicon: './vits-zh-aishell3/lexicon.txt', | ||
| 9 | - tokens: './vits-zh-aishell3/tokens.txt', | 7 | + model: './vits-icefall-zh-aishell3/model.onnx', |
| 8 | + lexicon: './vits-icefall-zh-aishell3/lexicon.txt', | ||
| 9 | + tokens: './vits-icefall-zh-aishell3/tokens.txt', | ||
| 10 | dataDir: '', | 10 | dataDir: '', |
| 11 | noiseScale: 0.667, | 11 | noiseScale: 0.667, |
| 12 | noiseScaleW: 0.8, | 12 | noiseScaleW: 0.8, |
| @@ -21,7 +21,9 @@ function createOfflineTts() { | @@ -21,7 +21,9 @@ function createOfflineTts() { | ||
| 21 | 21 | ||
| 22 | let offlineTtsConfig = { | 22 | let offlineTtsConfig = { |
| 23 | offlineTtsModelConfig: offlineTtsModelConfig, | 23 | offlineTtsModelConfig: offlineTtsModelConfig, |
| 24 | - ruleFsts: './vits-zh-aishell3/rule.fst', | 24 | + ruleFsts: |
| 25 | + './vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst', | ||
| 26 | + ruleFars: './vits-icefall-zh-aishell3/rule.far', | ||
| 25 | maxNumSentences: 1, | 27 | maxNumSentences: 1, |
| 26 | }; | 28 | }; |
| 27 | 29 |
| @@ -56,6 +56,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | @@ -56,6 +56,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt | ||
| 56 | sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt | 56 | sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt |
| 57 | {% endif %} | 57 | {% endif %} |
| 58 | 58 | ||
| 59 | +{% if tts_model.rule_fars %} | ||
| 60 | + rule_fars={{ tts_model.rule_fars }} | ||
| | | 61 | + sed -i.bak s%"ruleFars = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt | |
| 62 | +{% endif %} | ||
| 63 | + | ||
| 59 | {% if tts_model.data_dir %} | 64 | {% if tts_model.data_dir %} |
| 60 | data_dir={{ tts_model.data_dir }} | 65 | data_dir={{ tts_model.data_dir }} |
| 61 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt | 66 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt |
| @@ -54,6 +54,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt | @@ -54,6 +54,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt | ||
| 54 | sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt | 54 | sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt |
| 55 | {% endif %} | 55 | {% endif %} |
| 56 | 56 | ||
| 57 | +{% if tts_model.rule_fars %} | ||
| 58 | + rule_fars={{ tts_model.rule_fars }} | ||
| | | 59 | + sed -i.bak s%"ruleFars = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt | |
| 60 | +{% endif %} | ||
| 61 | + | ||
| 57 | {% if tts_model.data_dir %} | 62 | {% if tts_model.data_dir %} |
| 58 | data_dir={{ tts_model.data_dir }} | 63 | data_dir={{ tts_model.data_dir }} |
| 59 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt | 64 | sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt |
| @@ -33,6 +33,7 @@ class TtsModel: | @@ -33,6 +33,7 @@ class TtsModel: | ||
| 33 | model_name: str = "" | 33 | model_name: str = "" |
| 34 | lang: str = "" # en, zh, fr, de, etc. | 34 | lang: str = "" # en, zh, fr, de, etc. |
| 35 | rule_fsts: Optional[List[str]] = None | 35 | rule_fsts: Optional[List[str]] = None |
| 36 | + rule_fars: Optional[List[str]] = None | ||
| 36 | data_dir: Optional[str] = None | 37 | data_dir: Optional[str] = None |
| 37 | is_char: bool = False | 38 | is_char: bool = False |
| 38 | lang_iso_639_3: str = "" | 39 | lang_iso_639_3: str = "" |
| @@ -241,98 +242,94 @@ def get_mimic3_models() -> List[TtsModel]: | @@ -241,98 +242,94 @@ def get_mimic3_models() -> List[TtsModel]: | ||
| 241 | 242 | ||
| 242 | 243 | ||
| 243 | def get_vits_models() -> List[TtsModel]: | 244 | def get_vits_models() -> List[TtsModel]: |
| 244 | - return [ | 245 | + chinese_models = [ |
| 245 | # Chinese | 246 | # Chinese |
| 246 | TtsModel( | 247 | TtsModel( |
| 247 | model_dir="vits-icefall-zh-aishell3", | 248 | model_dir="vits-icefall-zh-aishell3", |
| 248 | model_name="model.onnx", | 249 | model_name="model.onnx", |
| 249 | lang="zh", | 250 | lang="zh", |
| 250 | - rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/rule.fst", | 251 | + rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst", |
| 252 | + rule_fars="vits-icefall-zh-aishell3/rule.far", | ||
| 251 | ), | 253 | ), |
| 252 | TtsModel( | 254 | TtsModel( |
| 253 | model_dir="vits-zh-aishell3", | 255 | model_dir="vits-zh-aishell3", |
| 254 | model_name="vits-aishell3.onnx", | 256 | model_name="vits-aishell3.onnx", |
| 255 | lang="zh", | 257 | lang="zh", |
| 256 | - rule_fsts="vits-zh-aishell3/rule.fst", | ||
| 257 | ), | 258 | ), |
| 258 | TtsModel( | 259 | TtsModel( |
| 259 | model_dir="vits-zh-hf-doom", | 260 | model_dir="vits-zh-hf-doom", |
| 260 | model_name="doom.onnx", | 261 | model_name="doom.onnx", |
| 261 | lang="zh", | 262 | lang="zh", |
| 262 | - rule_fsts="vits-zh-hf-doom/rule.fst", | ||
| 263 | ), | 263 | ), |
| 264 | TtsModel( | 264 | TtsModel( |
| 265 | model_dir="vits-zh-hf-echo", | 265 | model_dir="vits-zh-hf-echo", |
| 266 | model_name="echo.onnx", | 266 | model_name="echo.onnx", |
| 267 | lang="zh", | 267 | lang="zh", |
| 268 | - rule_fsts="vits-zh-hf-echo/rule.fst", | ||
| 269 | ), | 268 | ), |
| 270 | TtsModel( | 269 | TtsModel( |
| 271 | model_dir="vits-zh-hf-zenyatta", | 270 | model_dir="vits-zh-hf-zenyatta", |
| 272 | model_name="zenyatta.onnx", | 271 | model_name="zenyatta.onnx", |
| 273 | lang="zh", | 272 | lang="zh", |
| 274 | - rule_fsts="vits-zh-hf-zenyatta/rule.fst", | ||
| 275 | ), | 273 | ), |
| 276 | TtsModel( | 274 | TtsModel( |
| 277 | model_dir="vits-zh-hf-abyssinvoker", | 275 | model_dir="vits-zh-hf-abyssinvoker", |
| 278 | model_name="abyssinvoker.onnx", | 276 | model_name="abyssinvoker.onnx", |
| 279 | lang="zh", | 277 | lang="zh", |
| 280 | - rule_fsts="vits-zh-hf-abyssinvoker/rule.fst", | ||
| 281 | ), | 278 | ), |
| 282 | TtsModel( | 279 | TtsModel( |
| 283 | model_dir="vits-zh-hf-keqing", | 280 | model_dir="vits-zh-hf-keqing", |
| 284 | model_name="keqing.onnx", | 281 | model_name="keqing.onnx", |
| 285 | lang="zh", | 282 | lang="zh", |
| 286 | - rule_fsts="vits-zh-hf-keqing/rule.fst", | ||
| 287 | ), | 283 | ), |
| 288 | TtsModel( | 284 | TtsModel( |
| 289 | model_dir="vits-zh-hf-eula", | 285 | model_dir="vits-zh-hf-eula", |
| 290 | model_name="eula.onnx", | 286 | model_name="eula.onnx", |
| 291 | lang="zh", | 287 | lang="zh", |
| 292 | - rule_fsts="vits-zh-hf-eula/rule.fst", | ||
| 293 | ), | 288 | ), |
| 294 | TtsModel( | 289 | TtsModel( |
| 295 | model_dir="vits-zh-hf-bronya", | 290 | model_dir="vits-zh-hf-bronya", |
| 296 | model_name="bronya.onnx", | 291 | model_name="bronya.onnx", |
| 297 | lang="zh", | 292 | lang="zh", |
| 298 | - rule_fsts="vits-zh-hf-bronya/rule.fst", | ||
| 299 | ), | 293 | ), |
| 300 | TtsModel( | 294 | TtsModel( |
| 301 | model_dir="vits-zh-hf-theresa", | 295 | model_dir="vits-zh-hf-theresa", |
| 302 | model_name="theresa.onnx", | 296 | model_name="theresa.onnx", |
| 303 | lang="zh", | 297 | lang="zh", |
| 304 | - rule_fsts="vits-zh-hf-theresa/rule.fst", | ||
| 305 | ), | 298 | ), |
| 306 | TtsModel( | 299 | TtsModel( |
| 307 | model_dir="vits-zh-hf-fanchen-wnj", | 300 | model_dir="vits-zh-hf-fanchen-wnj", |
| 308 | model_name="vits-zh-hf-fanchen-wnj.onnx", | 301 | model_name="vits-zh-hf-fanchen-wnj.onnx", |
| 309 | lang="zh", | 302 | lang="zh", |
| 310 | - rule_fsts="vits-zh-hf-fanchen-wnj/rule.fst", | ||
| 311 | ), | 303 | ), |
| 312 | TtsModel( | 304 | TtsModel( |
| 313 | model_dir="vits-zh-hf-fanchen-C", | 305 | model_dir="vits-zh-hf-fanchen-C", |
| 314 | model_name="vits-zh-hf-fanchen-C.onnx", | 306 | model_name="vits-zh-hf-fanchen-C.onnx", |
| 315 | lang="zh", | 307 | lang="zh", |
| 316 | - rule_fsts="vits-zh-hf-fanchen-C/rule.fst", | ||
| 317 | ), | 308 | ), |
| 318 | TtsModel( | 309 | TtsModel( |
| 319 | model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe", | 310 | model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe", |
| 320 | model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx", | 311 | model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx", |
| 321 | lang="zh", | 312 | lang="zh", |
| 322 | - rule_fsts="vits-zh-hf-fanchen-ZhiHuiLaoZhe/rule.fst", | ||
| 323 | ), | 313 | ), |
| 324 | TtsModel( | 314 | TtsModel( |
| 325 | model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new", | 315 | model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new", |
| 326 | model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx", | 316 | model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx", |
| 327 | lang="zh", | 317 | lang="zh", |
| 328 | - rule_fsts="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new/rule.fst", | ||
| 329 | ), | 318 | ), |
| 330 | TtsModel( | 319 | TtsModel( |
| 331 | model_dir="vits-zh-hf-fanchen-unity", | 320 | model_dir="vits-zh-hf-fanchen-unity", |
| 332 | model_name="vits-zh-hf-fanchen-unity.onnx", | 321 | model_name="vits-zh-hf-fanchen-unity.onnx", |
| 333 | lang="zh", | 322 | lang="zh", |
| 334 | - rule_fsts="vits-zh-hf-fanchen-unity/rule.fst", | ||
| 335 | ), | 323 | ), |
| 324 | + ] | ||
| 325 | + | ||
| 326 | + rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"] | ||
| 327 | + for m in chinese_models: | ||
| 328 | + s = [f"{m.model_dir}/{r}" for r in rule_fsts] | ||
| 329 | + m.rule_fsts = ",".join(s) | ||
| 330 | + m.rule_fars = f"{m.model_dir}/rule.far" | ||
| 331 | + | ||
| 332 | + all_models = chinese_models + [ | ||
| 336 | TtsModel( | 333 | TtsModel( |
| 337 | model_dir="vits-cantonese-hf-xiaomaiiwn", | 334 | model_dir="vits-cantonese-hf-xiaomaiiwn", |
| 338 | model_name="vits-cantonese-hf-xiaomaiiwn.onnx", | 335 | model_name="vits-cantonese-hf-xiaomaiiwn.onnx", |
| @@ -346,6 +343,8 @@ def get_vits_models() -> List[TtsModel]: | @@ -346,6 +343,8 @@ def get_vits_models() -> List[TtsModel]: | ||
| 346 | # fmt: on | 343 | # fmt: on |
| 347 | ] | 344 | ] |
| 348 | 345 | ||
| 346 | + return all_models | ||
| 347 | + | ||
| 349 | 348 | ||
| 350 | def main(): | 349 | def main(): |
| 351 | args = get_args() | 350 | args = get_args() |
| @@ -40,6 +40,7 @@ def process_linux(s): | @@ -40,6 +40,7 @@ def process_linux(s): | ||
| 40 | "libpiper_phonemize.so.1", | 40 | "libpiper_phonemize.so.1", |
| 41 | "libsherpa-onnx-c-api.so", | 41 | "libsherpa-onnx-c-api.so", |
| 42 | "libsherpa-onnx-core.so", | 42 | "libsherpa-onnx-core.so", |
| 43 | + "libsherpa-onnx-fstfar.so.7", | ||
| 43 | "libsherpa-onnx-fst.so.6", | 44 | "libsherpa-onnx-fst.so.6", |
| 44 | "libsherpa-onnx-kaldifst-core.so", | 45 | "libsherpa-onnx-kaldifst-core.so", |
| 45 | "libucd.so", | 46 | "libucd.so", |
| @@ -68,6 +69,7 @@ def process_macos(s): | @@ -68,6 +69,7 @@ def process_macos(s): | ||
| 68 | "libpiper_phonemize.1.dylib", | 69 | "libpiper_phonemize.1.dylib", |
| 69 | "libsherpa-onnx-c-api.dylib", | 70 | "libsherpa-onnx-c-api.dylib", |
| 70 | "libsherpa-onnx-core.dylib", | 71 | "libsherpa-onnx-core.dylib", |
| 72 | + "libsherpa-onnx-fstfar.7.dylib", | ||
| 71 | "libsherpa-onnx-fst.6.dylib", | 73 | "libsherpa-onnx-fst.6.dylib", |
| 72 | "libsherpa-onnx-kaldifst-core.dylib", | 74 | "libsherpa-onnx-kaldifst-core.dylib", |
| 73 | "libucd.dylib", | 75 | "libucd.dylib", |
| @@ -96,6 +98,7 @@ def process_windows(s, rid): | @@ -96,6 +98,7 @@ def process_windows(s, rid): | ||
| 96 | "piper_phonemize.dll", | 98 | "piper_phonemize.dll", |
| 97 | "sherpa-onnx-c-api.dll", | 99 | "sherpa-onnx-c-api.dll", |
| 98 | "sherpa-onnx-core.dll", | 100 | "sherpa-onnx-core.dll", |
| 101 | + "sherpa-onnx-fstfar.lib", | ||
| 99 | "sherpa-onnx-fst.lib", | 102 | "sherpa-onnx-fst.lib", |
| 100 | "sherpa-onnx-kaldifst-core.lib", | 103 | "sherpa-onnx-kaldifst-core.lib", |
| 101 | "ucd.dll", | 104 | "ucd.dll", |
| @@ -67,6 +67,7 @@ namespace SherpaOnnx | @@ -67,6 +67,7 @@ namespace SherpaOnnx | ||
| 67 | Model = new OfflineTtsModelConfig(); | 67 | Model = new OfflineTtsModelConfig(); |
| 68 | RuleFsts = ""; | 68 | RuleFsts = ""; |
| 69 | MaxNumSentences = 1; | 69 | MaxNumSentences = 1; |
| 70 | + RuleFars = ""; | ||
| 70 | } | 71 | } |
| 71 | public OfflineTtsModelConfig Model; | 72 | public OfflineTtsModelConfig Model; |
| 72 | 73 | ||
| @@ -74,6 +75,9 @@ namespace SherpaOnnx | @@ -74,6 +75,9 @@ namespace SherpaOnnx | ||
| 74 | public string RuleFsts; | 75 | public string RuleFsts; |
| 75 | 76 | ||
| 76 | public int MaxNumSentences; | 77 | public int MaxNumSentences; |
| 78 | + | ||
| 79 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 80 | + public string RuleFars; | ||
| 77 | } | 81 | } |
| 78 | 82 | ||
| 79 | public class OfflineTtsGeneratedAudio | 83 | public class OfflineTtsGeneratedAudio |
| @@ -41,6 +41,7 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then | @@ -41,6 +41,7 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then | ||
| 41 | cd .. | 41 | cd .. |
| 42 | rm -v libpiper_phonemize.so libpiper_phonemize.so.1.2.0 | 42 | rm -v libpiper_phonemize.so libpiper_phonemize.so.1.2.0 |
| 43 | rm -v libsherpa-onnx-fst.so | 43 | rm -v libsherpa-onnx-fst.so |
| 44 | + rm -v libsherpa-onnx-fstfar.so | ||
| 44 | rm -v libonnxruntime.so | 45 | rm -v libonnxruntime.so |
| 45 | rm -v libcargs.so | 46 | rm -v libcargs.so |
| 46 | rm -rf wheel | 47 | rm -rf wheel |
| @@ -67,6 +68,7 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then | @@ -67,6 +68,7 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then | ||
| 67 | rm -v libonnxruntime.dylib | 68 | rm -v libonnxruntime.dylib |
| 68 | rm -v libpiper_phonemize.1.2.0.dylib libpiper_phonemize.dylib | 69 | rm -v libpiper_phonemize.1.2.0.dylib libpiper_phonemize.dylib |
| 69 | rm -v libsherpa-onnx-fst.dylib | 70 | rm -v libsherpa-onnx-fst.dylib |
| 71 | + rm -v libsherpa-onnx-fstfar.dylib | ||
| 70 | rm -rf wheel | 72 | rm -rf wheel |
| 71 | ls -lh | 73 | ls -lh |
| 72 | cd .. | 74 | cd .. |
| @@ -2,5 +2,5 @@ | @@ -2,5 +2,5 @@ | ||
| 2 | 2 | ||
| 3 | package sherpa_onnx | 3 | package sherpa_onnx |
| 4 | 4 | ||
| 5 | -// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin | 5 | +// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin |
| 6 | import "C" | 6 | import "C" |
| @@ -554,6 +554,7 @@ type OfflineTtsModelConfig struct { | @@ -554,6 +554,7 @@ type OfflineTtsModelConfig struct { | ||
| 554 | type OfflineTtsConfig struct { | 554 | type OfflineTtsConfig struct { |
| 555 | Model OfflineTtsModelConfig | 555 | Model OfflineTtsModelConfig |
| 556 | RuleFsts string | 556 | RuleFsts string |
| 557 | + RuleFars string | ||
| 557 | MaxNumSentences int | 558 | MaxNumSentences int |
| 558 | } | 559 | } |
| 559 | 560 | ||
| @@ -583,6 +584,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | @@ -583,6 +584,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | ||
| 583 | c.rule_fsts = C.CString(config.RuleFsts) | 584 | c.rule_fsts = C.CString(config.RuleFsts) |
| 584 | defer C.free(unsafe.Pointer(c.rule_fsts)) | 585 | defer C.free(unsafe.Pointer(c.rule_fsts)) |
| 585 | 586 | ||
| 587 | + c.rule_fars = C.CString(config.RuleFars) | ||
| 588 | + defer C.free(unsafe.Pointer(c.rule_fars)) | ||
| 589 | + | ||
| 586 | c.max_num_sentences = C.int(config.MaxNumSentences) | 590 | c.max_num_sentences = C.int(config.MaxNumSentences) |
| 587 | 591 | ||
| 588 | c.model.vits.model = C.CString(config.Model.Vits.Model) | 592 | c.model.vits.model = C.CString(config.Model.Vits.Model) |
| @@ -818,6 +818,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | @@ -818,6 +818,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | ||
| 818 | tts_config.model.debug = config->model.debug; | 818 | tts_config.model.debug = config->model.debug; |
| 819 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | 819 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); |
| 820 | tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); | 820 | tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); |
| 821 | + tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); | ||
| 821 | tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); | 822 | tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); |
| 822 | 823 | ||
| 823 | if (tts_config.model.debug) { | 824 | if (tts_config.model.debug) { |
| @@ -783,6 +783,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { | @@ -783,6 +783,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { | ||
| 783 | SherpaOnnxOfflineTtsModelConfig model; | 783 | SherpaOnnxOfflineTtsModelConfig model; |
| 784 | const char *rule_fsts; | 784 | const char *rule_fsts; |
| 785 | int32_t max_num_sentences; | 785 | int32_t max_num_sentences; |
| 786 | + const char *rule_fars; | ||
| 786 | } SherpaOnnxOfflineTtsConfig; | 787 | } SherpaOnnxOfflineTtsConfig; |
| 787 | 788 | ||
| 788 | SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio { | 789 | SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio { |
| @@ -164,6 +164,7 @@ endif() | @@ -164,6 +164,7 @@ endif() | ||
| 164 | 164 | ||
| 165 | if(SHERPA_ONNX_ENABLE_TTS) | 165 | if(SHERPA_ONNX_ENABLE_TTS) |
| 166 | target_link_libraries(sherpa-onnx-core piper_phonemize) | 166 | target_link_libraries(sherpa-onnx-core piper_phonemize) |
| 167 | + target_link_libraries(sherpa-onnx-core fstfar fst) | ||
| 167 | endif() | 168 | endif() |
| 168 | 169 | ||
| 169 | if(SHERPA_ONNX_ENABLE_CHECK) | 170 | if(SHERPA_ONNX_ENABLE_CHECK) |
| @@ -18,7 +18,6 @@ | @@ -18,7 +18,6 @@ | ||
| 18 | #endif | 18 | #endif |
| 19 | 19 | ||
| 20 | #include <memory> | 20 | #include <memory> |
| 21 | -#include <regex> // NOLINT | ||
| 22 | 21 | ||
| 23 | #include "sherpa-onnx/csrc/macros.h" | 22 | #include "sherpa-onnx/csrc/macros.h" |
| 24 | #include "sherpa-onnx/csrc/onnx-utils.h" | 23 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| @@ -26,6 +25,55 @@ | @@ -26,6 +25,55 @@ | ||
| 26 | 25 | ||
| 27 | namespace sherpa_onnx { | 26 | namespace sherpa_onnx { |
| 28 | 27 | ||
| 28 | +static std::vector<std::string> ProcessHeteronyms( | ||
| 29 | + const std::vector<std::string> &words) { | ||
| 30 | + std::vector<std::string> ans; | ||
| 31 | + ans.reserve(words.size()); | ||
| 32 | + | ||
| 33 | + int32_t num_words = static_cast<int32_t>(words.size()); | ||
| 34 | + int32_t i = 0; | ||
| 35 | + int32_t prev = -1; | ||
| 36 | + while (i < num_words) { | ||
| 37 | + // start of a phrase #$| | ||
| 38 | + if ((i + 2 < num_words) && words[i] == "#" && words[i + 1] == "$" && | ||
| 39 | + words[i + 2] == "|") { | ||
| 40 | + if (prev == -1) { | ||
| 41 | + prev = i + 3; | ||
| 42 | + } | ||
| 43 | + i = i + 3; | ||
| 44 | + continue; | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + // end of a phrase |$# | ||
| 48 | + if ((i + 2 < num_words) && words[i] == "|" && words[i + 1] == "$" && | ||
| 49 | + words[i + 2] == "#") { | ||
| 50 | + if (prev != -1) { | ||
| 51 | + std::ostringstream os; | ||
| 52 | + for (int32_t k = prev; k < i; ++k) { | ||
| 53 | + if (words[k] != "|" && words[k] != "$" && words[k] != "#") { | ||
| 54 | + os << words[k]; | ||
| 55 | + } | ||
| 56 | + } | ||
| 57 | + ans.push_back(os.str()); | ||
| 58 | + | ||
| 59 | + prev = -1; | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + i += 3; | ||
| 63 | + continue; | ||
| 64 | + } | ||
| 65 | + | ||
| 66 | + if (prev == -1) { | ||
| 67 | + // not inside a phrase | ||
| 68 | + ans.push_back(words[i]); | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + ++i; | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + return ans; | ||
| 75 | +} | ||
| 76 | + | ||
| 29 | static void ToLowerCase(std::string *in_out) { | 77 | static void ToLowerCase(std::string *in_out) { |
| 30 | std::transform(in_out->begin(), in_out->end(), in_out->begin(), | 78 | std::transform(in_out->begin(), in_out->end(), in_out->begin(), |
| 31 | [](unsigned char c) { return std::tolower(c); }); | 79 | [](unsigned char c) { return std::tolower(c); }); |
| @@ -148,36 +196,9 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese( | @@ -148,36 +196,9 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese( | ||
| 148 | const std::string &_text) const { | 196 | const std::string &_text) const { |
| 149 | std::string text(_text); | 197 | std::string text(_text); |
| 150 | ToLowerCase(&text); | 198 | ToLowerCase(&text); |
| 151 | - std::vector<std::string> words; | ||
| 152 | - if (pattern_) { | ||
| 153 | - // Handle polyphones | ||
| 154 | - size_t pos = 0; | ||
| 155 | - auto begin = std::sregex_iterator(text.begin(), text.end(), *pattern_); | ||
| 156 | - auto end = std::sregex_iterator(); | ||
| 157 | - for (std::sregex_iterator i = begin; i != end; ++i) { | ||
| 158 | - std::smatch match = *i; | ||
| 159 | - if (pos < match.position()) { | ||
| 160 | - auto this_segment = text.substr(pos, match.position() - pos); | ||
| 161 | - auto this_segment_words = SplitUtf8(this_segment); | ||
| 162 | - words.insert(words.end(), this_segment_words.begin(), | ||
| 163 | - this_segment_words.end()); | ||
| 164 | - pos = match.position() + match.length(); | ||
| 165 | - } else if (pos == match.position()) { | ||
| 166 | - pos = match.position() + match.length(); | ||
| 167 | - } | ||
| 168 | 199 | ||
| 169 | - words.push_back(match.str()); | ||
| 170 | - } | ||
| 171 | - | ||
| 172 | - if (pos < text.size()) { | ||
| 173 | - auto this_segment = text.substr(pos, text.size() - pos); | ||
| 174 | - auto this_segment_words = SplitUtf8(this_segment); | ||
| 175 | - words.insert(words.end(), this_segment_words.begin(), | ||
| 176 | - this_segment_words.end()); | ||
| 177 | - } | ||
| 178 | - } else { | ||
| 179 | - words = SplitUtf8(text); | ||
| 180 | - } | 200 | + std::vector<std::string> words = SplitUtf8(text); |
| 201 | + words = ProcessHeteronyms(words); | ||
| 181 | 202 | ||
| 182 | if (debug_) { | 203 | if (debug_) { |
| 183 | fprintf(stderr, "Input text in string: %s\n", text.c_str()); | 204 | fprintf(stderr, "Input text in string: %s\n", text.c_str()); |
| @@ -357,9 +378,6 @@ void Lexicon::InitLexicon(std::istream &is) { | @@ -357,9 +378,6 @@ void Lexicon::InitLexicon(std::istream &is) { | ||
| 357 | std::string line; | 378 | std::string line; |
| 358 | std::string phone; | 379 | std::string phone; |
| 359 | 380 | ||
| 360 | - std::ostringstream os; | ||
| 361 | - std::string sep; | ||
| 362 | - | ||
| 363 | while (std::getline(is, line)) { | 381 | while (std::getline(is, line)) { |
| 364 | std::istringstream iss(line); | 382 | std::istringstream iss(line); |
| 365 | 383 | ||
| @@ -381,18 +399,9 @@ void Lexicon::InitLexicon(std::istream &is) { | @@ -381,18 +399,9 @@ void Lexicon::InitLexicon(std::istream &is) { | ||
| 381 | if (ids.empty()) { | 399 | if (ids.empty()) { |
| 382 | continue; | 400 | continue; |
| 383 | } | 401 | } |
| 384 | - if (language_ == Language::kChinese && word.size() > 3) { | ||
| 385 | - // this is not a single word; | ||
| 386 | - os << sep << word; | ||
| 387 | - sep = "|"; | ||
| 388 | - } | ||
| 389 | 402 | ||
| 390 | word2ids_.insert({std::move(word), std::move(ids)}); | 403 | word2ids_.insert({std::move(word), std::move(ids)}); |
| 391 | } | 404 | } |
| 392 | - | ||
| 393 | - if (!sep.empty()) { | ||
| 394 | - pattern_ = std::make_unique<std::regex>(os.str()); | ||
| 395 | - } | ||
| 396 | } | 405 | } |
| 397 | 406 | ||
| 398 | void Lexicon::InitPunctuations(const std::string &punctuations) { | 407 | void Lexicon::InitPunctuations(const std::string &punctuations) { |
| @@ -7,7 +7,6 @@ | @@ -7,7 +7,6 @@ | ||
| 7 | 7 | ||
| 8 | #include <cstdint> | 8 | #include <cstdint> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | -#include <regex> // NOLINT | ||
| 11 | #include <string> | 10 | #include <string> |
| 12 | #include <unordered_map> | 11 | #include <unordered_map> |
| 13 | #include <unordered_set> | 12 | #include <unordered_set> |
| @@ -65,9 +64,6 @@ class Lexicon : public OfflineTtsFrontend { | @@ -65,9 +64,6 @@ class Lexicon : public OfflineTtsFrontend { | ||
| 65 | std::unordered_map<std::string, int32_t> token2id_; | 64 | std::unordered_map<std::string, int32_t> token2id_; |
| 66 | Language language_; | 65 | Language language_; |
| 67 | bool debug_; | 66 | bool debug_; |
| 68 | - | ||
| 69 | - // for Chinese polyphones | ||
| 70 | - std::unique_ptr<std::regex> pattern_; | ||
| 71 | }; | 67 | }; |
| 72 | 68 | ||
| 73 | } // namespace sherpa_onnx | 69 | } // namespace sherpa_onnx |
| @@ -15,6 +15,9 @@ | @@ -15,6 +15,9 @@ | ||
| 15 | #include "android/asset_manager.h" | 15 | #include "android/asset_manager.h" |
| 16 | #include "android/asset_manager_jni.h" | 16 | #include "android/asset_manager_jni.h" |
| 17 | #endif | 17 | #endif |
| 18 | + | ||
| 19 | +#include "fst/extensions/far/far.h" | ||
| 20 | +#include "kaldifst/csrc/kaldi-fst-io.h" | ||
| 18 | #include "kaldifst/csrc/text-normalizer.h" | 21 | #include "kaldifst/csrc/text-normalizer.h" |
| 19 | #include "sherpa-onnx/csrc/lexicon.h" | 22 | #include "sherpa-onnx/csrc/lexicon.h" |
| 20 | #include "sherpa-onnx/csrc/macros.h" | 23 | #include "sherpa-onnx/csrc/macros.h" |
| @@ -46,6 +49,32 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { | @@ -46,6 +49,32 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { | ||
| 46 | tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f)); | 49 | tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f)); |
| 47 | } | 50 | } |
| 48 | } | 51 | } |
| 52 | + | ||
| 53 | + if (!config.rule_fars.empty()) { | ||
| 54 | + if (config.model.debug) { | ||
| 55 | + SHERPA_ONNX_LOGE("Loading FST archives"); | ||
| 56 | + } | ||
| 57 | + std::vector<std::string> files; | ||
| 58 | + SplitStringToVector(config.rule_fars, ",", false, &files); | ||
| 59 | + for (const auto &f : files) { | ||
| 60 | + if (config.model.debug) { | ||
| 61 | + SHERPA_ONNX_LOGE("rule far: %s", f.c_str()); | ||
| 62 | + } | ||
| 63 | + std::unique_ptr<fst::FarReader<fst::StdArc>> reader( | ||
| 64 | + fst::FarReader<fst::StdArc>::Open(f)); | ||
| 65 | + for (; !reader->Done(); reader->Next()) { | ||
| 66 | + std::unique_ptr<fst::StdConstFst> r( | ||
| 67 | + fst::CastOrConvertToConstFst(reader->GetFst()->Copy())); | ||
| 68 | + | ||
| 69 | + tn_list_.push_back( | ||
| 70 | + std::make_unique<kaldifst::TextNormalizer>(std::move(r))); | ||
| 71 | + } | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + if (config.model.debug) { | ||
| 75 | + SHERPA_ONNX_LOGE("FST archives loaded!"); | ||
| 76 | + } | ||
| 77 | + } | ||
| 49 | } | 78 | } |
| 50 | 79 | ||
| 51 | #if __ANDROID_API__ >= 9 | 80 | #if __ANDROID_API__ >= 9 |
| @@ -20,7 +20,14 @@ void OfflineTtsConfig::Register(ParseOptions *po) { | @@ -20,7 +20,14 @@ void OfflineTtsConfig::Register(ParseOptions *po) { | ||
| 20 | "It not empty, it contains a list of rule FST filenames." | 20 | "It not empty, it contains a list of rule FST filenames." |
| 21 | "Multiple filenames are separated by a comma and they are " | 21 | "Multiple filenames are separated by a comma and they are " |
| 22 | "applied from left to right. An example value: " | 22 | "applied from left to right. An example value: " |
| 23 | - "rule1.fst,rule2,fst,rule3.fst"); | 23 | + "rule1.fst,rule2.fst,rule3.fst"); |
| 24 | + | ||
| 25 | + po->Register("tts-rule-fars", &rule_fars, | ||
| 26 | + "It not empty, it contains a list of rule FST archive filenames." | ||
| 27 | + "Multiple filenames are separated by a comma and they are " | ||
| 28 | + "applied from left to right. An example value: " | ||
| 29 | + "rule1.far,rule2.far,rule3.far. Note that an *.far can contain " | ||
| 30 | + "multiple *.fst files"); | ||
| 24 | 31 | ||
| 25 | po->Register( | 32 | po->Register( |
| 26 | "tts-max-num-sentences", &max_num_sentences, | 33 | "tts-max-num-sentences", &max_num_sentences, |
| @@ -41,6 +48,17 @@ bool OfflineTtsConfig::Validate() const { | @@ -41,6 +48,17 @@ bool OfflineTtsConfig::Validate() const { | ||
| 41 | } | 48 | } |
| 42 | } | 49 | } |
| 43 | 50 | ||
| 51 | + if (!rule_fars.empty()) { | ||
| 52 | + std::vector<std::string> files; | ||
| 53 | + SplitStringToVector(rule_fars, ",", false, &files); | ||
| 54 | + for (const auto &f : files) { | ||
| 55 | + if (!FileExists(f)) { | ||
| 56 | + SHERPA_ONNX_LOGE("Rule far %s does not exist. ", f.c_str()); | ||
| 57 | + return false; | ||
| 58 | + } | ||
| 59 | + } | ||
| 60 | + } | ||
| 61 | + | ||
| 44 | return model.Validate(); | 62 | return model.Validate(); |
| 45 | } | 63 | } |
| 46 | 64 | ||
| @@ -50,6 +68,7 @@ std::string OfflineTtsConfig::ToString() const { | @@ -50,6 +68,7 @@ std::string OfflineTtsConfig::ToString() const { | ||
| 50 | os << "OfflineTtsConfig("; | 68 | os << "OfflineTtsConfig("; |
| 51 | os << "model=" << model.ToString() << ", "; | 69 | os << "model=" << model.ToString() << ", "; |
| 52 | os << "rule_fsts=\"" << rule_fsts << "\", "; | 70 | os << "rule_fsts=\"" << rule_fsts << "\", "; |
| 71 | + os << "rule_fars=\"" << rule_fars << "\", "; | ||
| 53 | os << "max_num_sentences=" << max_num_sentences << ")"; | 72 | os << "max_num_sentences=" << max_num_sentences << ")"; |
| 54 | 73 | ||
| 55 | return os.str(); | 74 | return os.str(); |
| @@ -29,6 +29,9 @@ struct OfflineTtsConfig { | @@ -29,6 +29,9 @@ struct OfflineTtsConfig { | ||
| 29 | // If there are multiple rules, they are applied from left to right. | 29 | // If there are multiple rules, they are applied from left to right. |
| 30 | std::string rule_fsts; | 30 | std::string rule_fsts; |
| 31 | 31 | ||
| 32 | + // If there are multiple FST archives, they are applied from left to right. | ||
| 33 | + std::string rule_fars; | ||
| 34 | + | ||
| 32 | // Maximum number of sentences that we process at a time. | 35 | // Maximum number of sentences that we process at a time. |
| 33 | // This is to avoid OOM for very long input text. | 36 | // This is to avoid OOM for very long input text. |
| 34 | // If you set it to -1, then we process all sentences in a single batch. | 37 | // If you set it to -1, then we process all sentences in a single batch. |
| @@ -36,9 +39,11 @@ struct OfflineTtsConfig { | @@ -36,9 +39,11 @@ struct OfflineTtsConfig { | ||
| 36 | 39 | ||
| 37 | OfflineTtsConfig() = default; | 40 | OfflineTtsConfig() = default; |
| 38 | OfflineTtsConfig(const OfflineTtsModelConfig &model, | 41 | OfflineTtsConfig(const OfflineTtsModelConfig &model, |
| 39 | - const std::string &rule_fsts, int32_t max_num_sentences) | 42 | + const std::string &rule_fsts, const std::string &rule_fars, |
| 43 | + int32_t max_num_sentences) | ||
| 40 | : model(model), | 44 | : model(model), |
| 41 | rule_fsts(rule_fsts), | 45 | rule_fsts(rule_fsts), |
| 46 | + rule_fars(rule_fars), | ||
| 42 | max_num_sentences(max_num_sentences) {} | 47 | max_num_sentences(max_num_sentences) {} |
| 43 | 48 | ||
| 44 | void Register(ParseOptions *po); | 49 | void Register(ParseOptions *po); |
| @@ -878,6 +878,13 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { | @@ -878,6 +878,13 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { | ||
| 878 | ans.rule_fsts = p; | 878 | ans.rule_fsts = p; |
| 879 | env->ReleaseStringUTFChars(s, p); | 879 | env->ReleaseStringUTFChars(s, p); |
| 880 | 880 | ||
| 881 | + // for ruleFars | ||
| 882 | + fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;"); | ||
| 883 | + s = (jstring)env->GetObjectField(config, fid); | ||
| 884 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 885 | + ans.rule_fars = p; | ||
| 886 | + env->ReleaseStringUTFChars(s, p); | ||
| 887 | + | ||
| 881 | fid = env->GetFieldID(cls, "maxNumSentences", "I"); | 888 | fid = env->GetFieldID(cls, "maxNumSentences", "I"); |
| 882 | ans.max_num_sentences = env->GetIntField(config, fid); | 889 | ans.max_num_sentences = env->GetIntField(config, fid); |
| 883 | 890 |
| @@ -32,11 +32,12 @@ static void PybindOfflineTtsConfig(py::module *m) { | @@ -32,11 +32,12 @@ static void PybindOfflineTtsConfig(py::module *m) { | ||
| 32 | py::class_<PyClass>(*m, "OfflineTtsConfig") | 32 | py::class_<PyClass>(*m, "OfflineTtsConfig") |
| 33 | .def(py::init<>()) | 33 | .def(py::init<>()) |
| 34 | .def(py::init<const OfflineTtsModelConfig &, const std::string &, | 34 | .def(py::init<const OfflineTtsModelConfig &, const std::string &, |
| 35 | - int32_t>(), | 35 | + const std::string &, int32_t>(), |
| 36 | py::arg("model"), py::arg("rule_fsts") = "", | 36 | py::arg("model"), py::arg("rule_fsts") = "", |
| 37 | - py::arg("max_num_sentences") = 2) | 37 | + py::arg("rule_fars") = "", py::arg("max_num_sentences") = 2) |
| 38 | .def_readwrite("model", &PyClass::model) | 38 | .def_readwrite("model", &PyClass::model) |
| 39 | .def_readwrite("rule_fsts", &PyClass::rule_fsts) | 39 | .def_readwrite("rule_fsts", &PyClass::rule_fsts) |
| 40 | + .def_readwrite("rule_fars", &PyClass::rule_fars) | ||
| 40 | .def_readwrite("max_num_sentences", &PyClass::max_num_sentences) | 41 | .def_readwrite("max_num_sentences", &PyClass::max_num_sentences) |
| 41 | .def("validate", &PyClass::Validate) | 42 | .def("validate", &PyClass::Validate) |
| 42 | .def("__str__", &PyClass::ToString); | 43 | .def("__str__", &PyClass::ToString); |
| @@ -652,12 +652,14 @@ func sherpaOnnxOfflineTtsModelConfig( | @@ -652,12 +652,14 @@ func sherpaOnnxOfflineTtsModelConfig( | ||
| 652 | func sherpaOnnxOfflineTtsConfig( | 652 | func sherpaOnnxOfflineTtsConfig( |
| 653 | model: SherpaOnnxOfflineTtsModelConfig, | 653 | model: SherpaOnnxOfflineTtsModelConfig, |
| 654 | ruleFsts: String = "", | 654 | ruleFsts: String = "", |
| 655 | + ruleFars: String = "", | ||
| 655 | maxNumSenetences: Int = 2 | 656 | maxNumSenetences: Int = 2 |
| 656 | ) -> SherpaOnnxOfflineTtsConfig { | 657 | ) -> SherpaOnnxOfflineTtsConfig { |
| 657 | return SherpaOnnxOfflineTtsConfig( | 658 | return SherpaOnnxOfflineTtsConfig( |
| 658 | model: model, | 659 | model: model, |
| 659 | rule_fsts: toCPointer(ruleFsts), | 660 | rule_fsts: toCPointer(ruleFsts), |
| 660 | - max_num_sentences: Int32(maxNumSenetences) | 661 | + max_num_sentences: Int32(maxNumSenetences), |
| 662 | + rule_fars: toCPointer(ruleFars) | ||
| 661 | ) | 663 | ) |
| 662 | } | 664 | } |
| 663 | 665 |
| @@ -90,7 +90,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { | @@ -90,7 +90,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { | ||
| 90 | function initSherpaOnnxOfflineTtsConfig(config, Module) { | 90 | function initSherpaOnnxOfflineTtsConfig(config, Module) { |
| 91 | const modelConfig = | 91 | const modelConfig = |
| 92 | initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); | 92 | initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); |
| 93 | - const len = modelConfig.len + 2 * 4; | 93 | + const len = modelConfig.len + 3 * 4; |
| 94 | const ptr = Module._malloc(len); | 94 | const ptr = Module._malloc(len); |
| 95 | 95 | ||
| 96 | let offset = 0; | 96 | let offset = 0; |
| @@ -98,12 +98,19 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) { | @@ -98,12 +98,19 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) { | ||
| 98 | offset += modelConfig.len; | 98 | offset += modelConfig.len; |
| 99 | 99 | ||
| 100 | const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1; | 100 | const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1; |
| 101 | - const buffer = Module._malloc(ruleFstsLen); | 101 | + const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars) + 1; |
| 102 | + | ||
| 103 | + const buffer = Module._malloc(ruleFstsLen + ruleFarsLen); | ||
| 102 | Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); | 104 | Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); |
| 105 | + Module.stringToUTF8(config.ruleFars, buffer + ruleFstsLen, ruleFarsLen); | ||
| 106 | + | ||
| 103 | Module.setValue(ptr + offset, buffer, 'i8*'); | 107 | Module.setValue(ptr + offset, buffer, 'i8*'); |
| 104 | offset += 4; | 108 | offset += 4; |
| 105 | 109 | ||
| 106 | Module.setValue(ptr + offset, config.maxNumSentences, 'i32'); | 110 | Module.setValue(ptr + offset, config.maxNumSentences, 'i32'); |
| 111 | + offset += 4; | ||
| 112 | + | ||
| 113 | + Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*'); | ||
| 107 | 114 | ||
| 108 | return { | 115 | return { |
| 109 | buffer: buffer, ptr: ptr, len: len, config: modelConfig, | 116 | buffer: buffer, ptr: ptr, len: len, config: modelConfig, |
| @@ -190,6 +197,7 @@ function createOfflineTts(Module, myConfig) { | @@ -190,6 +197,7 @@ function createOfflineTts(Module, myConfig) { | ||
| 190 | let offlineTtsConfig = { | 197 | let offlineTtsConfig = { |
| 191 | offlineTtsModelConfig: offlineTtsModelConfig, | 198 | offlineTtsModelConfig: offlineTtsModelConfig, |
| 192 | ruleFsts: '', | 199 | ruleFsts: '', |
| 200 | + ruleFars: '', | ||
| 193 | maxNumSentences: 1, | 201 | maxNumSentences: 1, |
| 194 | } | 202 | } |
| 195 | 203 |
| @@ -18,7 +18,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == | @@ -18,7 +18,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == | ||
| 18 | sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4, | 18 | sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4, |
| 19 | ""); | 19 | ""); |
| 20 | static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == | 20 | static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == |
| 21 | - sizeof(SherpaOnnxOfflineTtsModelConfig) + 2 * 4, | 21 | + sizeof(SherpaOnnxOfflineTtsModelConfig) + 3 * 4, |
| 22 | ""); | 22 | ""); |
| 23 | 23 | ||
| 24 | void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { | 24 | void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { |
| @@ -40,6 +40,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { | @@ -40,6 +40,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { | ||
| 40 | 40 | ||
| 41 | fprintf(stdout, "----------tts config----------\n"); | 41 | fprintf(stdout, "----------tts config----------\n"); |
| 42 | fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); | 42 | fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); |
| 43 | + fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars); | ||
| 43 | fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); | 44 | fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); |
| 44 | } | 45 | } |
| 45 | 46 |
-
请 注册 或 登录 后发表评论