Fangjun Kuang
Committed by GitHub

Support heteronyms in Chinese TTS (#738)

正在显示 49 个修改的文件，包含 308 行增加和 143 行删除
@@ -70,9 +70,9 @@ rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 @@ -70,9 +70,9 @@ rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
70 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 70 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
71 tar xf vits-piper-en_US-amy-low.tar.bz2 71 tar xf vits-piper-en_US-amy-low.tar.bz2
72 node ./test-offline-tts-en.js 72 node ./test-offline-tts-en.js
73 -rm vits-piper-en_US-amy-low.tar.bz2 73 +rm vits-piper-en_US-amy-low*
74 74
75 -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2  
76 -tar xvf vits-zh-aishell3.tar.bz2 75 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  76 +tar xvf vits-icefall-zh-aishell3.tar.bz2
77 node ./test-offline-tts-zh.js 77 node ./test-offline-tts-zh.js
78 -rm vits-zh-aishell3.tar.bz2 78 +rm vits-icefall-zh-aishell3*
@@ -173,6 +173,7 @@ jobs: @@ -173,6 +173,7 @@ jobs:
173 rm -v $dst/lib/libasound.so 173 rm -v $dst/lib/libasound.so
174 rm -v $dst/lib/libonnxruntime.so 174 rm -v $dst/lib/libonnxruntime.so
175 rm -v $dst/lib/libsherpa-onnx-fst.so 175 rm -v $dst/lib/libsherpa-onnx-fst.so
  176 + rm -v $dst/lib/libsherpa-onnx-fstfar.so
176 fi 177 fi
177 178
178 tree $dst 179 tree $dst
@@ -211,6 +211,7 @@ jobs: @@ -211,6 +211,7 @@ jobs:
211 rm -fv $dst/lib/libasound.so 211 rm -fv $dst/lib/libasound.so
212 rm -fv $dst/lib/libonnxruntime.so 212 rm -fv $dst/lib/libonnxruntime.so
213 rm -fv $dst/lib/libsherpa-onnx-fst.so 213 rm -fv $dst/lib/libsherpa-onnx-fst.so
  214 + rm -fv $dst/lib/libsherpa-onnx-fstfar.so
214 fi 215 fi
215 216
216 tree $dst 217 tree $dst
@@ -111,9 +111,11 @@ jobs: @@ -111,9 +111,11 @@ jobs:
111 rm -rf vits-vctk 111 rm -rf vits-vctk
112 112
113 echo "Test vits-zh-aishell3" 113 echo "Test vits-zh-aishell3"
114 - git clone https://huggingface.co/csukuangfj/vits-zh-aishell3 114 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  115 + tar xvf vits-icefall-zh-aishell3.tar.bz2
  116 + rm vits-icefall-zh-aishell3.tar.bz2
115 ./run-vits-zh-aishell3.sh 117 ./run-vits-zh-aishell3.sh
116 - rm -rf vits-zh-aishell3 118 + rm -rf vits-icefall-zh-aishell3
117 119
118 echo "Test vits-piper-en_US-lessac-medium" 120 echo "Test vits-piper-en_US-lessac-medium"
119 git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium 121 git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium
@@ -90,3 +90,4 @@ sherpa-onnx-paraformer-trilingual-zh-cantonese-en @@ -90,3 +90,4 @@ sherpa-onnx-paraformer-trilingual-zh-cantonese-en
90 sr-data 90 sr-data
91 *xcworkspace/xcuserdata/* 91 *xcworkspace/xcuserdata/*
92 92
  93 +vits-icefall-*
1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR) 1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2 project(sherpa-onnx) 2 project(sherpa-onnx)
3 3
4 -set(SHERPA_ONNX_VERSION "1.9.16") 4 +set(SHERPA_ONNX_VERSION "1.9.17")
5 5
6 # Disable warning about 6 # Disable warning about
7 # 7 #
@@ -155,6 +155,7 @@ class MainActivity : AppCompatActivity() { @@ -155,6 +155,7 @@ class MainActivity : AppCompatActivity() {
155 var modelDir: String? 155 var modelDir: String?
156 var modelName: String? 156 var modelName: String?
157 var ruleFsts: String? 157 var ruleFsts: String?
  158 + var ruleFars: String?
158 var lexicon: String? 159 var lexicon: String?
159 var dataDir: String? 160 var dataDir: String?
160 var assets: AssetManager? = application.assets 161 var assets: AssetManager? = application.assets
@@ -165,6 +166,7 @@ class MainActivity : AppCompatActivity() { @@ -165,6 +166,7 @@ class MainActivity : AppCompatActivity() {
165 modelDir = null 166 modelDir = null
166 modelName = null 167 modelName = null
167 ruleFsts = null 168 ruleFsts = null
  169 + ruleFars = null
168 lexicon = null 170 lexicon = null
169 dataDir = null 171 dataDir = null
170 172
@@ -181,9 +183,11 @@ class MainActivity : AppCompatActivity() { @@ -181,9 +183,11 @@ class MainActivity : AppCompatActivity() {
181 // dataDir = "vits-piper-en_US-amy-low/espeak-ng-data" 183 // dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"
182 184
183 // Example 3: 185 // Example 3:
184 - // modelDir = "vits-zh-aishell3"  
185 - // modelName = "vits-aishell3.onnx"  
186 - // ruleFsts = "vits-zh-aishell3/rule.fst" 186 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  187 + // modelDir = "vits-icefall-zh-aishell3"
  188 + // modelName = "model.onnx"
  189 + // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,"
  190 + // ruleFars = "vits-icefall-zh-aishell3/rule.far"
187 // lexicon = "lexicon.txt" 191 // lexicon = "lexicon.txt"
188 192
189 // Example 4: 193 // Example 4:
@@ -202,7 +206,8 @@ class MainActivity : AppCompatActivity() { @@ -202,7 +206,8 @@ class MainActivity : AppCompatActivity() {
202 val config = getOfflineTtsConfig( 206 val config = getOfflineTtsConfig(
203 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", 207 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
204 dataDir = dataDir ?: "", 208 dataDir = dataDir ?: "",
205 - ruleFsts = ruleFsts ?: "" 209 + ruleFsts = ruleFsts ?: "",
  210 + ruleFars = ruleFars ?: "",
206 )!! 211 )!!
207 212
208 tts = OfflineTts(assetManager = assets, config = config) 213 tts = OfflineTts(assetManager = assets, config = config)
@@ -23,6 +23,7 @@ data class OfflineTtsModelConfig( @@ -23,6 +23,7 @@ data class OfflineTtsModelConfig(
23 data class OfflineTtsConfig( 23 data class OfflineTtsConfig(
24 var model: OfflineTtsModelConfig, 24 var model: OfflineTtsModelConfig,
25 var ruleFsts: String = "", 25 var ruleFsts: String = "",
  26 + var ruleFars: String = "",
26 var maxNumSentences: Int = 1, 27 var maxNumSentences: Int = 1,
27 ) 28 )
28 29
@@ -151,7 +152,8 @@ fun getOfflineTtsConfig( @@ -151,7 +152,8 @@ fun getOfflineTtsConfig(
151 modelName: String, 152 modelName: String,
152 lexicon: String, 153 lexicon: String,
153 dataDir: String, 154 dataDir: String,
154 - ruleFsts: String 155 + ruleFsts: String,
  156 + ruleFars: String
155 ): OfflineTtsConfig? { 157 ): OfflineTtsConfig? {
156 return OfflineTtsConfig( 158 return OfflineTtsConfig(
157 model = OfflineTtsModelConfig( 159 model = OfflineTtsModelConfig(
@@ -166,5 +168,6 @@ fun getOfflineTtsConfig( @@ -166,5 +168,6 @@ fun getOfflineTtsConfig(
166 provider = "cpu", 168 provider = "cpu",
167 ), 169 ),
168 ruleFsts = ruleFsts, 170 ruleFsts = ruleFsts,
  171 + ruleFars = ruleFars,
169 ) 172 )
170 } 173 }
@@ -39,6 +39,7 @@ object TtsEngine { @@ -39,6 +39,7 @@ object TtsEngine {
39 private var modelDir: String? = null 39 private var modelDir: String? = null
40 private var modelName: String? = null 40 private var modelName: String? = null
41 private var ruleFsts: String? = null 41 private var ruleFsts: String? = null
  42 + private var ruleFars: String? = null
42 private var lexicon: String? = null 43 private var lexicon: String? = null
43 private var dataDir: String? = null 44 private var dataDir: String? = null
44 private var assets: AssetManager? = null 45 private var assets: AssetManager? = null
@@ -50,6 +51,7 @@ object TtsEngine { @@ -50,6 +51,7 @@ object TtsEngine {
50 modelDir = null 51 modelDir = null
51 modelName = null 52 modelName = null
52 ruleFsts = null 53 ruleFsts = null
  54 + ruleFars = null
53 lexicon = null 55 lexicon = null
54 dataDir = null 56 dataDir = null
55 lang = null 57 lang = null
@@ -73,9 +75,10 @@ object TtsEngine { @@ -73,9 +75,10 @@ object TtsEngine {
73 75
74 // Example 3: 76 // Example 3:
75 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 77 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
76 - // modelDir = "vits-zh-aishell3"  
77 - // modelName = "vits-aishell3.onnx"  
78 - // ruleFsts = "vits-zh-aishell3/rule.fst" 78 + // modelDir = "vits-icefall-zh-aishell3"
  79 + // modelName = "model.onnx"
  80 + // ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
  81 + // ruleFars = "vits-icefall-zh-aishell3/rule.far"
79 // lexicon = "lexicon.txt" 82 // lexicon = "lexicon.txt"
80 // lang = "zho" 83 // lang = "zho"
81 84
@@ -108,7 +111,8 @@ object TtsEngine { @@ -108,7 +111,8 @@ object TtsEngine {
108 val config = getOfflineTtsConfig( 111 val config = getOfflineTtsConfig(
109 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", 112 modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
110 dataDir = dataDir ?: "", 113 dataDir = dataDir ?: "",
111 - ruleFsts = ruleFsts ?: "" 114 + ruleFsts = ruleFsts ?: "",
  115 + ruleFars = ruleFars ?: ""
112 )!! 116 )!!
113 117
114 tts = OfflineTts(assetManager = assets, config = config) 118 tts = OfflineTts(assetManager = assets, config = config)
@@ -124,6 +124,7 @@ echo "Generate xcframework" @@ -124,6 +124,7 @@ echo "Generate xcframework"
124 124
125 mkdir -p "build/simulator/lib" 125 mkdir -p "build/simulator/lib"
126 for f in libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a \ 126 for f in libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a \
  127 + libsherpa-onnx-fstfar.a \
127 libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a \ 128 libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a \
128 libucd.a libpiper_phonemize.a libespeak-ng.a; do 129 libucd.a libpiper_phonemize.a libespeak-ng.a; do
129 lipo -create build/simulator_arm64/lib/${f} \ 130 lipo -create build/simulator_arm64/lib/${f} \
@@ -137,6 +138,7 @@ libtool -static -o build/simulator/sherpa-onnx.a \ @@ -137,6 +138,7 @@ libtool -static -o build/simulator/sherpa-onnx.a \
137 build/simulator/lib/libkaldi-native-fbank-core.a \ 138 build/simulator/lib/libkaldi-native-fbank-core.a \
138 build/simulator/lib/libsherpa-onnx-c-api.a \ 139 build/simulator/lib/libsherpa-onnx-c-api.a \
139 build/simulator/lib/libsherpa-onnx-core.a \ 140 build/simulator/lib/libsherpa-onnx-core.a \
  141 + build/simulator/lib/libsherpa-onnx-fstfar.a \
140 build/simulator/lib/libsherpa-onnx-fst.a \ 142 build/simulator/lib/libsherpa-onnx-fst.a \
141 build/simulator/lib/libsherpa-onnx-kaldifst-core.a \ 143 build/simulator/lib/libsherpa-onnx-kaldifst-core.a \
142 build/simulator/lib/libkaldi-decoder-core.a \ 144 build/simulator/lib/libkaldi-decoder-core.a \
@@ -148,6 +150,7 @@ libtool -static -o build/os64/sherpa-onnx.a \ @@ -148,6 +150,7 @@ libtool -static -o build/os64/sherpa-onnx.a \
148 build/os64/lib/libkaldi-native-fbank-core.a \ 150 build/os64/lib/libkaldi-native-fbank-core.a \
149 build/os64/lib/libsherpa-onnx-c-api.a \ 151 build/os64/lib/libsherpa-onnx-c-api.a \
150 build/os64/lib/libsherpa-onnx-core.a \ 152 build/os64/lib/libsherpa-onnx-core.a \
  153 + build/os64/lib/libsherpa-onnx-fstfar.a \
151 build/os64/lib/libsherpa-onnx-fst.a \ 154 build/os64/lib/libsherpa-onnx-fst.a \
152 build/os64/lib/libsherpa-onnx-kaldifst-core.a \ 155 build/os64/lib/libsherpa-onnx-kaldifst-core.a \
153 build/os64/lib/libkaldi-decoder-core.a \ 156 build/os64/lib/libkaldi-decoder-core.a \
@@ -27,6 +27,7 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \ @@ -27,6 +27,7 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \
27 ./install/lib/libsherpa-onnx-c-api.a \ 27 ./install/lib/libsherpa-onnx-c-api.a \
28 ./install/lib/libsherpa-onnx-core.a \ 28 ./install/lib/libsherpa-onnx-core.a \
29 ./install/lib/libkaldi-native-fbank-core.a \ 29 ./install/lib/libkaldi-native-fbank-core.a \
  30 + ./install/lib/libsherpa-onnx-fstfar.a \
30 ./install/lib/libsherpa-onnx-fst.a \ 31 ./install/lib/libsherpa-onnx-fst.a \
31 ./install/lib/libsherpa-onnx-kaldifst-core.a \ 32 ./install/lib/libsherpa-onnx-kaldifst-core.a \
32 ./install/lib/libkaldi-decoder-core.a \ 33 ./install/lib/libkaldi-decoder-core.a \
@@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd) @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
4 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/ 4 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
5 LDFLAGS := -L ../build/lib 5 LDFLAGS := -L ../build/lib
6 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib 6 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
7 -LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime 7 +LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
8 LDFLAGS += -framework Foundation 8 LDFLAGS += -framework Foundation
9 LDFLAGS += -lc++ 9 LDFLAGS += -lc++
10 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib 10 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
@@ -78,6 +78,7 @@ def get_binaries(): @@ -78,6 +78,7 @@ def get_binaries():
78 "piper_phonemize.dll", 78 "piper_phonemize.dll",
79 "sherpa-onnx-c-api.dll", 79 "sherpa-onnx-c-api.dll",
80 "sherpa-onnx-core.dll", 80 "sherpa-onnx-core.dll",
  81 + "sherpa-onnx-fstfar.lib",
81 "sherpa-onnx-fst.lib", 82 "sherpa-onnx-fst.lib",
82 "sherpa-onnx-kaldifst-core.lib", 83 "sherpa-onnx-kaldifst-core.lib",
83 "sherpa-onnx-portaudio.dll", 84 "sherpa-onnx-portaudio.dll",
@@ -64,12 +64,22 @@ function(download_kaldi_decoder) @@ -64,12 +64,22 @@ function(download_kaldi_decoder)
64 kaldifst_core 64 kaldifst_core
65 fst 65 fst
66 DESTINATION ..) 66 DESTINATION ..)
  67 + if(SHERPA_ONNX_ENABLE_TTS)
  68 + install(TARGETS
  69 + fstfar
  70 + DESTINATION ..)
  71 + endif()
67 else() 72 else()
68 install(TARGETS 73 install(TARGETS
69 kaldi-decoder-core 74 kaldi-decoder-core
70 kaldifst_core 75 kaldifst_core
71 fst 76 fst
72 DESTINATION lib) 77 DESTINATION lib)
  78 + if(SHERPA_ONNX_ENABLE_TTS)
  79 + install(TARGETS
  80 + fstfar
  81 + DESTINATION lib)
  82 + endif()
73 endif() 83 endif()
74 84
75 if(WIN32 AND BUILD_SHARED_LIBS) 85 if(WIN32 AND BUILD_SHARED_LIBS)
@@ -78,6 +88,11 @@ function(download_kaldi_decoder) @@ -78,6 +88,11 @@ function(download_kaldi_decoder)
78 kaldifst_core 88 kaldifst_core
79 fst 89 fst
80 DESTINATION bin) 90 DESTINATION bin)
  91 + if(SHERPA_ONNX_ENABLE_TTS)
  92 + install(TARGETS
  93 + fstfar
  94 + DESTINATION bin)
  95 + endif()
81 endif() 96 endif()
82 endfunction() 97 endfunction()
83 98
@@ -50,13 +50,7 @@ function(download_kaldifst) @@ -50,13 +50,7 @@ function(download_kaldifst)
50 ${kaldifst_SOURCE_DIR}/ 50 ${kaldifst_SOURCE_DIR}/
51 ) 51 )
52 52
53 - target_include_directories(fst  
54 - PUBLIC  
55 - ${openfst_SOURCE_DIR}/src/include  
56 - )  
57 -  
58 set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core") 53 set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core")
59 - set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")  
60 endfunction() 54 endfunction()
61 55
62 download_kaldifst() 56 download_kaldifst()
@@ -4,7 +4,7 @@ function(download_openfst) @@ -4,7 +4,7 @@ function(download_openfst)
4 include(FetchContent) 4 include(FetchContent)
5 5
6 set(openfst_URL "https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz") 6 set(openfst_URL "https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz")
7 - set(openfst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/openfst-win-1.6.5.1.tar.gz") 7 + set(openfst_URL2 "https://hub.nuaa.cf/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz")
8 set(openfst_HASH "SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e") 8 set(openfst_HASH "SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e")
9 9
10 # If you don't have access to the Internet, 10 # If you don't have access to the Internet,
@@ -31,7 +31,7 @@ function(download_openfst) @@ -31,7 +31,7 @@ function(download_openfst)
31 set(HAVE_COMPACT OFF CACHE BOOL "" FORCE) 31 set(HAVE_COMPACT OFF CACHE BOOL "" FORCE)
32 set(HAVE_COMPRESS OFF CACHE BOOL "" FORCE) 32 set(HAVE_COMPRESS OFF CACHE BOOL "" FORCE)
33 set(HAVE_CONST OFF CACHE BOOL "" FORCE) 33 set(HAVE_CONST OFF CACHE BOOL "" FORCE)
34 - set(HAVE_FAR OFF CACHE BOOL "" FORCE) 34 + set(HAVE_FAR ON CACHE BOOL "" FORCE)
35 set(HAVE_GRM OFF CACHE BOOL "" FORCE) 35 set(HAVE_GRM OFF CACHE BOOL "" FORCE)
36 set(HAVE_PDT OFF CACHE BOOL "" FORCE) 36 set(HAVE_PDT OFF CACHE BOOL "" FORCE)
37 set(HAVE_MPDT OFF CACHE BOOL "" FORCE) 37 set(HAVE_MPDT OFF CACHE BOOL "" FORCE)
@@ -70,20 +70,21 @@ function(download_openfst) @@ -70,20 +70,21 @@ function(download_openfst)
70 add_subdirectory(${openfst_SOURCE_DIR} ${openfst_BINARY_DIR} EXCLUDE_FROM_ALL) 70 add_subdirectory(${openfst_SOURCE_DIR} ${openfst_BINARY_DIR} EXCLUDE_FROM_ALL)
71 set(openfst_SOURCE_DIR ${openfst_SOURCE_DIR} PARENT_SCOPE) 71 set(openfst_SOURCE_DIR ${openfst_SOURCE_DIR} PARENT_SCOPE)
72 72
73 - # Rename libfst.so.6 to libkaldifst_fst.so.6 to avoid potential conflicts  
74 - # when kaldifst is installed.  
75 - set_target_properties(fst PROPERTIES OUTPUT_NAME "kaldifst_fst") 73 + # Rename libfst.so.6 to libsherpa-onnx-fst.so.6 to avoid potential conflicts
  74 + # when sherpa-onnx is installed.
  75 + set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")
  76 + set_target_properties(fstfar PROPERTIES OUTPUT_NAME "sherpa-onnx-fstfar")
76 77
77 - install(TARGETS fst  
78 - DESTINATION lib 78 + target_include_directories(fst
  79 + PUBLIC
  80 + ${openfst_SOURCE_DIR}/src/include
79 ) 81 )
80 82
81 - if(KALDIFST_BUILD_PYTHON)  
82 - set_target_properties(fstscript PROPERTIES OUTPUT_NAME "kaldifst_fstscript")  
83 - install(TARGETS fstscript  
84 - DESTINATION lib  
85 - )  
86 - endif() 83 + target_include_directories(fstfar
  84 + PUBLIC
  85 + ${openfst_SOURCE_DIR}/src/include
  86 + )
  87 + # installed in ./kaldi-decoder.cmake
87 endfunction() 88 endfunction()
88 89
89 download_openfst() 90 download_openfst()
@@ -13,4 +13,4 @@ Cflags: -I"${includedir}" @@ -13,4 +13,4 @@ Cflags: -I"${includedir}"
13 # Note: -lcargs is required only for the following file 13 # Note: -lcargs is required only for the following file
14 # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c 14 # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
15 # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c 15 # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
16 -Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ 16 +Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
@@ -20,6 +20,9 @@ class OfflineTtsDemo @@ -20,6 +20,9 @@ class OfflineTtsDemo
20 [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] 20 [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
21 public string RuleFsts { get; set; } 21 public string RuleFsts { get; set; }
22 22
  23 + [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
  24 + public string RuleFars { get; set; }
  25 +
23 [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] 26 [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
24 public string DataDir { get; set; } 27 public string DataDir { get; set; }
25 28
@@ -72,14 +75,15 @@ class OfflineTtsDemo @@ -72,14 +75,15 @@ class OfflineTtsDemo
72 string usage = @" 75 string usage = @"
73 # vits-aishell3 76 # vits-aishell3
74 77
75 -wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2  
76 -tar xf vits-zh-aishell3.tar.bz2 78 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  79 +tar xvf vits-icefall-zh-aishell3.tar.bz2
77 80
78 dotnet run \ 81 dotnet run \
79 - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \  
80 - --vits-tokens=./vits-zh-aishell3/tokens.txt \  
81 - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \  
82 - --tts-rule-fsts=./vits-zh-aishell3/rule.fst \ 82 + --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  83 + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  84 + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  85 + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  86 + --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
83 --sid=66 \ 87 --sid=66 \
84 --debug=1 \ 88 --debug=1 \
85 --output-filename=./aishell3-66.wav \ 89 --output-filename=./aishell3-66.wav \
@@ -127,6 +131,7 @@ to download more models. @@ -127,6 +131,7 @@ to download more models.
127 config.Model.Debug = options.Debug; 131 config.Model.Debug = options.Debug;
128 config.Model.Provider = "cpu"; 132 config.Model.Provider = "cpu";
129 config.RuleFsts = options.RuleFsts; 133 config.RuleFsts = options.RuleFsts;
  134 + config.RuleFars = options.RuleFars;
130 config.MaxNumSentences = options.MaxNumSentences; 135 config.MaxNumSentences = options.MaxNumSentences;
131 136
132 OfflineTts tts = new OfflineTts(config); 137 OfflineTts tts = new OfflineTts(config);
1 #!/usr/bin/env bash 1 #!/usr/bin/env bash
2 set -ex 2 set -ex
3 if [ ! -f ./vits-zh-aishell3/vits-aishell3.onnx ]; then 3 if [ ! -f ./vits-zh-aishell3/vits-aishell3.onnx ]; then
4 - # wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2  
5 - curl -OL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2  
6 - tar xf vits-zh-aishell3.tar.bz2  
7 - rm vits-zh-aishell3.tar.bz2 4 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  5 + tar xvf vits-icefall-zh-aishell3.tar.bz2
  6 + rm vits-icefall-zh-aishell3.tar.bz2
8 fi 7 fi
9 8
10 dotnet run \ 9 dotnet run \
11 - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \  
12 - --vits-tokens=./vits-zh-aishell3/tokens.txt \  
13 - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \  
14 - --tts-rule-fsts=./vits-zh-aishell3/rule.fst \ 10 + --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  11 + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  12 + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  13 + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  14 + --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
15 --sid=66 \ 15 --sid=66 \
16 --debug=1 \ 16 --debug=1 \
17 --output-filename=./aishell3-66.wav \ 17 --output-filename=./aishell3-66.wav \
18 - --text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。" 18 + --text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。长沙长大,去过长白山和长安街。行行出状元。行行,银行行长,行业。"
@@ -26,6 +26,7 @@ func main() { @@ -26,6 +26,7 @@ func main() {
26 flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") 26 flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
27 flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") 27 flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
28 flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst") 28 flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst")
  29 + flag.StringVar(&config.RuleFars, "tts-rule-fars", "", "Path to rule.far")
29 flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size") 30 flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size")
30 31
31 flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models") 32 flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models")
@@ -6,21 +6,32 @@ @@ -6,21 +6,32 @@
6 6
7 for sid in 10 33 99; do 7 for sid in 10 33 99; do
8 ./non-streaming-tts \ 8 ./non-streaming-tts \
9 - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \  
10 - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \  
11 - --vits-tokens=./vits-zh-aishell3/tokens.txt \ 9 + --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  10 + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  11 + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
12 --sid=$sid \ 12 --sid=$sid \
13 --debug=1 \ 13 --debug=1 \
14 --output-filename=./liliana-$sid.wav \ 14 --output-filename=./liliana-$sid.wav \
15 "林美丽最美丽、最漂亮、最可爱!" 15 "林美丽最美丽、最漂亮、最可爱!"
16 16
17 ./non-streaming-tts \ 17 ./non-streaming-tts \
18 - --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \  
19 - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \  
20 - --vits-tokens=./vits-zh-aishell3/tokens.txt \  
21 - --tts-rule-fsts=./vits-zh-aishell3/rule.fst \ 18 + --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  19 + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  20 + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  21 + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
22 --sid=$sid \ 22 --sid=$sid \
23 --debug=1 \ 23 --debug=1 \
24 --output-filename=./numbers-$sid.wav \ 24 --output-filename=./numbers-$sid.wav \
25 "数字12345.6789怎么念" 25 "数字12345.6789怎么念"
  26 +
  27 +./non-streaming-tts \
  28 + --vits-model=./vits-icefall-zh-aishell3/model.onnx \
  29 + --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
  30 + --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
  31 + --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
  32 + --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
  33 + --sid=$sid \
  34 + --debug=1 \
  35 + --output-filename=./heteronym-$sid.wav \
  36 + "万古长存长沙长大长白山长孙长安街"
26 done 37 done
@@ -7,10 +7,9 @@ @@ -7,10 +7,9 @@
7 7
8 import Foundation 8 import Foundation
9 9
10 -  
11 // used to get the path to espeak-ng-data 10 // used to get the path to espeak-ng-data
12 func resourceURL(to path: String) -> String { 11 func resourceURL(to path: String) -> String {
13 - return URL(string: path, relativeTo: Bundle.main.resourceURL)!.path 12 + return URL(string: path, relativeTo: Bundle.main.resourceURL)!.path
14 } 13 }
15 14
16 func getResource(_ forResource: String, _ ofType: String) -> String { 15 func getResource(_ forResource: String, _ ofType: String) -> String {
@@ -50,8 +49,7 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { @@ -50,8 +49,7 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
50 // See the following link 49 // See the following link
51 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3 50 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3
52 51
53 - // vits-vctk.onnx  
54 - let model = getResource("vits-aishell3", "onnx") 52 + let model = getResource("model", "onnx")
55 53
56 // lexicon.txt 54 // lexicon.txt
57 let lexicon = getResource("lexicon", "txt") 55 let lexicon = getResource("lexicon", "txt")
@@ -59,9 +57,19 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { @@ -59,9 +57,19 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
59 // tokens.txt 57 // tokens.txt
60 let tokens = getResource("tokens", "txt") 58 let tokens = getResource("tokens", "txt")
61 59
  60 + // rule.fst
  61 + let ruleFsts = getResource("rule", "fst")
  62 +
  63 + // rule.far
  64 + let ruleFars = getResource("rule", "far")
  65 +
62 let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens) 66 let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens)
63 let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits) 67 let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
64 - var config = sherpaOnnxOfflineTtsConfig(model: modelConfig) 68 + var config = sherpaOnnxOfflineTtsConfig(
  69 + model: modelConfig,
  70 + ruleFsts: ruleFsts,
  71 + ruleFars: ruleFars
  72 + )
65 return SherpaOnnxOfflineTtsWrapper(config: &config) 73 return SherpaOnnxOfflineTtsWrapper(config: &config)
66 } 74 }
67 75
@@ -69,7 +77,6 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper { @@ -69,7 +77,6 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
69 func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper { 77 func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper {
70 // please see https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 78 // please see https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
71 79
72 - // vits-vctk.onnx  
73 let model = getResource("en_US-amy-low", "onnx") 80 let model = getResource("en_US-amy-low", "onnx")
74 81
75 // tokens.txt 82 // tokens.txt
@@ -78,7 +85,8 @@ func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper { @@ -78,7 +85,8 @@ func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper {
78 // in this case, we don't need lexicon.txt 85 // in this case, we don't need lexicon.txt
79 let dataDir = resourceURL(to: "espeak-ng-data") 86 let dataDir = resourceURL(to: "espeak-ng-data")
80 87
81 - let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: "", tokens: tokens, dataDir: dataDir) 88 + let vits = sherpaOnnxOfflineTtsVitsModelConfig(
  89 + model: model, lexicon: "", tokens: tokens, dataDir: dataDir)
82 let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits) 90 let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
83 var config = sherpaOnnxOfflineTtsConfig(model: modelConfig) 91 var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)
84 92
@@ -11,6 +11,7 @@ @@ -11,6 +11,7 @@
11 sherpa-onnx-core.lib; 11 sherpa-onnx-core.lib;
12 kaldi-decoder-core.lib; 12 kaldi-decoder-core.lib;
13 sherpa-onnx-kaldifst-core.lib; 13 sherpa-onnx-kaldifst-core.lib;
  14 + sherpa-onnx-fstfar.lib;
14 sherpa-onnx-fst.lib; 15 sherpa-onnx-fst.lib;
15 kaldi-native-fbank-core.lib; 16 kaldi-native-fbank-core.lib;
16 onnxruntime.lib; 17 onnxruntime.lib;
@@ -11,6 +11,7 @@ @@ -11,6 +11,7 @@
11 sherpa-onnx-core.lib; 11 sherpa-onnx-core.lib;
12 kaldi-decoder-core.lib; 12 kaldi-decoder-core.lib;
13 sherpa-onnx-kaldifst-core.lib; 13 sherpa-onnx-kaldifst-core.lib;
  14 + sherpa-onnx-fstfar.lib;
14 sherpa-onnx-fst.lib; 15 sherpa-onnx-fst.lib;
15 kaldi-native-fbank-core.lib; 16 kaldi-native-fbank-core.lib;
16 onnxruntime.lib; 17 onnxruntime.lib;
@@ -11,6 +11,7 @@ @@ -11,6 +11,7 @@
11 sherpa-onnx-core.lib; 11 sherpa-onnx-core.lib;
12 kaldi-decoder-core.lib; 12 kaldi-decoder-core.lib;
13 sherpa-onnx-kaldifst-core.lib; 13 sherpa-onnx-kaldifst-core.lib;
  14 + sherpa-onnx-fstfar.lib;
14 sherpa-onnx-fst.lib; 15 sherpa-onnx-fst.lib;
15 kaldi-native-fbank-core.lib; 16 kaldi-native-fbank-core.lib;
16 onnxruntime.lib; 17 onnxruntime.lib;
@@ -43,8 +43,8 @@ for text-to-speech. @@ -43,8 +43,8 @@ for text-to-speech.
43 You can use the following command to run it: 43 You can use the following command to run it:
44 44
45 ```bash 45 ```bash
46 -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2  
47 -tar xvf vits-zh-aishell3.tar.bz2 46 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  47 +tar xvf vits-icefall-zh-aishell3.tar.bz2
48 node ./test-offline-tts-zh.js 48 node ./test-offline-tts-zh.js
49 ``` 49 ```
50 50
@@ -22,6 +22,7 @@ function createOfflineTts() { @@ -22,6 +22,7 @@ function createOfflineTts() {
22 let offlineTtsConfig = { 22 let offlineTtsConfig = {
23 offlineTtsModelConfig: offlineTtsModelConfig, 23 offlineTtsModelConfig: offlineTtsModelConfig,
24 ruleFsts: '', 24 ruleFsts: '',
  25 + ruleFars: '',
25 maxNumSentences: 1, 26 maxNumSentences: 1,
26 }; 27 };
27 28
@@ -4,9 +4,9 @@ const sherpa_onnx = require('sherpa-onnx'); @@ -4,9 +4,9 @@ const sherpa_onnx = require('sherpa-onnx');
4 4
5 function createOfflineTts() { 5 function createOfflineTts() {
6 let offlineTtsVitsModelConfig = { 6 let offlineTtsVitsModelConfig = {
7 - model: './vits-zh-aishell3/vits-aishell3.onnx',  
8 - lexicon: './vits-zh-aishell3/lexicon.txt',  
9 - tokens: './vits-zh-aishell3/tokens.txt', 7 + model: './vits-icefall-zh-aishell3/vits-aishell3.onnx',
  8 + lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
  9 + tokens: './vits-icefall-zh-aishell3/tokens.txt',
10 dataDir: '', 10 dataDir: '',
11 noiseScale: 0.667, 11 noiseScale: 0.667,
12 noiseScaleW: 0.8, 12 noiseScaleW: 0.8,
@@ -21,7 +21,9 @@ function createOfflineTts() { @@ -21,7 +21,9 @@ function createOfflineTts() {
21 21
22 let offlineTtsConfig = { 22 let offlineTtsConfig = {
23 offlineTtsModelConfig: offlineTtsModelConfig, 23 offlineTtsModelConfig: offlineTtsModelConfig,
24 - ruleFsts: './vits-zh-aishell3/rule.fst', 24 + ruleFsts:
  25 + './vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst',
  26 + ruleFars: './vits-icefall-zh-aishell3/rule.far',
25 maxNumSentences: 1, 27 maxNumSentences: 1,
26 }; 28 };
27 29
@@ -56,6 +56,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt @@ -56,6 +56,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
56 sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt 56 sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
57 {% endif %} 57 {% endif %}
58 58
  59 +{% if tts_model.rule_fars %}
  60 + rule_fars={{ tts_model.rule_fars }}
  61 + sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt
  62 +{% endif %}
  63 +
59 {% if tts_model.data_dir %} 64 {% if tts_model.data_dir %}
60 data_dir={{ tts_model.data_dir }} 65 data_dir={{ tts_model.data_dir }}
61 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt 66 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt
@@ -54,6 +54,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt @@ -54,6 +54,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
54 sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt 54 sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
55 {% endif %} 55 {% endif %}
56 56
  57 +{% if tts_model.rule_fars %}
  58 + rule_fars={{ tts_model.rule_fars }}
  59 + sed -i.bak s%"ruleFsts = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt
  60 +{% endif %}
  61 +
57 {% if tts_model.data_dir %} 62 {% if tts_model.data_dir %}
58 data_dir={{ tts_model.data_dir }} 63 data_dir={{ tts_model.data_dir }}
59 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt 64 sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt
@@ -33,6 +33,7 @@ class TtsModel: @@ -33,6 +33,7 @@ class TtsModel:
33 model_name: str = "" 33 model_name: str = ""
34 lang: str = "" # en, zh, fr, de, etc. 34 lang: str = "" # en, zh, fr, de, etc.
35 rule_fsts: Optional[List[str]] = None 35 rule_fsts: Optional[List[str]] = None
  36 + rule_fars: Optional[List[str]] = None
36 data_dir: Optional[str] = None 37 data_dir: Optional[str] = None
37 is_char: bool = False 38 is_char: bool = False
38 lang_iso_639_3: str = "" 39 lang_iso_639_3: str = ""
@@ -241,98 +242,94 @@ def get_mimic3_models() -> List[TtsModel]: @@ -241,98 +242,94 @@ def get_mimic3_models() -> List[TtsModel]:
241 242
242 243
243 def get_vits_models() -> List[TtsModel]: 244 def get_vits_models() -> List[TtsModel]:
244 - return [ 245 + chinese_models = [
245 # Chinese 246 # Chinese
246 TtsModel( 247 TtsModel(
247 model_dir="vits-icefall-zh-aishell3", 248 model_dir="vits-icefall-zh-aishell3",
248 model_name="model.onnx", 249 model_name="model.onnx",
249 lang="zh", 250 lang="zh",
250 - rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/rule.fst", 251 + rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst",
  252 + rule_fars="vits-icefall-zh-aishell3/rule.far",
251 ), 253 ),
252 TtsModel( 254 TtsModel(
253 model_dir="vits-zh-aishell3", 255 model_dir="vits-zh-aishell3",
254 model_name="vits-aishell3.onnx", 256 model_name="vits-aishell3.onnx",
255 lang="zh", 257 lang="zh",
256 - rule_fsts="vits-zh-aishell3/rule.fst",  
257 ), 258 ),
258 TtsModel( 259 TtsModel(
259 model_dir="vits-zh-hf-doom", 260 model_dir="vits-zh-hf-doom",
260 model_name="doom.onnx", 261 model_name="doom.onnx",
261 lang="zh", 262 lang="zh",
262 - rule_fsts="vits-zh-hf-doom/rule.fst",  
263 ), 263 ),
264 TtsModel( 264 TtsModel(
265 model_dir="vits-zh-hf-echo", 265 model_dir="vits-zh-hf-echo",
266 model_name="echo.onnx", 266 model_name="echo.onnx",
267 lang="zh", 267 lang="zh",
268 - rule_fsts="vits-zh-hf-echo/rule.fst",  
269 ), 268 ),
270 TtsModel( 269 TtsModel(
271 model_dir="vits-zh-hf-zenyatta", 270 model_dir="vits-zh-hf-zenyatta",
272 model_name="zenyatta.onnx", 271 model_name="zenyatta.onnx",
273 lang="zh", 272 lang="zh",
274 - rule_fsts="vits-zh-hf-zenyatta/rule.fst",  
275 ), 273 ),
276 TtsModel( 274 TtsModel(
277 model_dir="vits-zh-hf-abyssinvoker", 275 model_dir="vits-zh-hf-abyssinvoker",
278 model_name="abyssinvoker.onnx", 276 model_name="abyssinvoker.onnx",
279 lang="zh", 277 lang="zh",
280 - rule_fsts="vits-zh-hf-abyssinvoker/rule.fst",  
281 ), 278 ),
282 TtsModel( 279 TtsModel(
283 model_dir="vits-zh-hf-keqing", 280 model_dir="vits-zh-hf-keqing",
284 model_name="keqing.onnx", 281 model_name="keqing.onnx",
285 lang="zh", 282 lang="zh",
286 - rule_fsts="vits-zh-hf-keqing/rule.fst",  
287 ), 283 ),
288 TtsModel( 284 TtsModel(
289 model_dir="vits-zh-hf-eula", 285 model_dir="vits-zh-hf-eula",
290 model_name="eula.onnx", 286 model_name="eula.onnx",
291 lang="zh", 287 lang="zh",
292 - rule_fsts="vits-zh-hf-eula/rule.fst",  
293 ), 288 ),
294 TtsModel( 289 TtsModel(
295 model_dir="vits-zh-hf-bronya", 290 model_dir="vits-zh-hf-bronya",
296 model_name="bronya.onnx", 291 model_name="bronya.onnx",
297 lang="zh", 292 lang="zh",
298 - rule_fsts="vits-zh-hf-bronya/rule.fst",  
299 ), 293 ),
300 TtsModel( 294 TtsModel(
301 model_dir="vits-zh-hf-theresa", 295 model_dir="vits-zh-hf-theresa",
302 model_name="theresa.onnx", 296 model_name="theresa.onnx",
303 lang="zh", 297 lang="zh",
304 - rule_fsts="vits-zh-hf-theresa/rule.fst",  
305 ), 298 ),
306 TtsModel( 299 TtsModel(
307 model_dir="vits-zh-hf-fanchen-wnj", 300 model_dir="vits-zh-hf-fanchen-wnj",
308 model_name="vits-zh-hf-fanchen-wnj.onnx", 301 model_name="vits-zh-hf-fanchen-wnj.onnx",
309 lang="zh", 302 lang="zh",
310 - rule_fsts="vits-zh-hf-fanchen-wnj/rule.fst",  
311 ), 303 ),
312 TtsModel( 304 TtsModel(
313 model_dir="vits-zh-hf-fanchen-C", 305 model_dir="vits-zh-hf-fanchen-C",
314 model_name="vits-zh-hf-fanchen-C.onnx", 306 model_name="vits-zh-hf-fanchen-C.onnx",
315 lang="zh", 307 lang="zh",
316 - rule_fsts="vits-zh-hf-fanchen-C/rule.fst",  
317 ), 308 ),
318 TtsModel( 309 TtsModel(
319 model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe", 310 model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe",
320 model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx", 311 model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx",
321 lang="zh", 312 lang="zh",
322 - rule_fsts="vits-zh-hf-fanchen-ZhiHuiLaoZhe/rule.fst",  
323 ), 313 ),
324 TtsModel( 314 TtsModel(
325 model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new", 315 model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new",
326 model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx", 316 model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx",
327 lang="zh", 317 lang="zh",
328 - rule_fsts="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new/rule.fst",  
329 ), 318 ),
330 TtsModel( 319 TtsModel(
331 model_dir="vits-zh-hf-fanchen-unity", 320 model_dir="vits-zh-hf-fanchen-unity",
332 model_name="vits-zh-hf-fanchen-unity.onnx", 321 model_name="vits-zh-hf-fanchen-unity.onnx",
333 lang="zh", 322 lang="zh",
334 - rule_fsts="vits-zh-hf-fanchen-unity/rule.fst",  
335 ), 323 ),
  324 + ]
  325 +
  326 + rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
  327 + for m in chinese_models:
  328 + s = [f"{m.model_dir}/{r}" for r in rule_fsts]
  329 + m.rule_fsts = ",".join(s)
  330 + m.rule_fars = f"{m.model_dir}/rule.far"
  331 +
  332 + all_models = chinese_models + [
336 TtsModel( 333 TtsModel(
337 model_dir="vits-cantonese-hf-xiaomaiiwn", 334 model_dir="vits-cantonese-hf-xiaomaiiwn",
338 model_name="vits-cantonese-hf-xiaomaiiwn.onnx", 335 model_name="vits-cantonese-hf-xiaomaiiwn.onnx",
@@ -346,6 +343,8 @@ def get_vits_models() -> List[TtsModel]: @@ -346,6 +343,8 @@ def get_vits_models() -> List[TtsModel]:
346 # fmt: on 343 # fmt: on
347 ] 344 ]
348 345
  346 + return all_models
  347 +
349 348
350 def main(): 349 def main():
351 args = get_args() 350 args = get_args()
@@ -40,6 +40,7 @@ def process_linux(s): @@ -40,6 +40,7 @@ def process_linux(s):
40 "libpiper_phonemize.so.1", 40 "libpiper_phonemize.so.1",
41 "libsherpa-onnx-c-api.so", 41 "libsherpa-onnx-c-api.so",
42 "libsherpa-onnx-core.so", 42 "libsherpa-onnx-core.so",
  43 + "libsherpa-onnx-fstfar.so.7",
43 "libsherpa-onnx-fst.so.6", 44 "libsherpa-onnx-fst.so.6",
44 "libsherpa-onnx-kaldifst-core.so", 45 "libsherpa-onnx-kaldifst-core.so",
45 "libucd.so", 46 "libucd.so",
@@ -68,6 +69,7 @@ def process_macos(s): @@ -68,6 +69,7 @@ def process_macos(s):
68 "libpiper_phonemize.1.dylib", 69 "libpiper_phonemize.1.dylib",
69 "libsherpa-onnx-c-api.dylib", 70 "libsherpa-onnx-c-api.dylib",
70 "libsherpa-onnx-core.dylib", 71 "libsherpa-onnx-core.dylib",
  72 + "libsherpa-onnx-fstfar.7.dylib",
71 "libsherpa-onnx-fst.6.dylib", 73 "libsherpa-onnx-fst.6.dylib",
72 "libsherpa-onnx-kaldifst-core.dylib", 74 "libsherpa-onnx-kaldifst-core.dylib",
73 "libucd.dylib", 75 "libucd.dylib",
@@ -96,6 +98,7 @@ def process_windows(s, rid): @@ -96,6 +98,7 @@ def process_windows(s, rid):
96 "piper_phonemize.dll", 98 "piper_phonemize.dll",
97 "sherpa-onnx-c-api.dll", 99 "sherpa-onnx-c-api.dll",
98 "sherpa-onnx-core.dll", 100 "sherpa-onnx-core.dll",
  101 + "sherpa-onnx-fstfar.lib",
99 "sherpa-onnx-fst.lib", 102 "sherpa-onnx-fst.lib",
100 "sherpa-onnx-kaldifst-core.lib", 103 "sherpa-onnx-kaldifst-core.lib",
101 "ucd.dll", 104 "ucd.dll",
@@ -67,6 +67,7 @@ namespace SherpaOnnx @@ -67,6 +67,7 @@ namespace SherpaOnnx
67 Model = new OfflineTtsModelConfig(); 67 Model = new OfflineTtsModelConfig();
68 RuleFsts = ""; 68 RuleFsts = "";
69 MaxNumSentences = 1; 69 MaxNumSentences = 1;
  70 + RuleFars = "";
70 } 71 }
71 public OfflineTtsModelConfig Model; 72 public OfflineTtsModelConfig Model;
72 73
@@ -74,6 +75,9 @@ namespace SherpaOnnx @@ -74,6 +75,9 @@ namespace SherpaOnnx
74 public string RuleFsts; 75 public string RuleFsts;
75 76
76 public int MaxNumSentences; 77 public int MaxNumSentences;
  78 +
  79 + [MarshalAs(UnmanagedType.LPStr)]
  80 + public string RuleFars;
77 } 81 }
78 82
79 public class OfflineTtsGeneratedAudio 83 public class OfflineTtsGeneratedAudio
@@ -41,6 +41,7 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then @@ -41,6 +41,7 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then
41 cd .. 41 cd ..
42 rm -v libpiper_phonemize.so libpiper_phonemize.so.1.2.0 42 rm -v libpiper_phonemize.so libpiper_phonemize.so.1.2.0
43 rm -v libsherpa-onnx-fst.so 43 rm -v libsherpa-onnx-fst.so
  44 + rm -v libsherpa-onnx-fstfar.so
44 rm -v libonnxruntime.so 45 rm -v libonnxruntime.so
45 rm -v libcargs.so 46 rm -v libcargs.so
46 rm -rf wheel 47 rm -rf wheel
@@ -67,6 +68,7 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then @@ -67,6 +68,7 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then
67 rm -v libonnxruntime.dylib 68 rm -v libonnxruntime.dylib
68 rm -v libpiper_phonemize.1.2.0.dylib libpiper_phonemize.dylib 69 rm -v libpiper_phonemize.1.2.0.dylib libpiper_phonemize.dylib
69 rm -v libsherpa-onnx-fst.dylib 70 rm -v libsherpa-onnx-fst.dylib
  71 + rm -v libsherpa-onnx-fstfar.dylib
70 rm -rf wheel 72 rm -rf wheel
71 ls -lh 73 ls -lh
72 cd .. 74 cd ..
@@ -2,5 +2,5 @@ @@ -2,5 +2,5 @@
2 2
3 package sherpa_onnx 3 package sherpa_onnx
4 4
5 -// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin 5 +// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin
6 import "C" 6 import "C"
@@ -554,6 +554,7 @@ type OfflineTtsModelConfig struct { @@ -554,6 +554,7 @@ type OfflineTtsModelConfig struct {
554 type OfflineTtsConfig struct { 554 type OfflineTtsConfig struct {
555 Model OfflineTtsModelConfig 555 Model OfflineTtsModelConfig
556 RuleFsts string 556 RuleFsts string
  557 + RuleFars string
557 MaxNumSentences int 558 MaxNumSentences int
558 } 559 }
559 560
@@ -583,6 +584,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { @@ -583,6 +584,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
583 c.rule_fsts = C.CString(config.RuleFsts) 584 c.rule_fsts = C.CString(config.RuleFsts)
584 defer C.free(unsafe.Pointer(c.rule_fsts)) 585 defer C.free(unsafe.Pointer(c.rule_fsts))
585 586
  587 + c.rule_fars = C.CString(config.RuleFars)
  588 + defer C.free(unsafe.Pointer(c.rule_fars))
  589 +
586 c.max_num_sentences = C.int(config.MaxNumSentences) 590 c.max_num_sentences = C.int(config.MaxNumSentences)
587 591
588 c.model.vits.model = C.CString(config.Model.Vits.Model) 592 c.model.vits.model = C.CString(config.Model.Vits.Model)
@@ -818,6 +818,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( @@ -818,6 +818,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
818 tts_config.model.debug = config->model.debug; 818 tts_config.model.debug = config->model.debug;
819 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); 819 tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
820 tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); 820 tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  821 + tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
821 tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); 822 tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
822 823
823 if (tts_config.model.debug) { 824 if (tts_config.model.debug) {
@@ -783,6 +783,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig { @@ -783,6 +783,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
783 SherpaOnnxOfflineTtsModelConfig model; 783 SherpaOnnxOfflineTtsModelConfig model;
784 const char *rule_fsts; 784 const char *rule_fsts;
785 int32_t max_num_sentences; 785 int32_t max_num_sentences;
  786 + const char *rule_fars;
786 } SherpaOnnxOfflineTtsConfig; 787 } SherpaOnnxOfflineTtsConfig;
787 788
788 SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio { 789 SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
@@ -164,6 +164,7 @@ endif() @@ -164,6 +164,7 @@ endif()
164 164
165 if(SHERPA_ONNX_ENABLE_TTS) 165 if(SHERPA_ONNX_ENABLE_TTS)
166 target_link_libraries(sherpa-onnx-core piper_phonemize) 166 target_link_libraries(sherpa-onnx-core piper_phonemize)
  167 + target_link_libraries(sherpa-onnx-core fstfar fst)
167 endif() 168 endif()
168 169
169 if(SHERPA_ONNX_ENABLE_CHECK) 170 if(SHERPA_ONNX_ENABLE_CHECK)
@@ -18,7 +18,6 @@ @@ -18,7 +18,6 @@
18 #endif 18 #endif
19 19
20 #include <memory> 20 #include <memory>
21 -#include <regex> // NOLINT  
22 21
23 #include "sherpa-onnx/csrc/macros.h" 22 #include "sherpa-onnx/csrc/macros.h"
24 #include "sherpa-onnx/csrc/onnx-utils.h" 23 #include "sherpa-onnx/csrc/onnx-utils.h"
@@ -26,6 +25,55 @@ @@ -26,6 +25,55 @@
26 25
27 namespace sherpa_onnx { 26 namespace sherpa_onnx {
28 27
  28 +static std::vector<std::string> ProcessHeteronyms(
  29 + const std::vector<std::string> &words) {
  30 + std::vector<std::string> ans;
  31 + ans.reserve(words.size());
  32 +
  33 + int32_t num_words = static_cast<int32_t>(words.size());
  34 + int32_t i = 0;
  35 + int32_t prev = -1;
  36 + while (i < num_words) {
  37 + // start of a phrase #$|
  38 + if ((i + 2 < num_words) && words[i] == "#" && words[i + 1] == "$" &&
  39 + words[i + 2] == "|") {
  40 + if (prev == -1) {
  41 + prev = i + 3;
  42 + }
  43 + i = i + 3;
  44 + continue;
  45 + }
  46 +
  47 + // end of a phrase |$#
  48 + if ((i + 2 < num_words) && words[i] == "|" && words[i + 1] == "$" &&
  49 + words[i + 2] == "#") {
  50 + if (prev != -1) {
  51 + std::ostringstream os;
  52 + for (int32_t k = prev; k < i; ++k) {
  53 + if (words[k] != "|" && words[k] != "$" && words[k] != "#") {
  54 + os << words[k];
  55 + }
  56 + }
  57 + ans.push_back(os.str());
  58 +
  59 + prev = -1;
  60 + }
  61 +
  62 + i += 3;
  63 + continue;
  64 + }
  65 +
  66 + if (prev == -1) {
  67 + // not inside a phrase
  68 + ans.push_back(words[i]);
  69 + }
  70 +
  71 + ++i;
  72 + }
  73 +
  74 + return ans;
  75 +}
  76 +
29 static void ToLowerCase(std::string *in_out) { 77 static void ToLowerCase(std::string *in_out) {
30 std::transform(in_out->begin(), in_out->end(), in_out->begin(), 78 std::transform(in_out->begin(), in_out->end(), in_out->begin(),
31 [](unsigned char c) { return std::tolower(c); }); 79 [](unsigned char c) { return std::tolower(c); });
@@ -148,36 +196,9 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese( @@ -148,36 +196,9 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese(
148 const std::string &_text) const { 196 const std::string &_text) const {
149 std::string text(_text); 197 std::string text(_text);
150 ToLowerCase(&text); 198 ToLowerCase(&text);
151 - std::vector<std::string> words;  
152 - if (pattern_) {  
153 - // Handle polyphones  
154 - size_t pos = 0;  
155 - auto begin = std::sregex_iterator(text.begin(), text.end(), *pattern_);  
156 - auto end = std::sregex_iterator();  
157 - for (std::sregex_iterator i = begin; i != end; ++i) {  
158 - std::smatch match = *i;  
159 - if (pos < match.position()) {  
160 - auto this_segment = text.substr(pos, match.position() - pos);  
161 - auto this_segment_words = SplitUtf8(this_segment);  
162 - words.insert(words.end(), this_segment_words.begin(),  
163 - this_segment_words.end());  
164 - pos = match.position() + match.length();  
165 - } else if (pos == match.position()) {  
166 - pos = match.position() + match.length();  
167 - }  
168 199
169 - words.push_back(match.str());  
170 - }  
171 -  
172 - if (pos < text.size()) {  
173 - auto this_segment = text.substr(pos, text.size() - pos);  
174 - auto this_segment_words = SplitUtf8(this_segment);  
175 - words.insert(words.end(), this_segment_words.begin(),  
176 - this_segment_words.end());  
177 - }  
178 - } else {  
179 - words = SplitUtf8(text);  
180 - } 200 + std::vector<std::string> words = SplitUtf8(text);
  201 + words = ProcessHeteronyms(words);
181 202
182 if (debug_) { 203 if (debug_) {
183 fprintf(stderr, "Input text in string: %s\n", text.c_str()); 204 fprintf(stderr, "Input text in string: %s\n", text.c_str());
@@ -357,9 +378,6 @@ void Lexicon::InitLexicon(std::istream &is) { @@ -357,9 +378,6 @@ void Lexicon::InitLexicon(std::istream &is) {
357 std::string line; 378 std::string line;
358 std::string phone; 379 std::string phone;
359 380
360 - std::ostringstream os;  
361 - std::string sep;  
362 -  
363 while (std::getline(is, line)) { 381 while (std::getline(is, line)) {
364 std::istringstream iss(line); 382 std::istringstream iss(line);
365 383
@@ -381,18 +399,9 @@ void Lexicon::InitLexicon(std::istream &is) { @@ -381,18 +399,9 @@ void Lexicon::InitLexicon(std::istream &is) {
381 if (ids.empty()) { 399 if (ids.empty()) {
382 continue; 400 continue;
383 } 401 }
384 - if (language_ == Language::kChinese && word.size() > 3) {  
385 - // this is not a single word;  
386 - os << sep << word;  
387 - sep = "|";  
388 - }  
389 402
390 word2ids_.insert({std::move(word), std::move(ids)}); 403 word2ids_.insert({std::move(word), std::move(ids)});
391 } 404 }
392 -  
393 - if (!sep.empty()) {  
394 - pattern_ = std::make_unique<std::regex>(os.str());  
395 - }  
396 } 405 }
397 406
398 void Lexicon::InitPunctuations(const std::string &punctuations) { 407 void Lexicon::InitPunctuations(const std::string &punctuations) {
@@ -7,7 +7,6 @@ @@ -7,7 +7,6 @@
7 7
8 #include <cstdint> 8 #include <cstdint>
9 #include <memory> 9 #include <memory>
10 -#include <regex> // NOLINT  
11 #include <string> 10 #include <string>
12 #include <unordered_map> 11 #include <unordered_map>
13 #include <unordered_set> 12 #include <unordered_set>
@@ -65,9 +64,6 @@ class Lexicon : public OfflineTtsFrontend { @@ -65,9 +64,6 @@ class Lexicon : public OfflineTtsFrontend {
65 std::unordered_map<std::string, int32_t> token2id_; 64 std::unordered_map<std::string, int32_t> token2id_;
66 Language language_; 65 Language language_;
67 bool debug_; 66 bool debug_;
68 -  
69 - // for Chinese polyphones  
70 - std::unique_ptr<std::regex> pattern_;  
71 }; 67 };
72 68
73 } // namespace sherpa_onnx 69 } // namespace sherpa_onnx
@@ -15,6 +15,9 @@ @@ -15,6 +15,9 @@
15 #include "android/asset_manager.h" 15 #include "android/asset_manager.h"
16 #include "android/asset_manager_jni.h" 16 #include "android/asset_manager_jni.h"
17 #endif 17 #endif
  18 +
  19 +#include "fst/extensions/far/far.h"
  20 +#include "kaldifst/csrc/kaldi-fst-io.h"
18 #include "kaldifst/csrc/text-normalizer.h" 21 #include "kaldifst/csrc/text-normalizer.h"
19 #include "sherpa-onnx/csrc/lexicon.h" 22 #include "sherpa-onnx/csrc/lexicon.h"
20 #include "sherpa-onnx/csrc/macros.h" 23 #include "sherpa-onnx/csrc/macros.h"
@@ -46,6 +49,32 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -46,6 +49,32 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
46 tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f)); 49 tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
47 } 50 }
48 } 51 }
  52 +
  53 + if (!config.rule_fars.empty()) {
  54 + if (config.model.debug) {
  55 + SHERPA_ONNX_LOGE("Loading FST archives");
  56 + }
  57 + std::vector<std::string> files;
  58 + SplitStringToVector(config.rule_fars, ",", false, &files);
  59 + for (const auto &f : files) {
  60 + if (config.model.debug) {
  61 + SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
  62 + }
  63 + std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
  64 + fst::FarReader<fst::StdArc>::Open(f));
  65 + for (; !reader->Done(); reader->Next()) {
  66 + std::unique_ptr<fst::StdConstFst> r(
  67 + fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));
  68 +
  69 + tn_list_.push_back(
  70 + std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
  71 + }
  72 + }
  73 +
  74 + if (config.model.debug) {
  75 + SHERPA_ONNX_LOGE("FST archives loaded!");
  76 + }
  77 + }
49 } 78 }
50 79
51 #if __ANDROID_API__ >= 9 80 #if __ANDROID_API__ >= 9
@@ -20,7 +20,14 @@ void OfflineTtsConfig::Register(ParseOptions *po) { @@ -20,7 +20,14 @@ void OfflineTtsConfig::Register(ParseOptions *po) {
20 "It not empty, it contains a list of rule FST filenames." 20 "It not empty, it contains a list of rule FST filenames."
21 "Multiple filenames are separated by a comma and they are " 21 "Multiple filenames are separated by a comma and they are "
22 "applied from left to right. An example value: " 22 "applied from left to right. An example value: "
23 - "rule1.fst,rule2,fst,rule3.fst"); 23 + "rule1.fst,rule2.fst,rule3.fst");
  24 +
  25 + po->Register("tts-rule-fars", &rule_fars,
  26 + "It not empty, it contains a list of rule FST archive filenames."
  27 + "Multiple filenames are separated by a comma and they are "
  28 + "applied from left to right. An example value: "
  29 + "rule1.far,rule2.far,rule3.far. Note that an *.far can contain "
  30 + "multiple *.fst files");
24 31
25 po->Register( 32 po->Register(
26 "tts-max-num-sentences", &max_num_sentences, 33 "tts-max-num-sentences", &max_num_sentences,
@@ -41,6 +48,17 @@ bool OfflineTtsConfig::Validate() const { @@ -41,6 +48,17 @@ bool OfflineTtsConfig::Validate() const {
41 } 48 }
42 } 49 }
43 50
  51 + if (!rule_fars.empty()) {
  52 + std::vector<std::string> files;
  53 + SplitStringToVector(rule_fars, ",", false, &files);
  54 + for (const auto &f : files) {
  55 + if (!FileExists(f)) {
  56 + SHERPA_ONNX_LOGE("Rule far %s does not exist. ", f.c_str());
  57 + return false;
  58 + }
  59 + }
  60 + }
  61 +
44 return model.Validate(); 62 return model.Validate();
45 } 63 }
46 64
@@ -50,6 +68,7 @@ std::string OfflineTtsConfig::ToString() const { @@ -50,6 +68,7 @@ std::string OfflineTtsConfig::ToString() const {
50 os << "OfflineTtsConfig("; 68 os << "OfflineTtsConfig(";
51 os << "model=" << model.ToString() << ", "; 69 os << "model=" << model.ToString() << ", ";
52 os << "rule_fsts=\"" << rule_fsts << "\", "; 70 os << "rule_fsts=\"" << rule_fsts << "\", ";
  71 + os << "rule_fars=\"" << rule_fars << "\", ";
53 os << "max_num_sentences=" << max_num_sentences << ")"; 72 os << "max_num_sentences=" << max_num_sentences << ")";
54 73
55 return os.str(); 74 return os.str();
@@ -29,6 +29,9 @@ struct OfflineTtsConfig { @@ -29,6 +29,9 @@ struct OfflineTtsConfig {
29 // If there are multiple rules, they are applied from left to right. 29 // If there are multiple rules, they are applied from left to right.
30 std::string rule_fsts; 30 std::string rule_fsts;
31 31
  32 + // If there are multiple FST archives, they are applied from left to right.
  33 + std::string rule_fars;
  34 +
32 // Maximum number of sentences that we process at a time. 35 // Maximum number of sentences that we process at a time.
33 // This is to avoid OOM for very long input text. 36 // This is to avoid OOM for very long input text.
34 // If you set it to -1, then we process all sentences in a single batch. 37 // If you set it to -1, then we process all sentences in a single batch.
@@ -36,9 +39,11 @@ struct OfflineTtsConfig { @@ -36,9 +39,11 @@ struct OfflineTtsConfig {
36 39
37 OfflineTtsConfig() = default; 40 OfflineTtsConfig() = default;
38 OfflineTtsConfig(const OfflineTtsModelConfig &model, 41 OfflineTtsConfig(const OfflineTtsModelConfig &model,
39 - const std::string &rule_fsts, int32_t max_num_sentences) 42 + const std::string &rule_fsts, const std::string &rule_fars,
  43 + int32_t max_num_sentences)
40 : model(model), 44 : model(model),
41 rule_fsts(rule_fsts), 45 rule_fsts(rule_fsts),
  46 + rule_fars(rule_fars),
42 max_num_sentences(max_num_sentences) {} 47 max_num_sentences(max_num_sentences) {}
43 48
44 void Register(ParseOptions *po); 49 void Register(ParseOptions *po);
@@ -878,6 +878,13 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { @@ -878,6 +878,13 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
878 ans.rule_fsts = p; 878 ans.rule_fsts = p;
879 env->ReleaseStringUTFChars(s, p); 879 env->ReleaseStringUTFChars(s, p);
880 880
  881 + // for ruleFars
  882 + fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");
  883 + s = (jstring)env->GetObjectField(config, fid);
  884 + p = env->GetStringUTFChars(s, nullptr);
  885 + ans.rule_fars = p;
  886 + env->ReleaseStringUTFChars(s, p);
  887 +
881 fid = env->GetFieldID(cls, "maxNumSentences", "I"); 888 fid = env->GetFieldID(cls, "maxNumSentences", "I");
882 ans.max_num_sentences = env->GetIntField(config, fid); 889 ans.max_num_sentences = env->GetIntField(config, fid);
883 890
@@ -32,11 +32,12 @@ static void PybindOfflineTtsConfig(py::module *m) { @@ -32,11 +32,12 @@ static void PybindOfflineTtsConfig(py::module *m) {
32 py::class_<PyClass>(*m, "OfflineTtsConfig") 32 py::class_<PyClass>(*m, "OfflineTtsConfig")
33 .def(py::init<>()) 33 .def(py::init<>())
34 .def(py::init<const OfflineTtsModelConfig &, const std::string &, 34 .def(py::init<const OfflineTtsModelConfig &, const std::string &,
35 - int32_t>(), 35 + const std::string &, int32_t>(),
36 py::arg("model"), py::arg("rule_fsts") = "", 36 py::arg("model"), py::arg("rule_fsts") = "",
37 - py::arg("max_num_sentences") = 2) 37 + py::arg("rule_fars") = "", py::arg("max_num_sentences") = 2)
38 .def_readwrite("model", &PyClass::model) 38 .def_readwrite("model", &PyClass::model)
39 .def_readwrite("rule_fsts", &PyClass::rule_fsts) 39 .def_readwrite("rule_fsts", &PyClass::rule_fsts)
  40 + .def_readwrite("rule_fars", &PyClass::rule_fars)
40 .def_readwrite("max_num_sentences", &PyClass::max_num_sentences) 41 .def_readwrite("max_num_sentences", &PyClass::max_num_sentences)
41 .def("validate", &PyClass::Validate) 42 .def("validate", &PyClass::Validate)
42 .def("__str__", &PyClass::ToString); 43 .def("__str__", &PyClass::ToString);
@@ -652,12 +652,14 @@ func sherpaOnnxOfflineTtsModelConfig( @@ -652,12 +652,14 @@ func sherpaOnnxOfflineTtsModelConfig(
652 func sherpaOnnxOfflineTtsConfig( 652 func sherpaOnnxOfflineTtsConfig(
653 model: SherpaOnnxOfflineTtsModelConfig, 653 model: SherpaOnnxOfflineTtsModelConfig,
654 ruleFsts: String = "", 654 ruleFsts: String = "",
  655 + ruleFars: String = "",
655 maxNumSenetences: Int = 2 656 maxNumSenetences: Int = 2
656 ) -> SherpaOnnxOfflineTtsConfig { 657 ) -> SherpaOnnxOfflineTtsConfig {
657 return SherpaOnnxOfflineTtsConfig( 658 return SherpaOnnxOfflineTtsConfig(
658 model: model, 659 model: model,
659 rule_fsts: toCPointer(ruleFsts), 660 rule_fsts: toCPointer(ruleFsts),
660 - max_num_sentences: Int32(maxNumSenetences) 661 + max_num_sentences: Int32(maxNumSenetences),
  662 + rule_fars: toCPointer(ruleFars)
661 ) 663 )
662 } 664 }
663 665
@@ -90,7 +90,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { @@ -90,7 +90,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
90 function initSherpaOnnxOfflineTtsConfig(config, Module) { 90 function initSherpaOnnxOfflineTtsConfig(config, Module) {
91 const modelConfig = 91 const modelConfig =
92 initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); 92 initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
93 - const len = modelConfig.len + 2 * 4; 93 + const len = modelConfig.len + 3 * 4;
94 const ptr = Module._malloc(len); 94 const ptr = Module._malloc(len);
95 95
96 let offset = 0; 96 let offset = 0;
@@ -98,12 +98,19 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) { @@ -98,12 +98,19 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) {
98 offset += modelConfig.len; 98 offset += modelConfig.len;
99 99
100 const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1; 100 const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1;
101 - const buffer = Module._malloc(ruleFstsLen); 101 + const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars) + 1;
  102 +
  103 + const buffer = Module._malloc(ruleFstsLen + ruleFarsLen);
102 Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); 104 Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
  105 + Module.stringToUTF8(config.ruleFars, buffer + ruleFstsLen, ruleFarsLen);
  106 +
103 Module.setValue(ptr + offset, buffer, 'i8*'); 107 Module.setValue(ptr + offset, buffer, 'i8*');
104 offset += 4; 108 offset += 4;
105 109
106 Module.setValue(ptr + offset, config.maxNumSentences, 'i32'); 110 Module.setValue(ptr + offset, config.maxNumSentences, 'i32');
  111 + offset += 4;
  112 +
  113 + Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*');
107 114
108 return { 115 return {
109 buffer: buffer, ptr: ptr, len: len, config: modelConfig, 116 buffer: buffer, ptr: ptr, len: len, config: modelConfig,
@@ -190,6 +197,7 @@ function createOfflineTts(Module, myConfig) { @@ -190,6 +197,7 @@ function createOfflineTts(Module, myConfig) {
190 let offlineTtsConfig = { 197 let offlineTtsConfig = {
191 offlineTtsModelConfig: offlineTtsModelConfig, 198 offlineTtsModelConfig: offlineTtsModelConfig,
192 ruleFsts: '', 199 ruleFsts: '',
  200 + ruleFars: '',
193 maxNumSentences: 1, 201 maxNumSentences: 1,
194 } 202 }
195 203
@@ -18,7 +18,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == @@ -18,7 +18,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
18 sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4, 18 sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4,
19 ""); 19 "");
20 static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == 20 static_assert(sizeof(SherpaOnnxOfflineTtsConfig) ==
21 - sizeof(SherpaOnnxOfflineTtsModelConfig) + 2 * 4, 21 + sizeof(SherpaOnnxOfflineTtsModelConfig) + 3 * 4,
22 ""); 22 "");
23 23
24 void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { 24 void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
@@ -40,6 +40,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { @@ -40,6 +40,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
40 40
41 fprintf(stdout, "----------tts config----------\n"); 41 fprintf(stdout, "----------tts config----------\n");
42 fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); 42 fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
  43 + fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars);
43 fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); 44 fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
44 } 45 }
45 46