Fangjun Kuang
Committed by GitHub

Add more text-to-speech models from Piper (#988)

@@ -31,6 +31,7 @@ with the following APIs @@ -31,6 +31,7 @@ with the following APIs
31 - C++, C, Python, Go, ``C#`` 31 - C++, C, Python, Go, ``C#``
32 - Java, Kotlin, JavaScript 32 - Java, Kotlin, JavaScript
33 - Swift 33 - Swift
  34 + - Dart
34 35
35 ## Links for pre-built Android APKs 36 ## Links for pre-built Android APKs
36 37
@@ -52,13 +52,13 @@ def get_2nd_models(): @@ -52,13 +52,13 @@ def get_2nd_models():
52 short_name="whisper_tiny", 52 short_name="whisper_tiny",
53 cmd=""" 53 cmd="""
54 pushd $model_name 54 pushd $model_name
55 - rm -v tiny.en-encoder.onnx  
56 - rm -v tiny.en-decoder.onnx 55 + rm -fv tiny.en-encoder.onnx
  56 + rm -fv tiny.en-decoder.onnx
57 rm -rf test_wavs 57 rm -rf test_wavs
58 - rm -v *.py  
59 - rm -v requirements.txt  
60 - rm -v .gitignore  
61 - rm -v README.md 58 + rm -fv *.py
  59 + rm -fv requirements.txt
  60 + rm -fv .gitignore
  61 + rm -fv README.md
62 62
63 ls -lh 63 ls -lh
64 64
@@ -73,7 +73,7 @@ def get_2nd_models(): @@ -73,7 +73,7 @@ def get_2nd_models():
73 cmd=""" 73 cmd="""
74 pushd $model_name 74 pushd $model_name
75 75
76 - rm -v README.md 76 + rm -fv README.md
77 rm -rfv test_wavs 77 rm -rfv test_wavs
78 rm model.onnx 78 rm model.onnx
79 79
@@ -91,7 +91,7 @@ def get_2nd_models(): @@ -91,7 +91,7 @@ def get_2nd_models():
91 pushd $model_name 91 pushd $model_name
92 92
93 rm -rfv test_wavs 93 rm -rfv test_wavs
94 - rm -v README.md 94 + rm -fv README.md
95 mv -v data/lang_char/tokens.txt ./ 95 mv -v data/lang_char/tokens.txt ./
96 rm -rfv data/lang_char 96 rm -rfv data/lang_char
97 97
@@ -119,15 +119,15 @@ def get_1st_models(): @@ -119,15 +119,15 @@ def get_1st_models():
119 short_name="zipformer", 119 short_name="zipformer",
120 cmd=""" 120 cmd="""
121 pushd $model_name 121 pushd $model_name
122 - rm -v decoder-epoch-99-avg-1.int8.onnx  
123 - rm -v encoder-epoch-99-avg-1.onnx  
124 - rm -v joiner-epoch-99-avg-1.onnx  
125 -  
126 - rm -v *.sh  
127 - rm -v bpe.model  
128 - rm -v README.md  
129 - rm -v .gitattributes  
130 - rm -v *state* 122 + rm -fv decoder-epoch-99-avg-1.int8.onnx
  123 + rm -fv encoder-epoch-99-avg-1.onnx
  124 + rm -fv joiner-epoch-99-avg-1.onnx
  125 +
  126 + rm -fv *.sh
  127 + rm -fv bpe.model
  128 + rm -fv README.md
  129 + rm -fv .gitattributes
  130 + rm -fv *state*
131 rm -rfv test_wavs 131 rm -rfv test_wavs
132 132
133 ls -lh 133 ls -lh
@@ -142,12 +142,12 @@ def get_1st_models(): @@ -142,12 +142,12 @@ def get_1st_models():
142 short_name="zipformer2", 142 short_name="zipformer2",
143 cmd=""" 143 cmd="""
144 pushd $model_name 144 pushd $model_name
145 - rm -v encoder-epoch-99-avg-1-chunk-16-left-128.onnx  
146 - rm -v decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx  
147 - rm -v joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx 145 + rm -fv encoder-epoch-99-avg-1-chunk-16-left-128.onnx
  146 + rm -fv decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
  147 + rm -fv joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx
148 148
149 - rm -v README.md  
150 - rm -v bpe.model 149 + rm -fv README.md
  150 + rm -fv bpe.model
151 rm -rfv test_wavs 151 rm -rfv test_wavs
152 152
153 ls -lh 153 ls -lh
@@ -162,14 +162,14 @@ def get_1st_models(): @@ -162,14 +162,14 @@ def get_1st_models():
162 short_name="zipformer2", 162 short_name="zipformer2",
163 cmd=""" 163 cmd="""
164 pushd $model_name 164 pushd $model_name
165 - rm -v exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx  
166 - rm -v exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx  
167 - rm -v exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx 165 + rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
  166 + rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
  167 + rm -fv exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx
168 168
169 - rm -v data/lang_char/lexicon.txt  
170 - rm -v data/lang_char/words.txt 169 + rm -fv data/lang_char/lexicon.txt
  170 + rm -fv data/lang_char/words.txt
171 rm -rfv test_wavs 171 rm -rfv test_wavs
172 - rm -v README.md 172 + rm -fv README.md
173 173
174 ls -lh exp/ 174 ls -lh exp/
175 ls -lh data/lang_char 175 ls -lh data/lang_char
@@ -184,11 +184,11 @@ def get_1st_models(): @@ -184,11 +184,11 @@ def get_1st_models():
184 short_name="zipformer", 184 short_name="zipformer",
185 cmd=""" 185 cmd="""
186 pushd $model_name 186 pushd $model_name
187 - rm -v encoder-epoch-29-avg-9-with-averaged-model.onnx  
188 - rm -v decoder-epoch-29-avg-9-with-averaged-model.int8.onnx  
189 - rm -v joiner-epoch-29-avg-9-with-averaged-model.int8.onnx 187 + rm -fv encoder-epoch-29-avg-9-with-averaged-model.onnx
  188 + rm -fv decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
  189 + rm -fv joiner-epoch-29-avg-9-with-averaged-model.int8.onnx
190 190
191 - rm -v *.sh 191 + rm -fv *.sh
192 rm -rf test_wavs 192 rm -rf test_wavs
193 rm README.md 193 rm README.md
194 194
@@ -204,11 +204,11 @@ def get_1st_models(): @@ -204,11 +204,11 @@ def get_1st_models():
204 short_name="small_zipformer", 204 short_name="small_zipformer",
205 cmd=""" 205 cmd="""
206 pushd $model_name 206 pushd $model_name
207 - rm -v encoder-epoch-99-avg-1.onnx  
208 - rm -v decoder-epoch-99-avg-1.int8.onnx  
209 - rm -v joiner-epoch-99-avg-1.onnx 207 + rm -fv encoder-epoch-99-avg-1.onnx
  208 + rm -fv decoder-epoch-99-avg-1.int8.onnx
  209 + rm -fv joiner-epoch-99-avg-1.onnx
210 210
211 - rm -v *.sh 211 + rm -fv *.sh
212 rm -rf test_wavs 212 rm -rf test_wavs
213 rm README.md 213 rm README.md
214 214
@@ -224,11 +224,11 @@ def get_1st_models(): @@ -224,11 +224,11 @@ def get_1st_models():
224 short_name="small_zipformer", 224 short_name="small_zipformer",
225 cmd=""" 225 cmd="""
226 pushd $model_name 226 pushd $model_name
227 - rm -v encoder-epoch-99-avg-1.onnx  
228 - rm -v decoder-epoch-99-avg-1.int8.onnx  
229 - rm -v joiner-epoch-99-avg-1.onnx 227 + rm -fv encoder-epoch-99-avg-1.onnx
  228 + rm -fv decoder-epoch-99-avg-1.int8.onnx
  229 + rm -fv joiner-epoch-99-avg-1.onnx
230 230
231 - rm -v *.sh 231 + rm -fv *.sh
232 rm -rf test_wavs 232 rm -rf test_wavs
233 rm README.md 233 rm README.md
234 234
@@ -52,15 +52,15 @@ def get_models(): @@ -52,15 +52,15 @@ def get_models():
52 short_name="zipformer", 52 short_name="zipformer",
53 cmd=""" 53 cmd="""
54 pushd $model_name 54 pushd $model_name
55 - rm -v decoder-epoch-99-avg-1.int8.onnx  
56 - rm -v encoder-epoch-99-avg-1.onnx  
57 - rm -v joiner-epoch-99-avg-1.onnx  
58 -  
59 - rm -v *.sh  
60 - rm -v bpe.model  
61 - rm -v README.md  
62 - rm -v .gitattributes  
63 - rm -v *state* 55 + rm -fv decoder-epoch-99-avg-1.int8.onnx
  56 + rm -fv encoder-epoch-99-avg-1.onnx
  57 + rm -fv joiner-epoch-99-avg-1.onnx
  58 +
  59 + rm -fv *.sh
  60 + rm -fv bpe.model
  61 + rm -fv README.md
  62 + rm -fv .gitattributes
  63 + rm -fv *state*
64 rm -rfv test_wavs 64 rm -rfv test_wavs
65 65
66 ls -lh 66 ls -lh
@@ -75,12 +75,12 @@ def get_models(): @@ -75,12 +75,12 @@ def get_models():
75 short_name="zipformer2", 75 short_name="zipformer2",
76 cmd=""" 76 cmd="""
77 pushd $model_name 77 pushd $model_name
78 - rm -v encoder-epoch-99-avg-1-chunk-16-left-128.onnx  
79 - rm -v decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx  
80 - rm -v joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx 78 + rm -fv encoder-epoch-99-avg-1-chunk-16-left-128.onnx
  79 + rm -fv decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
  80 + rm -fv joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx
81 81
82 - rm -v README.md  
83 - rm -v bpe.model 82 + rm -fv README.md
  83 + rm -fv bpe.model
84 rm -rfv test_wavs 84 rm -rfv test_wavs
85 85
86 ls -lh 86 ls -lh
@@ -95,14 +95,14 @@ def get_models(): @@ -95,14 +95,14 @@ def get_models():
95 short_name="zipformer2", 95 short_name="zipformer2",
96 cmd=""" 96 cmd="""
97 pushd $model_name 97 pushd $model_name
98 - rm -v exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx  
99 - rm -v exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx  
100 - rm -v exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx 98 + rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
  99 + rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
  100 + rm -fv exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx
101 101
102 - rm -v data/lang_char/lexicon.txt  
103 - rm -v data/lang_char/words.txt 102 + rm -fv data/lang_char/lexicon.txt
  103 + rm -fv data/lang_char/words.txt
104 rm -rfv test_wavs 104 rm -rfv test_wavs
105 - rm -v README.md 105 + rm -fv README.md
106 106
107 ls -lh exp/ 107 ls -lh exp/
108 ls -lh data/lang_char 108 ls -lh data/lang_char
@@ -117,12 +117,12 @@ def get_models(): @@ -117,12 +117,12 @@ def get_models():
117 short_name="zipformer", 117 short_name="zipformer",
118 cmd=""" 118 cmd="""
119 pushd $model_name 119 pushd $model_name
120 - rm -v encoder-epoch-29-avg-9-with-averaged-model.onnx  
121 - rm -v decoder-epoch-29-avg-9-with-averaged-model.int8.onnx  
122 - rm -v joiner-epoch-29-avg-9-with-averaged-model.int8.onnx 120 + rm -fv encoder-epoch-29-avg-9-with-averaged-model.onnx
  121 + rm -fv decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
  122 + rm -fv joiner-epoch-29-avg-9-with-averaged-model.int8.onnx
123 123
124 - rm -v *.sh  
125 - rm -rf test_wavs 124 + rm -fv *.sh
  125 + rm -rfv test_wavs
126 rm README.md 126 rm README.md
127 127
128 ls -lh 128 ls -lh
@@ -137,11 +137,11 @@ def get_models(): @@ -137,11 +137,11 @@ def get_models():
137 short_name="small_zipformer", 137 short_name="small_zipformer",
138 cmd=""" 138 cmd="""
139 pushd $model_name 139 pushd $model_name
140 - rm -v encoder-epoch-99-avg-1.onnx  
141 - rm -v decoder-epoch-99-avg-1.int8.onnx  
142 - rm -v joiner-epoch-99-avg-1.onnx 140 + rm -fv encoder-epoch-99-avg-1.onnx
  141 + rm -fv decoder-epoch-99-avg-1.int8.onnx
  142 + rm -fv joiner-epoch-99-avg-1.onnx
143 143
144 - rm -v *.sh 144 + rm -fv *.sh
145 rm -rf test_wavs 145 rm -rf test_wavs
146 rm README.md 146 rm README.md
147 147
@@ -157,11 +157,11 @@ def get_models(): @@ -157,11 +157,11 @@ def get_models():
157 short_name="small_zipformer", 157 short_name="small_zipformer",
158 cmd=""" 158 cmd="""
159 pushd $model_name 159 pushd $model_name
160 - rm -v encoder-epoch-99-avg-1.onnx  
161 - rm -v decoder-epoch-99-avg-1.int8.onnx  
162 - rm -v joiner-epoch-99-avg-1.onnx 160 + rm -fv encoder-epoch-99-avg-1.onnx
  161 + rm -fv decoder-epoch-99-avg-1.int8.onnx
  162 + rm -fv joiner-epoch-99-avg-1.onnx
163 163
164 - rm -v *.sh 164 + rm -fv *.sh
165 rm -rf test_wavs 165 rm -rf test_wavs
166 rm README.md 166 rm README.md
167 167
@@ -103,6 +103,7 @@ def get_piper_models() -> List[TtsModel]: @@ -103,6 +103,7 @@ def get_piper_models() -> List[TtsModel]:
103 TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"), 103 TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
104 TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"), 104 TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
105 TtsModel(model_dir="vits-piper-cs_CZ-jirka-medium"), 105 TtsModel(model_dir="vits-piper-cs_CZ-jirka-medium"),
  106 + TtsModel(model_dir="vits-piper-cy_GB-gwryw_gogleddol-medium"),
106 TtsModel(model_dir="vits-piper-da_DK-talesyntese-medium"), 107 TtsModel(model_dir="vits-piper-da_DK-talesyntese-medium"),
107 TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low"), 108 TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low"),
108 TtsModel(model_dir="vits-piper-de_DE-karlsson-low"), 109 TtsModel(model_dir="vits-piper-de_DE-karlsson-low"),
@@ -126,15 +127,19 @@ def get_piper_models() -> List[TtsModel]: @@ -126,15 +127,19 @@ def get_piper_models() -> List[TtsModel]:
126 TtsModel(model_dir="vits-piper-en_GB-semaine-medium"), 127 TtsModel(model_dir="vits-piper-en_GB-semaine-medium"),
127 TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low"), 128 TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low"),
128 TtsModel(model_dir="vits-piper-en_GB-southern_english_female-medium"), 129 TtsModel(model_dir="vits-piper-en_GB-southern_english_female-medium"),
  130 + TtsModel(model_dir="vits-piper-en_GB-southern_english_male-medium"),
129 TtsModel(model_dir="vits-piper-en_GB-sweetbbak-amy"), 131 TtsModel(model_dir="vits-piper-en_GB-sweetbbak-amy"),
130 TtsModel(model_dir="vits-piper-en_GB-vctk-medium"), 132 TtsModel(model_dir="vits-piper-en_GB-vctk-medium"),
131 TtsModel(model_dir="vits-piper-en_US-amy-low"), 133 TtsModel(model_dir="vits-piper-en_US-amy-low"),
132 TtsModel(model_dir="vits-piper-en_US-amy-medium"), 134 TtsModel(model_dir="vits-piper-en_US-amy-medium"),
133 TtsModel(model_dir="vits-piper-en_US-arctic-medium"), 135 TtsModel(model_dir="vits-piper-en_US-arctic-medium"),
  136 + TtsModel(model_dir="vits-piper-en_US-bryce-medium"),
134 TtsModel(model_dir="vits-piper-en_US-danny-low"), 137 TtsModel(model_dir="vits-piper-en_US-danny-low"),
135 TtsModel(model_dir="vits-piper-en_US-glados"), 138 TtsModel(model_dir="vits-piper-en_US-glados"),
  139 + TtsModel(model_dir="vits-piper-en_US-hfc_female-medium"),
136 TtsModel(model_dir="vits-piper-en_US-hfc_male-medium"), 140 TtsModel(model_dir="vits-piper-en_US-hfc_male-medium"),
137 TtsModel(model_dir="vits-piper-en_US-joe-medium"), 141 TtsModel(model_dir="vits-piper-en_US-joe-medium"),
  142 + TtsModel(model_dir="vits-piper-en_US-john-medium"),
138 TtsModel(model_dir="vits-piper-en_US-kathleen-low"), 143 TtsModel(model_dir="vits-piper-en_US-kathleen-low"),
139 TtsModel(model_dir="vits-piper-en_US-kristin-medium"), 144 TtsModel(model_dir="vits-piper-en_US-kristin-medium"),
140 TtsModel(model_dir="vits-piper-en_US-kusal-medium"), 145 TtsModel(model_dir="vits-piper-en_US-kusal-medium"),
@@ -146,6 +151,7 @@ def get_piper_models() -> List[TtsModel]: @@ -146,6 +151,7 @@ def get_piper_models() -> List[TtsModel]:
146 TtsModel(model_dir="vits-piper-en_US-libritts_r-medium"), 151 TtsModel(model_dir="vits-piper-en_US-libritts_r-medium"),
147 TtsModel(model_dir="vits-piper-en_US-ljspeech-high"), 152 TtsModel(model_dir="vits-piper-en_US-ljspeech-high"),
148 TtsModel(model_dir="vits-piper-en_US-ljspeech-medium"), 153 TtsModel(model_dir="vits-piper-en_US-ljspeech-medium"),
  154 + TtsModel(model_dir="vits-piper-en_US-norman-medium"),
149 TtsModel(model_dir="vits-piper-en_US-ryan-high"), 155 TtsModel(model_dir="vits-piper-en_US-ryan-high"),
150 TtsModel(model_dir="vits-piper-en_US-ryan-low"), 156 TtsModel(model_dir="vits-piper-en_US-ryan-low"),
151 TtsModel(model_dir="vits-piper-en_US-ryan-medium"), 157 TtsModel(model_dir="vits-piper-en_US-ryan-medium"),
@@ -162,6 +168,7 @@ def get_piper_models() -> List[TtsModel]: @@ -162,6 +168,7 @@ def get_piper_models() -> List[TtsModel]:
162 # TtsModel(model_dir="vits-piper-fr_FR-mls-medium"), 168 # TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
163 TtsModel(model_dir="vits-piper-fr_FR-siwis-low"), 169 TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
164 TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"), 170 TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
  171 + TtsModel(model_dir="vits-piper-fr_FR-tom-medium"),
165 TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"), 172 TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
166 TtsModel(model_dir="vits-piper-hu_HU-anna-medium"), 173 TtsModel(model_dir="vits-piper-hu_HU-anna-medium"),
167 TtsModel(model_dir="vits-piper-hu_HU-berta-medium"), 174 TtsModel(model_dir="vits-piper-hu_HU-berta-medium"),
@@ -170,6 +177,7 @@ def get_piper_models() -> List[TtsModel]: @@ -170,6 +177,7 @@ def get_piper_models() -> List[TtsModel]:
170 TtsModel(model_dir="vits-piper-is_IS-salka-medium"), 177 TtsModel(model_dir="vits-piper-is_IS-salka-medium"),
171 TtsModel(model_dir="vits-piper-is_IS-steinn-medium"), 178 TtsModel(model_dir="vits-piper-is_IS-steinn-medium"),
172 TtsModel(model_dir="vits-piper-is_IS-ugla-medium"), 179 TtsModel(model_dir="vits-piper-is_IS-ugla-medium"),
  180 + TtsModel(model_dir="vits-piper-it_IT-paola-medium"),
173 TtsModel(model_dir="vits-piper-it_IT-riccardo-x_low"), 181 TtsModel(model_dir="vits-piper-it_IT-riccardo-x_low"),
174 TtsModel(model_dir="vits-piper-ka_GE-natia-medium"), 182 TtsModel(model_dir="vits-piper-ka_GE-natia-medium"),
175 TtsModel(model_dir="vits-piper-kk_KZ-iseke-x_low"), 183 TtsModel(model_dir="vits-piper-kk_KZ-iseke-x_low"),
@@ -204,6 +212,7 @@ def get_piper_models() -> List[TtsModel]: @@ -204,6 +212,7 @@ def get_piper_models() -> List[TtsModel]:
204 TtsModel(model_dir="vits-piper-sw_CD-lanfrica-medium"), 212 TtsModel(model_dir="vits-piper-sw_CD-lanfrica-medium"),
205 TtsModel(model_dir="vits-piper-tr_TR-dfki-medium"), 213 TtsModel(model_dir="vits-piper-tr_TR-dfki-medium"),
206 TtsModel(model_dir="vits-piper-tr_TR-fahrettin-medium"), 214 TtsModel(model_dir="vits-piper-tr_TR-fahrettin-medium"),
  215 + TtsModel(model_dir="vits-piper-tr_TR-fettah-medium"),
207 TtsModel(model_dir="vits-piper-uk_UA-lada-x_low"), 216 TtsModel(model_dir="vits-piper-uk_UA-lada-x_low"),
208 TtsModel(model_dir="vits-piper-uk_UA-ukrainian_tts-medium"), 217 TtsModel(model_dir="vits-piper-uk_UA-ukrainian_tts-medium"),
209 TtsModel(model_dir="vits-piper-vi_VN-25hours_single-low"), 218 TtsModel(model_dir="vits-piper-vi_VN-25hours_single-low"),