Fangjun Kuang
Committed by GitHub

Add C++ runtime for vocos (#2014)

正在显示 62 个修改的文件,包含 558 行增加、162 行删除
@@ -121,19 +121,19 @@ rm -rf kokoro-en-v0_19 @@ -121,19 +121,19 @@ rm -rf kokoro-en-v0_19
121 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 121 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
122 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 122 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
123 rm matcha-icefall-en_US-ljspeech.tar.bz2 123 rm matcha-icefall-en_US-ljspeech.tar.bz2
124 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 124 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
125 125
126 node ./test_tts_non_streaming_matcha_icefall_en.js 126 node ./test_tts_non_streaming_matcha_icefall_en.js
127 -rm hifigan_v2.onnx 127 +rm vocos-22khz-univ.onnx
128 rm -rf matcha-icefall-en_US-ljspeech 128 rm -rf matcha-icefall-en_US-ljspeech
129 129
130 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 130 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
131 tar xvf matcha-icefall-zh-baker.tar.bz2 131 tar xvf matcha-icefall-zh-baker.tar.bz2
132 rm matcha-icefall-zh-baker.tar.bz2 132 rm matcha-icefall-zh-baker.tar.bz2
133 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 133 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
134 134
135 node ./test_tts_non_streaming_matcha_icefall_zh.js 135 node ./test_tts_non_streaming_matcha_icefall_zh.js
136 -rm hifigan_v2.onnx 136 +rm vocos-22khz-univ.onnx
137 rm -rf matcha-icefall-zh-baker 137 rm -rf matcha-icefall-zh-baker
138 ls -lh *.wav 138 ls -lh *.wav
139 139
@@ -42,12 +42,13 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -42,12 +42,13 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
42 tar xvf matcha-icefall-zh-baker.tar.bz2 42 tar xvf matcha-icefall-zh-baker.tar.bz2
43 rm matcha-icefall-zh-baker.tar.bz2 43 rm matcha-icefall-zh-baker.tar.bz2
44 44
45 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 45 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
46 46
47 node ./test-offline-tts-matcha-zh.js 47 node ./test-offline-tts-matcha-zh.js
48 48
49 rm -rf matcha-icefall-zh-baker 49 rm -rf matcha-icefall-zh-baker
50 -rm hifigan_v2.onnx 50 +rm vocos-22khz-univ.onnx
  51 +
51 52
52 echo "---" 53 echo "---"
53 54
@@ -55,12 +56,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -55,12 +56,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
55 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 56 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
56 rm matcha-icefall-en_US-ljspeech.tar.bz2 57 rm matcha-icefall-en_US-ljspeech.tar.bz2
57 58
58 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 59 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
59 60
60 node ./test-offline-tts-matcha-en.js 61 node ./test-offline-tts-matcha-en.js
61 62
62 rm -rf matcha-icefall-en_US-ljspeech 63 rm -rf matcha-icefall-en_US-ljspeech
63 -rm hifigan_v2.onnx 64 +rm vocos-22khz-univ.onnx
64 65
65 echo "---" 66 echo "---"
66 67
@@ -50,11 +50,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -50,11 +50,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
50 tar xvf matcha-tts-fa_en-musa.tar.bz2 50 tar xvf matcha-tts-fa_en-musa.tar.bz2
51 rm matcha-tts-fa_en-musa.tar.bz2 51 rm matcha-tts-fa_en-musa.tar.bz2
52 52
53 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 53 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
  54 +
54 55
55 $EXE \ 56 $EXE \
56 --matcha-acoustic-model=./matcha-tts-fa_en-musa/model.onnx \ 57 --matcha-acoustic-model=./matcha-tts-fa_en-musa/model.onnx \
57 - --matcha-vocoder=./hifigan_v2.onnx \ 58 + --matcha-vocoder=./vocos-22khz-univ.onnx \
58 --matcha-tokens=./matcha-tts-fa_en-musa/tokens.txt \ 59 --matcha-tokens=./matcha-tts-fa_en-musa/tokens.txt \
59 --matcha-data-dir=./matcha-tts-fa_en-musa/espeak-ng-data \ 60 --matcha-data-dir=./matcha-tts-fa_en-musa/espeak-ng-data \
60 --output-filename=./tts/test-matcha-fa-en-musa.wav \ 61 --output-filename=./tts/test-matcha-fa-en-musa.wav \
@@ -62,7 +63,7 @@ $EXE \ @@ -62,7 +63,7 @@ $EXE \
62 "How are you doing today? این یک نمونه ی تست فارسی است. This is a test." 63 "How are you doing today? این یک نمونه ی تست فارسی است. This is a test."
63 64
64 rm -rf matcha-tts-fa_en-musa 65 rm -rf matcha-tts-fa_en-musa
65 -rm hifigan_v2.onnx 66 +rm vocos-22khz-univ.onnx
66 ls -lh tts/*.wav 67 ls -lh tts/*.wav
67 68
68 log "------------------------------------------------------------" 69 log "------------------------------------------------------------"
@@ -72,11 +73,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -72,11 +73,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
72 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 73 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
73 rm matcha-icefall-en_US-ljspeech.tar.bz2 74 rm matcha-icefall-en_US-ljspeech.tar.bz2
74 75
75 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 76 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
  77 +
76 78
77 $EXE \ 79 $EXE \
78 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 80 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
79 - --matcha-vocoder=./hifigan_v2.onnx \ 81 + --matcha-vocoder=./vocos-22khz-univ.onnx \
80 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 82 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
81 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 83 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
82 --num-threads=2 \ 84 --num-threads=2 \
@@ -84,7 +86,7 @@ $EXE \ @@ -84,7 +86,7 @@ $EXE \
84 --debug=1 \ 86 --debug=1 \
85 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." 87 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
86 88
87 -rm hifigan_v2.onnx 89 +rm vocos-22khz-univ.onnx
88 rm -rf matcha-icefall-en_US-ljspeech 90 rm -rf matcha-icefall-en_US-ljspeech
89 ls -lh tts/*.wav 91 ls -lh tts/*.wav
90 92
@@ -95,11 +97,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -95,11 +97,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
95 tar xvf matcha-icefall-zh-baker.tar.bz2 97 tar xvf matcha-icefall-zh-baker.tar.bz2
96 rm matcha-icefall-zh-baker.tar.bz2 98 rm matcha-icefall-zh-baker.tar.bz2
97 99
98 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 100 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
99 101
100 $EXE \ 102 $EXE \
101 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 103 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
102 - --matcha-vocoder=./hifigan_v2.onnx \ 104 + --matcha-vocoder=./vocos-22khz-univ.onnx \
103 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 105 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
104 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 106 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
105 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ 107 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
@@ -110,7 +112,7 @@ $EXE \ @@ -110,7 +112,7 @@ $EXE \
110 112
111 $EXE \ 113 $EXE \
112 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 114 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
113 - --matcha-vocoder=./hifigan_v2.onnx \ 115 + --matcha-vocoder=./vocos-22khz-univ.onnx \
114 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 116 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
115 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 117 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
116 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ 118 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
@@ -119,7 +121,7 @@ $EXE \ @@ -119,7 +121,7 @@ $EXE \
119 --output-filename=./tts/matcha-baker-zh-2.wav \ 121 --output-filename=./tts/matcha-baker-zh-2.wav \
120 "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" 122 "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。"
121 123
122 -rm hifigan_v2.onnx 124 +rm vocos-22khz-univ.onnx
123 rm -rf matcha-icefall-zh-baker 125 rm -rf matcha-icefall-zh-baker
124 126
125 log "------------------------------------------------------------" 127 log "------------------------------------------------------------"
@@ -320,18 +320,18 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -320,18 +320,18 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
320 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 320 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
321 rm matcha-icefall-en_US-ljspeech.tar.bz2 321 rm matcha-icefall-en_US-ljspeech.tar.bz2
322 322
323 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 323 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
324 324
325 python3 ./python-api-examples/offline-tts.py \ 325 python3 ./python-api-examples/offline-tts.py \
326 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 326 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
327 - --matcha-vocoder=./hifigan_v2.onnx \ 327 + --matcha-vocoder=./vocos-22khz-univ.onnx \
328 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 328 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
329 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 329 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
330 --output-filename=./tts/test-matcha-ljspeech-en.wav \ 330 --output-filename=./tts/test-matcha-ljspeech-en.wav \
331 --num-threads=2 \ 331 --num-threads=2 \
332 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." 332 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
333 333
334 -rm hifigan_v2.onnx 334 +rm vocos-22khz-univ.onnx
335 rm -rf matcha-icefall-en_US-ljspeech 335 rm -rf matcha-icefall-en_US-ljspeech
336 336
337 log "matcha-baker-zh test" 337 log "matcha-baker-zh test"
@@ -340,11 +340,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -340,11 +340,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
340 tar xvf matcha-icefall-zh-baker.tar.bz2 340 tar xvf matcha-icefall-zh-baker.tar.bz2
341 rm matcha-icefall-zh-baker.tar.bz2 341 rm matcha-icefall-zh-baker.tar.bz2
342 342
343 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 343 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
344 344
345 python3 ./python-api-examples/offline-tts.py \ 345 python3 ./python-api-examples/offline-tts.py \
346 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 346 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
347 - --matcha-vocoder=./hifigan_v2.onnx \ 347 + --matcha-vocoder=./vocos-22khz-univ.onnx \
348 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 348 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
349 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 349 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
350 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ 350 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
@@ -353,7 +353,7 @@ python3 ./python-api-examples/offline-tts.py \ @@ -353,7 +353,7 @@ python3 ./python-api-examples/offline-tts.py \
353 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" 353 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
354 354
355 rm -rf matcha-icefall-zh-baker 355 rm -rf matcha-icefall-zh-baker
356 -rm hifigan_v2.onnx 356 +rm vocos-22khz-univ.onnx
357 357
358 log "vits-ljs test" 358 log "vits-ljs test"
359 359
@@ -228,7 +228,7 @@ jobs: @@ -228,7 +228,7 @@ jobs:
228 tar xvf matcha-icefall-zh-baker.tar.bz2 228 tar xvf matcha-icefall-zh-baker.tar.bz2
229 rm matcha-icefall-zh-baker.tar.bz2 229 rm matcha-icefall-zh-baker.tar.bz2
230 230
231 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 231 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
232 232
233 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH 233 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
234 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH 234 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
@@ -237,7 +237,7 @@ jobs: @@ -237,7 +237,7 @@ jobs:
237 237
238 rm ./matcha-tts-zh-c-api 238 rm ./matcha-tts-zh-c-api
239 rm -rf matcha-icefall-* 239 rm -rf matcha-icefall-*
240 - rm hifigan_v2.onnx 240 + rm vocos-22khz-univ.onnx
241 241
242 - name: Test Matcha TTS (en) 242 - name: Test Matcha TTS (en)
243 shell: bash 243 shell: bash
@@ -252,7 +252,7 @@ jobs: @@ -252,7 +252,7 @@ jobs:
252 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 252 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
253 rm matcha-icefall-en_US-ljspeech.tar.bz2 253 rm matcha-icefall-en_US-ljspeech.tar.bz2
254 254
255 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 255 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
256 256
257 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH 257 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
258 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH 258 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
@@ -261,7 +261,7 @@ jobs: @@ -261,7 +261,7 @@ jobs:
261 261
262 rm ./matcha-tts-en-c-api 262 rm ./matcha-tts-en-c-api
263 rm -rf matcha-icefall-* 263 rm -rf matcha-icefall-*
264 - rm hifigan_v2.onnx 264 + rm vocos-22khz-univ.onnx
265 265
266 - uses: actions/upload-artifact@v4 266 - uses: actions/upload-artifact@v4
267 with: 267 with:
@@ -237,7 +237,7 @@ jobs: @@ -237,7 +237,7 @@ jobs:
237 tar xvf matcha-icefall-zh-baker.tar.bz2 237 tar xvf matcha-icefall-zh-baker.tar.bz2
238 rm matcha-icefall-zh-baker.tar.bz2 238 rm matcha-icefall-zh-baker.tar.bz2
239 239
240 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 240 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
241 241
242 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH 242 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
243 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH 243 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
@@ -245,7 +245,7 @@ jobs: @@ -245,7 +245,7 @@ jobs:
245 ./matcha-tts-zh-cxx-api 245 ./matcha-tts-zh-cxx-api
246 246
247 rm -rf matcha-icefall-* 247 rm -rf matcha-icefall-*
248 - rm hifigan_v2.onnx 248 + rm vocos-22khz-univ.onnx
249 rm matcha-tts-zh-cxx-api 249 rm matcha-tts-zh-cxx-api
250 250
251 - name: Test Matcha TTS (en) 251 - name: Test Matcha TTS (en)
@@ -262,7 +262,7 @@ jobs: @@ -262,7 +262,7 @@ jobs:
262 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 262 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
263 rm matcha-icefall-en_US-ljspeech.tar.bz2 263 rm matcha-icefall-en_US-ljspeech.tar.bz2
264 264
265 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 265 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
266 266
267 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH 267 export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
268 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH 268 export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
@@ -271,7 +271,7 @@ jobs: @@ -271,7 +271,7 @@ jobs:
271 271
272 rm matcha-tts-en-cxx-api 272 rm matcha-tts-en-cxx-api
273 rm -rf matcha-icefall-* 273 rm -rf matcha-icefall-*
274 - rm hifigan_v2.onnx 274 + rm vocos-22khz-univ.onnx
275 275
276 - uses: actions/upload-artifact@v4 276 - uses: actions/upload-artifact@v4
277 with: 277 with:
@@ -265,7 +265,7 @@ jobs: @@ -265,7 +265,7 @@ jobs:
265 rm -rf kokoro-en-* 265 rm -rf kokoro-en-*
266 266
267 rm -rf matcha-icefall-* 267 rm -rf matcha-icefall-*
268 - rm hifigan_v2.onnx 268 + rm vocos-22khz-univ.onnx
269 269
270 ./run-non-streaming-tts-piper-en.sh 270 ./run-non-streaming-tts-piper-en.sh
271 rm -rf vits-piper-* 271 rm -rf vits-piper-*
@@ -262,7 +262,7 @@ class MainActivity : AppCompatActivity() { @@ -262,7 +262,7 @@ class MainActivity : AppCompatActivity() {
262 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker 262 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
263 // modelDir = "matcha-icefall-zh-baker" 263 // modelDir = "matcha-icefall-zh-baker"
264 // acousticModelName = "model-steps-3.onnx" 264 // acousticModelName = "model-steps-3.onnx"
265 - // vocoder = "hifigan_v2.onnx" 265 + // vocoder = "vocos-22khz-univ.onnx"
266 // lexicon = "lexicon.txt" 266 // lexicon = "lexicon.txt"
267 // dictDir = "matcha-icefall-zh-baker/dict" 267 // dictDir = "matcha-icefall-zh-baker/dict"
268 268
@@ -271,7 +271,7 @@ class MainActivity : AppCompatActivity() { @@ -271,7 +271,7 @@ class MainActivity : AppCompatActivity() {
271 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker 271 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
272 // modelDir = "matcha-icefall-en_US-ljspeech" 272 // modelDir = "matcha-icefall-en_US-ljspeech"
273 // acousticModelName = "model-steps-3.onnx" 273 // acousticModelName = "model-steps-3.onnx"
274 - // vocoder = "hifigan_v2.onnx" 274 + // vocoder = "vocos-22khz-univ.onnx"
275 // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" 275 // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
276 276
277 // Example 9 277 // Example 9
@@ -131,7 +131,7 @@ object TtsEngine { @@ -131,7 +131,7 @@ object TtsEngine {
131 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker 131 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
132 // modelDir = "matcha-icefall-zh-baker" 132 // modelDir = "matcha-icefall-zh-baker"
133 // acousticModelName = "model-steps-3.onnx" 133 // acousticModelName = "model-steps-3.onnx"
134 - // vocoder = "hifigan_v2.onnx" 134 + // vocoder = "vocos-22khz-univ.onnx"
135 // lexicon = "lexicon.txt" 135 // lexicon = "lexicon.txt"
136 // dictDir = "matcha-icefall-zh-baker/dict" 136 // dictDir = "matcha-icefall-zh-baker/dict"
137 // lang = "zho" 137 // lang = "zho"
@@ -141,7 +141,7 @@ object TtsEngine { @@ -141,7 +141,7 @@ object TtsEngine {
141 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker 141 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
142 // modelDir = "matcha-icefall-en_US-ljspeech" 142 // modelDir = "matcha-icefall-en_US-ljspeech"
143 // acousticModelName = "model-steps-3.onnx" 143 // acousticModelName = "model-steps-3.onnx"
144 - // vocoder = "hifigan_v2.onnx" 144 + // vocoder = "vocos-22khz-univ.onnx"
145 // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" 145 // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
146 // lang = "eng" 146 // lang = "eng"
147 147
@@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
13 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 13 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
14 rm matcha-icefall-en_US-ljspeech.tar.bz2 14 rm matcha-icefall-en_US-ljspeech.tar.bz2
15 15
16 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 16 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
17 17
18 ./matcha-tts-en-c-api 18 ./matcha-tts-en-c-api
19 19
@@ -40,7 +40,7 @@ int32_t main(int32_t argc, char *argv[]) { @@ -40,7 +40,7 @@ int32_t main(int32_t argc, char *argv[]) {
40 config.model.matcha.acoustic_model = 40 config.model.matcha.acoustic_model =
41 "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; 41 "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
42 42
43 - config.model.matcha.vocoder = "./hifigan_v2.onnx"; 43 + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";
44 44
45 config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; 45 config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
46 46
@@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
13 tar xvf matcha-icefall-zh-baker.tar.bz2 13 tar xvf matcha-icefall-zh-baker.tar.bz2
14 rm matcha-icefall-zh-baker.tar.bz2 14 rm matcha-icefall-zh-baker.tar.bz2
15 15
16 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 16 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
17 17
18 ./matcha-tts-zh-c-api 18 ./matcha-tts-zh-c-api
19 19
@@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) {
39 memset(&config, 0, sizeof(config)); 39 memset(&config, 0, sizeof(config));
40 config.model.matcha.acoustic_model = 40 config.model.matcha.acoustic_model =
41 "./matcha-icefall-zh-baker/model-steps-3.onnx"; 41 "./matcha-icefall-zh-baker/model-steps-3.onnx";
42 - config.model.matcha.vocoder = "./hifigan_v2.onnx"; 42 + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";
43 config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; 43 config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
44 config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; 44 config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
45 config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; 45 config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
@@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
13 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 13 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
14 rm matcha-icefall-en_US-ljspeech.tar.bz2 14 rm matcha-icefall-en_US-ljspeech.tar.bz2
15 15
16 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 16 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
17 17
18 ./matcha-tts-en-cxx-api 18 ./matcha-tts-en-cxx-api
19 19
@@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) {
39 config.model.matcha.acoustic_model = 39 config.model.matcha.acoustic_model =
40 "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; 40 "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
41 41
42 - config.model.matcha.vocoder = "./hifigan_v2.onnx"; 42 + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";
43 43
44 config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; 44 config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
45 45
@@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
13 tar xvf matcha-icefall-zh-baker.tar.bz2 13 tar xvf matcha-icefall-zh-baker.tar.bz2
14 rm matcha-icefall-zh-baker.tar.bz2 14 rm matcha-icefall-zh-baker.tar.bz2
15 15
16 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 16 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
17 17
18 ./matcha-tts-zh-cxx-api 18 ./matcha-tts-zh-cxx-api
19 19
@@ -37,7 +37,7 @@ int32_t main(int32_t argc, char *argv[]) { @@ -37,7 +37,7 @@ int32_t main(int32_t argc, char *argv[]) {
37 OfflineTtsConfig config; 37 OfflineTtsConfig config;
38 config.model.matcha.acoustic_model = 38 config.model.matcha.acoustic_model =
39 "./matcha-icefall-zh-baker/model-steps-3.onnx"; 39 "./matcha-icefall-zh-baker/model-steps-3.onnx";
40 - config.model.matcha.vocoder = "./hifigan_v2.onnx"; 40 + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx";
41 config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; 41 config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
42 config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; 42 config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
43 config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; 43 config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
@@ -14,14 +14,14 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -14,14 +14,14 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
14 rm matcha-icefall-en_US-ljspeech.tar.bz2 14 rm matcha-icefall-en_US-ljspeech.tar.bz2
15 fi 15 fi
16 16
17 -if [ ! -f ./hifigan_v2.onnx ]; then  
18 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 17 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  18 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
19 fi 19 fi
20 20
21 dart run \ 21 dart run \
22 ./bin/matcha-en.dart \ 22 ./bin/matcha-en.dart \
23 --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 23 --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
24 - --vocoder ./hifigan_v2.onnx \ 24 + --vocoder ./vocos-22khz-univ.onnx \
25 --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \ 25 --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \
26 --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 26 --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \
27 --sid 0 \ 27 --sid 0 \
@@ -13,14 +13,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -13,14 +13,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
13 rm matcha-icefall-zh-baker.tar.bz2 13 rm matcha-icefall-zh-baker.tar.bz2
14 fi 14 fi
15 15
16 -if [ ! -f ./hifigan_v2.onnx ]; then  
17 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 16 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  17 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
18 fi 18 fi
19 19
20 dart run \ 20 dart run \
21 ./bin/matcha-zh.dart \ 21 ./bin/matcha-zh.dart \
22 --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ 22 --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
23 - --vocoder ./hifigan_v2.onnx \ 23 + --vocoder ./vocos-22khz-univ.onnx \
24 --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ 24 --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
25 --tokens ./matcha-icefall-zh-baker/tokens.txt \ 25 --tokens ./matcha-icefall-zh-baker/tokens.txt \
26 --dict-dir ./matcha-icefall-zh-baker/dict \ 26 --dict-dir ./matcha-icefall-zh-baker/dict \
@@ -33,7 +33,7 @@ dart run \ @@ -33,7 +33,7 @@ dart run \
33 dart run \ 33 dart run \
34 ./bin/matcha-zh.dart \ 34 ./bin/matcha-zh.dart \
35 --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ 35 --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \
36 - --vocoder ./hifigan_v2.onnx \ 36 + --vocoder ./vocos-22khz-univ.onnx \
37 --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ 37 --lexicon ./matcha-icefall-zh-baker/lexicon.txt \
38 --tokens ./matcha-icefall-zh-baker/tokens.txt \ 38 --tokens ./matcha-icefall-zh-baker/tokens.txt \
39 --dict-dir ./matcha-icefall-zh-baker/dict \ 39 --dict-dir ./matcha-icefall-zh-baker/dict \
@@ -92,11 +92,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -92,11 +92,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
92 tar xvf matcha-icefall-zh-baker.tar.bz2 92 tar xvf matcha-icefall-zh-baker.tar.bz2
93 rm matcha-icefall-zh-baker.tar.bz2 93 rm matcha-icefall-zh-baker.tar.bz2
94 94
95 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 95 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
96 96
97 dotnet run \ 97 dotnet run \
98 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 98 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
99 - --matcha-vocoder=./hifigan_v2.onnx \ 99 + --matcha-vocoder=./vocos-22khz-univ.onnx \
100 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 100 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
101 --tokens=./matcha-icefall-zh-baker/tokens.txt \ 101 --tokens=./matcha-icefall-zh-baker/tokens.txt \
102 --dict-dir=./matcha-icefall-zh-baker/dict \ 102 --dict-dir=./matcha-icefall-zh-baker/dict \
@@ -111,11 +111,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -111,11 +111,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
111 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 111 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
112 rm matcha-icefall-en_US-ljspeech.tar.bz2 112 rm matcha-icefall-en_US-ljspeech.tar.bz2
113 113
114 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 114 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
115 115
116 dotnet run \ 116 dotnet run \
117 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 117 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
118 - --matcha-vocoder=./hifigan_v2.onnx \ 118 + --matcha-vocoder=./vocos-22khz-univ.onnx \
119 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 119 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
120 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 120 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
121 --debug=1 \ 121 --debug=1 \
@@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
12 rm matcha-icefall-en_US-ljspeech.tar.bz2 12 rm matcha-icefall-en_US-ljspeech.tar.bz2
13 fi 13 fi
14 14
15 -if [ ! -f ./hifigan_v2.onnx ]; then  
16 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 15 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
17 fi 17 fi
18 18
19 dotnet run \ 19 dotnet run \
20 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 20 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
21 - --matcha-vocoder=./hifigan_v2.onnx \ 21 + --matcha-vocoder=./vocos-22khz-univ.onnx \
22 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 22 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
23 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 23 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
24 --debug=1 \ 24 --debug=1 \
@@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
10 rm matcha-icefall-zh-baker.tar.bz2 10 rm matcha-icefall-zh-baker.tar.bz2
11 fi 11 fi
12 12
13 -if [ ! -f ./hifigan_v2.onnx ]; then  
14 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 13 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  14 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
15 fi 15 fi
16 16
17 17
18 dotnet run \ 18 dotnet run \
19 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 19 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
20 - --matcha-vocoder=./hifigan_v2.onnx \ 20 + --matcha-vocoder=./vocos-22khz-univ.onnx \
21 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 21 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
22 --tokens=./matcha-icefall-zh-baker/tokens.txt \ 22 --tokens=./matcha-icefall-zh-baker/tokens.txt \
23 --dict-dir=./matcha-icefall-zh-baker/dict \ 23 --dict-dir=./matcha-icefall-zh-baker/dict \
@@ -85,11 +85,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -85,11 +85,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
85 tar xvf matcha-icefall-zh-baker.tar.bz2 85 tar xvf matcha-icefall-zh-baker.tar.bz2
86 rm matcha-icefall-zh-baker.tar.bz2 86 rm matcha-icefall-zh-baker.tar.bz2
87 87
88 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 88 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
89 89
90 dotnet run \ 90 dotnet run \
91 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 91 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
92 - --matcha-vocoder=./hifigan_v2.onnx \ 92 + --matcha-vocoder=./vocos-22khz-univ.onnx \
93 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 93 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
94 --tokens=./matcha-icefall-zh-baker/tokens.txt \ 94 --tokens=./matcha-icefall-zh-baker/tokens.txt \
95 --dict-dir=./matcha-icefall-zh-baker/dict \ 95 --dict-dir=./matcha-icefall-zh-baker/dict \
@@ -104,11 +104,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -104,11 +104,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
104 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 104 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
105 rm matcha-icefall-en_US-ljspeech.tar.bz2 105 rm matcha-icefall-en_US-ljspeech.tar.bz2
106 106
107 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 107 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
108 108
109 dotnet run \ 109 dotnet run \
110 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 110 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
111 - --matcha-vocoder=./hifigan_v2.onnx \ 111 + --matcha-vocoder=./vocos-22khz-univ.onnx \
112 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 112 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
113 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 113 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
114 --debug=1 \ 114 --debug=1 \
@@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
12 rm matcha-icefall-en_US-ljspeech.tar.bz2 12 rm matcha-icefall-en_US-ljspeech.tar.bz2
13 fi 13 fi
14 14
15 -if [ ! -f ./hifigan_v2.onnx ]; then  
16 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 15 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
17 fi 17 fi
18 18
19 dotnet run \ 19 dotnet run \
20 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 20 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
21 - --matcha-vocoder=./hifigan_v2.onnx \ 21 + --matcha-vocoder=./vocos-22khz-univ.onnx \
22 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 22 --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
23 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 23 --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
24 --debug=1 \ 24 --debug=1 \
@@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
10 rm matcha-icefall-zh-baker.tar.bz2 10 rm matcha-icefall-zh-baker.tar.bz2
11 fi 11 fi
12 12
13 -if [ ! -f ./hifigan_v2.onnx ]; then  
14 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 13 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  14 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
15 fi 15 fi
16 16
17 17
18 dotnet run \ 18 dotnet run \
19 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 19 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
20 - --matcha-vocoder=./hifigan_v2.onnx \ 20 + --matcha-vocoder=./vocos-22khz-univ.onnx \
21 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 21 --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
22 --tokens=./matcha-icefall-zh-baker/tokens.txt \ 22 --tokens=./matcha-icefall-zh-baker/tokens.txt \
23 --dict-dir=./matcha-icefall-zh-baker/dict \ 23 --dict-dir=./matcha-icefall-zh-baker/dict \
@@ -12,8 +12,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -12,8 +12,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
12 rm matcha-icefall-en_US-ljspeech.tar.bz2 12 rm matcha-icefall-en_US-ljspeech.tar.bz2
13 fi 13 fi
14 14
15 -if [ ! -f ./hifigan_v2.onnx ]; then  
16 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 15 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
17 fi 17 fi
18 18
19 go mod tidy 19 go mod tidy
@@ -21,7 +21,7 @@ go build @@ -21,7 +21,7 @@ go build
21 21
22 ./non-streaming-tts \ 22 ./non-streaming-tts \
23 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 23 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
24 - --matcha-vocoder=./hifigan_v2.onnx \ 24 + --matcha-vocoder=./vocos-22khz-univ.onnx \
25 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 25 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
26 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 26 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
27 --debug=1 \ 27 --debug=1 \
@@ -11,8 +11,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -11,8 +11,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
11 rm matcha-icefall-zh-baker.tar.bz2 11 rm matcha-icefall-zh-baker.tar.bz2
12 fi 12 fi
13 13
14 -if [ ! -f ./hifigan_v2.onnx ]; then  
15 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 14 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  15 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
16 fi 16 fi
17 17
18 go mod tidy 18 go mod tidy
@@ -20,7 +20,7 @@ go build @@ -20,7 +20,7 @@ go build
20 20
21 ./non-streaming-tts \ 21 ./non-streaming-tts \
22 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 22 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
23 - --matcha-vocoder=./hifigan_v2.onnx \ 23 + --matcha-vocoder=./vocos-22khz-univ.onnx \
24 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 24 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
25 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 25 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
26 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ 26 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
@@ -159,7 +159,7 @@ function initTts(context: Context): OfflineTts { @@ -159,7 +159,7 @@ function initTts(context: Context): OfflineTts {
159 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker 159 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
160 // modelDir = 'matcha-icefall-zh-baker'; 160 // modelDir = 'matcha-icefall-zh-baker';
161 // acousticModelName = 'model-steps-3.onnx'; 161 // acousticModelName = 'model-steps-3.onnx';
162 - // vocoder = 'hifigan_v2.onnx'; 162 + // vocoder = 'vocos-22khz-univ.onnx';
163 // lexicon = 'lexicon.txt'; 163 // lexicon = 'lexicon.txt';
164 // dictDir = 'dict'; 164 // dictDir = 'dict';
165 // ruleFsts = `date.fst,phone.fst,number.fst`; 165 // ruleFsts = `date.fst,phone.fst,number.fst`;
@@ -169,7 +169,7 @@ function initTts(context: Context): OfflineTts { @@ -169,7 +169,7 @@ function initTts(context: Context): OfflineTts {
169 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker 169 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
170 // modelDir = 'matcha-icefall-en_US-ljspeech'; 170 // modelDir = 'matcha-icefall-en_US-ljspeech';
171 // acousticModelName = 'model-steps-3.onnx'; 171 // acousticModelName = 'model-steps-3.onnx';
172 - // vocoder = 'hifigan_v2.onnx'; 172 + // vocoder = 'vocos-22khz-univ.onnx';
173 // dataDir = 'espeak-ng-data'; 173 // dataDir = 'espeak-ng-data';
174 174
175 // Example 10 175 // Example 10
@@ -131,7 +131,7 @@ func getTtsFor_matcha_icefall_zh_baker() -> SherpaOnnxOfflineTtsWrapper { @@ -131,7 +131,7 @@ func getTtsFor_matcha_icefall_zh_baker() -> SherpaOnnxOfflineTtsWrapper {
131 // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker 131 // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
132 132
133 let acousticModel = getResource("model-steps-3", "onnx") 133 let acousticModel = getResource("model-steps-3", "onnx")
134 - let vocoder = getResource("hifigan_v2", "onnx") 134 + let vocoder = getResource("vocos-22khz-univ", "onnx")
135 135
136 let tokens = getResource("tokens", "txt") 136 let tokens = getResource("tokens", "txt")
137 let lexicon = getResource("lexicon", "txt") 137 let lexicon = getResource("lexicon", "txt")
@@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaEn { @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaEn {
10 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker 10 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
11 // to download model files 11 // to download model files
12 String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; 12 String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
13 - String vocoder = "./hifigan_v2.onnx"; 13 + String vocoder = "./vocos-22khz-univ.onnx";
14 String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; 14 String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
15 String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"; 15 String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
16 String text = 16 String text =
@@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaZh { @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaZh {
10 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker 10 // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
11 // to download model files 11 // to download model files
12 String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"; 12 String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx";
13 - String vocoder = "./hifigan_v2.onnx"; 13 + String vocoder = "./vocos-22khz-univ.onnx";
14 String tokens = "./matcha-icefall-zh-baker/tokens.txt"; 14 String tokens = "./matcha-icefall-zh-baker/tokens.txt";
15 String lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; 15 String lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
16 String dictDir = "./matcha-icefall-zh-baker/dict"; 16 String dictDir = "./matcha-icefall-zh-baker/dict";
@@ -35,8 +35,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -35,8 +35,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
35 rm matcha-icefall-en_US-ljspeech.tar.bz2 35 rm matcha-icefall-en_US-ljspeech.tar.bz2
36 fi 36 fi
37 37
38 -if [ ! -f ./hifigan_v2.onnx ]; then  
39 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 38 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  39 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
40 fi 40 fi
41 41
42 java \ 42 java \
@@ -34,8 +34,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -34,8 +34,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
34 rm matcha-icefall-zh-baker.tar.bz2 34 rm matcha-icefall-zh-baker.tar.bz2
35 fi 35 fi
36 36
37 -if [ ! -f ./hifigan_v2.onnx ]; then  
38 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 37 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  38 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
39 fi 39 fi
40 40
41 java \ 41 java \
@@ -111,8 +111,8 @@ function testTts() { @@ -111,8 +111,8 @@ function testTts() {
111 rm matcha-icefall-zh-baker.tar.bz2 111 rm matcha-icefall-zh-baker.tar.bz2
112 fi 112 fi
113 113
114 - if [ ! -f ./hifigan_v2.onnx ]; then  
115 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 114 + if [ ! -f ./vocos-22khz-univ.onnx ]; then
  115 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
116 fi 116 fi
117 117
118 if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then 118 if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
@@ -58,7 +58,7 @@ fun testMatcha() { @@ -58,7 +58,7 @@ fun testMatcha() {
58 model=OfflineTtsModelConfig( 58 model=OfflineTtsModelConfig(
59 matcha=OfflineTtsMatchaModelConfig( 59 matcha=OfflineTtsMatchaModelConfig(
60 acousticModel="./matcha-icefall-zh-baker/model-steps-3.onnx", 60 acousticModel="./matcha-icefall-zh-baker/model-steps-3.onnx",
61 - vocoder="./hifigan_v2.onnx", 61 + vocoder="./vocos-22khz-univ.onnx",
62 tokens="./matcha-icefall-zh-baker/tokens.txt", 62 tokens="./matcha-icefall-zh-baker/tokens.txt",
63 lexicon="./matcha-icefall-zh-baker/lexicon.txt", 63 lexicon="./matcha-icefall-zh-baker/lexicon.txt",
64 dictDir="./matcha-icefall-zh-baker/dict", 64 dictDir="./matcha-icefall-zh-baker/dict",
@@ -513,9 +513,9 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -513,9 +513,9 @@ void CNonStreamingTextToSpeechDlg::Init() {
513 "(c) Switch to the directory matcha-icefall-zh-baker\r\n" 513 "(c) Switch to the directory matcha-icefall-zh-baker\r\n"
514 "(d) Rename model-steps-3.onnx to model.onnx\r\n" 514 "(d) Rename model-steps-3.onnx to model.onnx\r\n"
515 "(e) Download a vocoder model from \r\n" 515 "(e) Download a vocoder model from \r\n"
516 - " https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx\r\n"  
517 - "(f) Rename hifigan_v2.onnx to hifigan.onnx\r\n"  
518 - "(g) Remember to put hifigan.onnx in the directory matcha-icefall-zh-baker\r\n" 516 + " https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx\r\n"
  517 + "(f) Rename vocos-22khz-univ.onnx to vocos.onnx\r\n"
  518 + "(g) Remember to put vocos.onnx in the directory matcha-icefall-zh-baker\r\n"
519 "(h) Copy the current exe to the directory matcha-icefall-zh-baker\r\n" 519 "(h) Copy the current exe to the directory matcha-icefall-zh-baker\r\n"
520 "(i) Done! You can now run the exe in the directory matcha-icefall-zh-baker\r\n"; 520 "(i) Done! You can now run the exe in the directory matcha-icefall-zh-baker\r\n";
521 521
@@ -540,10 +540,16 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -540,10 +540,16 @@ void CNonStreamingTextToSpeechDlg::Init() {
540 config.model.kokoro.dict_dir = "./dict"; 540 config.model.kokoro.dict_dir = "./dict";
541 config.model.kokoro.lexicon = "./lexicon-us-en.txt,./lexicon-zh.txt"; 541 config.model.kokoro.lexicon = "./lexicon-us-en.txt,./lexicon-zh.txt";
542 } 542 }
543 - } else if (Exists("./hifigan.onnx")) { 543 + } else if (Exists("./hifigan.onnx") || Exists("./vocos.onnx")) {
544 // it is a matcha tts model 544 // it is a matcha tts model
545 config.model.matcha.acoustic_model = "./model.onnx"; 545 config.model.matcha.acoustic_model = "./model.onnx";
546 - config.model.matcha.vocoder = "./hifigan.onnx"; 546 +
  547 + if (Exists("./hifigan.onnx")) {
  548 + config.model.matcha.vocoder = "./hifigan.onnx";
  549 + } else if (Exists("./vocos.onnx")) {
  550 + config.model.matcha.vocoder = "./vocos.onnx";
  551 + }
  552 +
547 config.model.matcha.tokens = "./tokens.txt"; 553 config.model.matcha.tokens = "./tokens.txt";
548 554
549 if (Exists("./espeak-ng-data/phontab")) { 555 if (Exists("./espeak-ng-data/phontab")) {
@@ -400,7 +400,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -400,7 +400,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
400 tar xf matcha-icefall-en_US-ljspeech.tar.bz2 400 tar xf matcha-icefall-en_US-ljspeech.tar.bz2
401 rm matcha-icefall-en_US-ljspeech.tar.bz2 401 rm matcha-icefall-en_US-ljspeech.tar.bz2
402 402
403 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 403 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
404 404
405 node ./test_tts_non_streaming_matcha_icefall_en.js 405 node ./test_tts_non_streaming_matcha_icefall_en.js
406 ``` 406 ```
@@ -411,7 +411,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -411,7 +411,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
411 tar xvf matcha-icefall-zh-baker.tar.bz2 411 tar xvf matcha-icefall-zh-baker.tar.bz2
412 rm matcha-icefall-zh-baker.tar.bz2 412 rm matcha-icefall-zh-baker.tar.bz2
413 413
414 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 414 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
415 415
416 node ./test_tts_non_streaming_matcha_icefall_zh.js 416 node ./test_tts_non_streaming_matcha_icefall_zh.js
417 ``` 417 ```
@@ -9,7 +9,7 @@ function createOfflineTts() { @@ -9,7 +9,7 @@ function createOfflineTts() {
9 model: { 9 model: {
10 matcha: { 10 matcha: {
11 acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', 11 acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
12 - vocoder: './hifigan_v2.onnx', 12 + vocoder: './vocos-22khz-univ.onnx',
13 lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', 13 lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
14 tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', 14 tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
15 dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', 15 dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',
@@ -9,7 +9,7 @@ function createOfflineTts() { @@ -9,7 +9,7 @@ function createOfflineTts() {
9 model: { 9 model: {
10 matcha: { 10 matcha: {
11 acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', 11 acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
12 - vocoder: './hifigan_v2.onnx', 12 + vocoder: './vocos-22khz-univ.onnx',
13 lexicon: './matcha-icefall-zh-baker/lexicon.txt', 13 lexicon: './matcha-icefall-zh-baker/lexicon.txt',
14 tokens: './matcha-icefall-zh-baker/tokens.txt', 14 tokens: './matcha-icefall-zh-baker/tokens.txt',
15 dictDir: './matcha-icefall-zh-baker/dict', 15 dictDir: './matcha-icefall-zh-baker/dict',
@@ -83,7 +83,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -83,7 +83,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
83 tar xvf matcha-icefall-zh-baker.tar.bz2 83 tar xvf matcha-icefall-zh-baker.tar.bz2
84 rm matcha-icefall-zh-baker.tar.bz2 84 rm matcha-icefall-zh-baker.tar.bz2
85 85
86 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 86 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
87 87
88 node ./test-offline-tts-matcha-zh.js 88 node ./test-offline-tts-matcha-zh.js
89 ``` 89 ```
@@ -101,7 +101,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i @@ -101,7 +101,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i
101 tar xf matcha-icefall-en_US-ljspeech.tar.bz2 101 tar xf matcha-icefall-en_US-ljspeech.tar.bz2
102 rm matcha-icefall-en_US-ljspeech.tar.bz2 102 rm matcha-icefall-en_US-ljspeech.tar.bz2
103 103
104 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 104 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
105 105
106 node ./test-offline-tts-matcha-en.js 106 node ./test-offline-tts-matcha-en.js
107 ``` 107 ```
@@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx');
5 function createOfflineTts() { 5 function createOfflineTts() {
6 let offlineTtsMatchaModelConfig = { 6 let offlineTtsMatchaModelConfig = {
7 acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', 7 acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
8 - vocoder: './hifigan_v2.onnx', 8 + vocoder: './vocos-22khz-univ.onnx',
9 lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', 9 lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
10 tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', 10 tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
11 dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', 11 dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',
@@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx');
5 function createOfflineTts() { 5 function createOfflineTts() {
6 let offlineTtsMatchaModelConfig = { 6 let offlineTtsMatchaModelConfig = {
7 acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', 7 acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
8 - vocoder: './hifigan_v2.onnx', 8 + vocoder: './vocos-22khz-univ.onnx',
9 lexicon: './matcha-icefall-zh-baker/lexicon.txt', 9 lexicon: './matcha-icefall-zh-baker/lexicon.txt',
10 tokens: './matcha-icefall-zh-baker/tokens.txt', 10 tokens: './matcha-icefall-zh-baker/tokens.txt',
11 dictDir: './matcha-icefall-zh-baker/dict', 11 dictDir: './matcha-icefall-zh-baker/dict',
@@ -115,7 +115,7 @@ var @@ -115,7 +115,7 @@ var
115 Config: TSherpaOnnxOfflineTtsConfig; 115 Config: TSherpaOnnxOfflineTtsConfig;
116 begin 116 begin
117 Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; 117 Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
118 - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; 118 + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
119 Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; 119 Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
120 Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; 120 Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
121 Config.Model.NumThreads := 1; 121 Config.Model.NumThreads := 1;
@@ -21,7 +21,7 @@ var @@ -21,7 +21,7 @@ var
21 Config: TSherpaOnnxOfflineTtsConfig; 21 Config: TSherpaOnnxOfflineTtsConfig;
22 begin 22 begin
23 Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; 23 Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
24 - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; 24 + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
25 Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; 25 Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
26 Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; 26 Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
27 Config.Model.NumThreads := 1; 27 Config.Model.NumThreads := 1;
@@ -115,7 +115,7 @@ var @@ -115,7 +115,7 @@ var
115 Config: TSherpaOnnxOfflineTtsConfig; 115 Config: TSherpaOnnxOfflineTtsConfig;
116 begin 116 begin
117 Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx'; 117 Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
118 - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; 118 + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
119 Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; 119 Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
120 Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; 120 Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt';
121 Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; 121 Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict';
@@ -21,7 +21,7 @@ var @@ -21,7 +21,7 @@ var
21 Config: TSherpaOnnxOfflineTtsConfig; 21 Config: TSherpaOnnxOfflineTtsConfig;
22 begin 22 begin
23 Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx'; 23 Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
24 - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; 24 + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx';
25 Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; 25 Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
26 Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; 26 Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt';
27 Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; 27 Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict';
@@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
33 rm matcha-icefall-en_US-ljspeech.tar.bz2 33 rm matcha-icefall-en_US-ljspeech.tar.bz2
34 fi 34 fi
35 35
36 -if [ ! -f ./hifigan_v2.onnx ]; then  
37 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 36 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  37 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
38 fi 38 fi
39 39
40 fpc \ 40 fpc \
@@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
33 rm matcha-icefall-en_US-ljspeech.tar.bz2 33 rm matcha-icefall-en_US-ljspeech.tar.bz2
34 fi 34 fi
35 35
36 -if [ ! -f ./hifigan_v2.onnx ]; then  
37 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 36 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  37 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
38 fi 38 fi
39 39
40 fpc \ 40 fpc \
@@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
32 rm matcha-icefall-zh-baker.tar.bz2 32 rm matcha-icefall-zh-baker.tar.bz2
33 fi 33 fi
34 34
35 -if [ ! -f ./hifigan_v2.onnx ]; then  
36 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 35 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  36 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
37 fi 37 fi
38 38
39 fpc \ 39 fpc \
@@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
32 rm matcha-icefall-zh-baker.tar.bz2 32 rm matcha-icefall-zh-baker.tar.bz2
33 fi 33 fi
34 34
35 -if [ ! -f ./hifigan_v2.onnx ]; then  
36 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 35 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  36 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
37 fi 37 fi
38 38
39 fpc \ 39 fpc \
@@ -59,11 +59,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -59,11 +59,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
59 tar xvf matcha-icefall-zh-baker.tar.bz2 59 tar xvf matcha-icefall-zh-baker.tar.bz2
60 rm matcha-icefall-zh-baker.tar.bz2 60 rm matcha-icefall-zh-baker.tar.bz2
61 61
62 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 62 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
63 63
64 python3 ./python-api-examples/offline-tts-play.py \ 64 python3 ./python-api-examples/offline-tts-play.py \
65 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 65 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
66 - --matcha-vocoder=./hifigan_v2.onnx \ 66 + --matcha-vocoder=./vocos-22khz-univ.onnx \
67 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 67 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
68 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 68 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
69 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ 69 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
@@ -77,11 +77,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -77,11 +77,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
77 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 77 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
78 rm matcha-icefall-en_US-ljspeech.tar.bz2 78 rm matcha-icefall-en_US-ljspeech.tar.bz2
79 79
80 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 80 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
81 81
82 python3 ./python-api-examples/offline-tts-play.py \ 82 python3 ./python-api-examples/offline-tts-play.py \
83 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 83 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
84 - --matcha-vocoder=./hifigan_v2.onnx \ 84 + --matcha-vocoder=./vocos-22khz-univ.onnx \
85 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 85 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
86 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 86 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
87 --output-filename=./test-matcha-ljspeech-en.wav \ 87 --output-filename=./test-matcha-ljspeech-en.wav \
@@ -60,11 +60,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -60,11 +60,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
60 tar xvf matcha-icefall-zh-baker.tar.bz2 60 tar xvf matcha-icefall-zh-baker.tar.bz2
61 rm matcha-icefall-zh-baker.tar.bz2 61 rm matcha-icefall-zh-baker.tar.bz2
62 62
63 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 63 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
64 64
65 python3 ./python-api-examples/offline-tts.py \ 65 python3 ./python-api-examples/offline-tts.py \
66 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ 66 --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
67 - --matcha-vocoder=./hifigan_v2.onnx \ 67 + --matcha-vocoder=./vocos-22khz-univ.onnx \
68 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ 68 --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
69 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 69 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
70 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ 70 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
@@ -78,11 +78,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m @@ -78,11 +78,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m
78 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 78 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
79 rm matcha-icefall-en_US-ljspeech.tar.bz2 79 rm matcha-icefall-en_US-ljspeech.tar.bz2
80 80
81 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 81 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
82 82
83 python3 ./python-api-examples/offline-tts.py \ 83 python3 ./python-api-examples/offline-tts.py \
84 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ 84 --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
85 - --matcha-vocoder=./hifigan_v2.onnx \ 85 + --matcha-vocoder=./vocos-22khz-univ.onnx \
86 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ 86 --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
87 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ 87 --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
88 --output-filename=./test-matcha-ljspeech-en.wav \ 88 --output-filename=./test-matcha-ljspeech-en.wav \
@@ -395,7 +395,7 @@ def get_matcha_models() -> List[TtsModel]: @@ -395,7 +395,7 @@ def get_matcha_models() -> List[TtsModel]:
395 s = [f"{m.model_dir}/{r}" for r in rule_fsts] 395 s = [f"{m.model_dir}/{r}" for r in rule_fsts]
396 m.rule_fsts = ",".join(s) 396 m.rule_fsts = ",".join(s)
397 m.dict_dir = m.model_dir + "/dict" 397 m.dict_dir = m.model_dir + "/dict"
398 - m.vocoder = "hifigan_v2.onnx" 398 + m.vocoder = "vocos-22khz-univ.onnx"
399 399
400 english_persian_models = [ 400 english_persian_models = [
401 TtsModel( 401 TtsModel(
@@ -416,7 +416,7 @@ def get_matcha_models() -> List[TtsModel]: @@ -416,7 +416,7 @@ def get_matcha_models() -> List[TtsModel]:
416 ] 416 ]
417 for m in english_persian_models: 417 for m in english_persian_models:
418 m.data_dir = f"{m.model_dir}/espeak-ng-data" 418 m.data_dir = f"{m.model_dir}/espeak-ng-data"
419 - m.vocoder = "hifigan_v2.onnx" 419 + m.vocoder = "vocos-22khz-univ.onnx"
420 420
421 return chinese_models + english_persian_models 421 return chinese_models + english_persian_models
422 422
@@ -20,8 +20,8 @@ if [ ! -f male/tokens.txt ]; then @@ -20,8 +20,8 @@ if [ ! -f male/tokens.txt ]; then
20 curl -SL --output male/tokens.txt https://huggingface.co/mah92/Musa-FA_EN-Matcha-TTS-Model/resolve/main/tokens_sherpa_with_fa.txt 20 curl -SL --output male/tokens.txt https://huggingface.co/mah92/Musa-FA_EN-Matcha-TTS-Model/resolve/main/tokens_sherpa_with_fa.txt
21 fi 21 fi
22 22
23 -if [ ! -f hifigan_v2.onnx ]; then  
24 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 23 +if [ ! -f vocos-22khz-univ.onnx ]; then
  24 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
25 fi 25 fi
26 26
27 if [ ! -f .add-meta-data.done ]; then 27 if [ ! -f .add-meta-data.done ]; then
@@ -31,14 +31,14 @@ fi @@ -31,14 +31,14 @@ fi
31 31
32 python3 ./test.py \ 32 python3 ./test.py \
33 --am ./female/model.onnx \ 33 --am ./female/model.onnx \
34 - --vocoder ./hifigan_v2.onnx \ 34 + --vocoder ./vocos-22khz-univ.onnx \
35 --tokens ./female/tokens.txt \ 35 --tokens ./female/tokens.txt \
36 --text "This is a test. این یک نمونه ی تست فارسی است." \ 36 --text "This is a test. این یک نمونه ی تست فارسی است." \
37 --out-wav "./female-en-fa.wav" 37 --out-wav "./female-en-fa.wav"
38 38
39 python3 ./test.py \ 39 python3 ./test.py \
40 --am ./male/model.onnx \ 40 --am ./male/model.onnx \
41 - --vocoder ./hifigan_v2.onnx \ 41 + --vocoder ./vocos-22khz-univ.onnx \
42 --tokens ./male/tokens.txt \ 42 --tokens ./male/tokens.txt \
43 --text "This is a test. این یک نمونه ی تست فارسی است." \ 43 --text "This is a test. این یک نمونه ی تست فارسی است." \
44 --out-wav "./male-en-fa.wav" 44 --out-wav "./male-en-fa.wav"
@@ -183,6 +183,8 @@ if(SHERPA_ONNX_ENABLE_TTS) @@ -183,6 +183,8 @@ if(SHERPA_ONNX_ENABLE_TTS)
183 offline-tts-vits-model.cc 183 offline-tts-vits-model.cc
184 offline-tts.cc 184 offline-tts.cc
185 piper-phonemize-lexicon.cc 185 piper-phonemize-lexicon.cc
  186 + vocoder.cc
  187 + vocos-vocoder.cc
186 ) 188 )
187 endif() 189 endif()
188 190
@@ -45,11 +45,21 @@ class HifiganVocoder::Impl { @@ -45,11 +45,21 @@ class HifiganVocoder::Impl {
45 Init(buf.data(), buf.size()); 45 Init(buf.data(), buf.size());
46 } 46 }
47 47
48 - Ort::Value Run(Ort::Value mel) const { 48 + std::vector<float> Run(Ort::Value mel) const {
49 auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1, 49 auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1,
50 output_names_ptr_.data(), output_names_ptr_.size()); 50 output_names_ptr_.data(), output_names_ptr_.size());
51 51
52 - return std::move(out[0]); 52 + std::vector<int64_t> audio_shape =
  53 + out[0].GetTensorTypeAndShapeInfo().GetShape();
  54 +
  55 + int64_t total = 1;
  56 + // The output shape may be (1, 1, total) or (1, total) or (total,)
  57 + for (auto i : audio_shape) {
  58 + total *= i;
  59 + }
  60 +
  61 + const float *p = out[0].GetTensorData<float>();
  62 + return {p, p + total};
53 } 63 }
54 64
55 private: 65 private:
@@ -88,7 +98,7 @@ HifiganVocoder::HifiganVocoder(Manager *mgr, int32_t num_threads, @@ -88,7 +98,7 @@ HifiganVocoder::HifiganVocoder(Manager *mgr, int32_t num_threads,
88 98
89 HifiganVocoder::~HifiganVocoder() = default; 99 HifiganVocoder::~HifiganVocoder() = default;
90 100
91 -Ort::Value HifiganVocoder::Run(Ort::Value mel) const { 101 +std::vector<float> HifiganVocoder::Run(Ort::Value mel) const {
92 return impl_->Run(std::move(mel)); 102 return impl_->Run(std::move(mel));
93 } 103 }
94 104
@@ -7,14 +7,16 @@ @@ -7,14 +7,16 @@
7 7
8 #include <memory> 8 #include <memory>
9 #include <string> 9 #include <string>
  10 +#include <vector>
10 11
11 #include "onnxruntime_cxx_api.h" // NOLINT 12 #include "onnxruntime_cxx_api.h" // NOLINT
  13 +#include "sherpa-onnx/csrc/vocoder.h"
12 14
13 namespace sherpa_onnx { 15 namespace sherpa_onnx {
14 16
15 -class HifiganVocoder { 17 +class HifiganVocoder : public Vocoder {
16 public: 18 public:
17 - ~HifiganVocoder(); 19 + ~HifiganVocoder() override;
18 20
19 HifiganVocoder(int32_t num_threads, const std::string &provider, 21 HifiganVocoder(int32_t num_threads, const std::string &provider,
20 const std::string &model); 22 const std::string &model);
@@ -26,7 +28,7 @@ class HifiganVocoder { @@ -26,7 +28,7 @@ class HifiganVocoder {
26 /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). 28 /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames).
27 * @return Return a float32 vector containing audio samples. 29 * @return Return a float32 vector containing audio samples.
28 */ 30 */
29 - Ort::Value Run(Ort::Value mel) const; 31 + std::vector<float> Run(Ort::Value mel) const override;
30 32
31 private: 33 private:
32 class Impl; 34 class Impl;
@@ -13,7 +13,6 @@ @@ -13,7 +13,6 @@
13 #include "fst/extensions/far/far.h" 13 #include "fst/extensions/far/far.h"
14 #include "kaldifst/csrc/kaldi-fst-io.h" 14 #include "kaldifst/csrc/kaldi-fst-io.h"
15 #include "kaldifst/csrc/text-normalizer.h" 15 #include "kaldifst/csrc/text-normalizer.h"
16 -#include "sherpa-onnx/csrc/hifigan-vocoder.h"  
17 #include "sherpa-onnx/csrc/jieba-lexicon.h" 16 #include "sherpa-onnx/csrc/jieba-lexicon.h"
18 #include "sherpa-onnx/csrc/lexicon.h" 17 #include "sherpa-onnx/csrc/lexicon.h"
19 #include "sherpa-onnx/csrc/macros.h" 18 #include "sherpa-onnx/csrc/macros.h"
@@ -25,6 +24,7 @@ @@ -25,6 +24,7 @@
25 #include "sherpa-onnx/csrc/onnx-utils.h" 24 #include "sherpa-onnx/csrc/onnx-utils.h"
26 #include "sherpa-onnx/csrc/piper-phonemize-lexicon.h" 25 #include "sherpa-onnx/csrc/piper-phonemize-lexicon.h"
27 #include "sherpa-onnx/csrc/text-utils.h" 26 #include "sherpa-onnx/csrc/text-utils.h"
  27 +#include "sherpa-onnx/csrc/vocoder.h"
28 28
29 namespace sherpa_onnx { 29 namespace sherpa_onnx {
30 30
@@ -33,9 +33,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { @@ -33,9 +33,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
33 explicit OfflineTtsMatchaImpl(const OfflineTtsConfig &config) 33 explicit OfflineTtsMatchaImpl(const OfflineTtsConfig &config)
34 : config_(config), 34 : config_(config),
35 model_(std::make_unique<OfflineTtsMatchaModel>(config.model)), 35 model_(std::make_unique<OfflineTtsMatchaModel>(config.model)),
36 - vocoder_(std::make_unique<HifiganVocoder>(  
37 - config.model.num_threads, config.model.provider,  
38 - config.model.matcha.vocoder)) { 36 + vocoder_(Vocoder::Create(config.model)) {
39 InitFrontend(); 37 InitFrontend();
40 38
41 if (!config.rule_fsts.empty()) { 39 if (!config.rule_fsts.empty()) {
@@ -92,9 +90,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { @@ -92,9 +90,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
92 OfflineTtsMatchaImpl(Manager *mgr, const OfflineTtsConfig &config) 90 OfflineTtsMatchaImpl(Manager *mgr, const OfflineTtsConfig &config)
93 : config_(config), 91 : config_(config),
94 model_(std::make_unique<OfflineTtsMatchaModel>(mgr, config.model)), 92 model_(std::make_unique<OfflineTtsMatchaModel>(mgr, config.model)),
95 - vocoder_(std::make_unique<HifiganVocoder>(  
96 - mgr, config.model.num_threads, config.model.provider,  
97 - config.model.matcha.vocoder)) { 93 + vocoder_(Vocoder::Create(mgr, config.model)) {
98 InitFrontend(mgr); 94 InitFrontend(mgr);
99 95
100 if (!config.rule_fsts.empty()) { 96 if (!config.rule_fsts.empty()) {
@@ -382,22 +378,11 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { @@ -382,22 +378,11 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
382 memory_info, x.data(), x.size(), x_shape.data(), x_shape.size()); 378 memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());
383 379
384 Ort::Value mel = model_->Run(std::move(x_tensor), sid, speed); 380 Ort::Value mel = model_->Run(std::move(x_tensor), sid, speed);
385 - Ort::Value audio = vocoder_->Run(std::move(mel));  
386 -  
387 - std::vector<int64_t> audio_shape =  
388 - audio.GetTensorTypeAndShapeInfo().GetShape();  
389 -  
390 - int64_t total = 1;  
391 - // The output shape may be (1, 1, total) or (1, total) or (total,)  
392 - for (auto i : audio_shape) {  
393 - total *= i;  
394 - }  
395 -  
396 - const float *p = audio.GetTensorData<float>();  
397 381
398 GeneratedAudio ans; 382 GeneratedAudio ans;
  383 +
  384 + ans.samples = vocoder_->Run(std::move(mel));
399 ans.sample_rate = model_->GetMetaData().sample_rate; 385 ans.sample_rate = model_->GetMetaData().sample_rate;
400 - ans.samples = std::vector<float>(p, p + total);  
401 386
402 float silence_scale = config_.silence_scale; 387 float silence_scale = config_.silence_scale;
403 if (silence_scale != 1) { 388 if (silence_scale != 1) {
@@ -410,7 +395,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { @@ -410,7 +395,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
410 private: 395 private:
411 OfflineTtsConfig config_; 396 OfflineTtsConfig config_;
412 std::unique_ptr<OfflineTtsMatchaModel> model_; 397 std::unique_ptr<OfflineTtsMatchaModel> model_;
413 - std::unique_ptr<HifiganVocoder> vocoder_; 398 + std::unique_ptr<Vocoder> vocoder_;
414 std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_; 399 std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_;
415 std::unique_ptr<OfflineTtsFrontend> frontend_; 400 std::unique_ptr<OfflineTtsFrontend> frontend_;
416 }; 401 };
  1 +// sherpa-onnx/csrc/vocoder.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#include "sherpa-onnx/csrc/vocoder.h"
  6 +
  7 +#if __ANDROID_API__ >= 9
  8 +#include "android/asset_manager.h"
  9 +#include "android/asset_manager_jni.h"
  10 +#endif
  11 +
  12 +#if __OHOS__
  13 +#include "rawfile/raw_file_manager.h"
  14 +#endif
  15 +
  16 +#include "sherpa-onnx/csrc/file-utils.h"
  17 +#include "sherpa-onnx/csrc/hifigan-vocoder.h"
  18 +#include "sherpa-onnx/csrc/macros.h"
  19 +#include "sherpa-onnx/csrc/onnx-utils.h"
  20 +#include "sherpa-onnx/csrc/vocos-vocoder.h"
  21 +
  22 +namespace sherpa_onnx {
  23 +
  24 +namespace {
  25 +
  26 +enum class ModelType : std::uint8_t {
  27 + kHifigan,
  28 + kVocoos,
  29 + kUnknown,
  30 +};
  31 +
  32 +} // namespace
  33 +
  34 +static ModelType GetModelType(char *model_data, size_t model_data_length,
  35 + bool debug) {
  36 + Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
  37 + Ort::SessionOptions sess_opts;
  38 + sess_opts.SetIntraOpNumThreads(1);
  39 + sess_opts.SetInterOpNumThreads(1);
  40 +
  41 + auto sess = std::make_unique<Ort::Session>(env, model_data, model_data_length,
  42 + sess_opts);
  43 +
  44 + Ort::ModelMetadata meta_data = sess->GetModelMetadata();
  45 + if (debug) {
  46 + std::ostringstream os;
  47 + PrintModelMetadata(os, meta_data);
  48 +#if __OHOS__
  49 + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
  50 +#else
  51 + SHERPA_ONNX_LOGE("%s", os.str().c_str());
  52 +#endif
  53 + }
  54 +
  55 + Ort::AllocatorWithDefaultOptions allocator;
  56 + auto model_type =
  57 + LookupCustomModelMetaData(meta_data, "model_type", allocator);
  58 + if (model_type.empty()) {
  59 + SHERPA_ONNX_LOGE(
  60 + "No model_type in the metadata!\n"
  61 + "Please make sure you are using the vocoder from "
  62 + "https://github.com/k2-fsa/sherpa-onnx/releases/tag/vocoder-models");
  63 + return ModelType::kUnknown;
  64 + }
  65 +
  66 + if (model_type == "hifigan") {
  67 + return ModelType::kHifigan;
  68 + } else if (model_type == "vocos") {
  69 + return ModelType::kVocoos;
  70 + } else {
  71 + SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str());
  72 + return ModelType::kUnknown;
  73 + }
  74 +}
  75 +
  76 +std::unique_ptr<Vocoder> Vocoder::Create(const OfflineTtsModelConfig &config) {
  77 + auto buffer = ReadFile(config.matcha.vocoder);
  78 + auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  79 +
  80 + switch (model_type) {
  81 + case ModelType::kHifigan:
  82 + return std::make_unique<HifiganVocoder>(
  83 + config.num_threads, config.provider, config.matcha.vocoder);
  84 + case ModelType::kVocoos:
  85 + return std::make_unique<VocosVocoder>(config);
  86 + case ModelType::kUnknown:
  87 + SHERPA_ONNX_LOGE("Unknown model type in vocoder!");
  88 + return nullptr;
  89 + }
  90 +}
  91 +
  92 +template <typename Manager>
  93 +std::unique_ptr<Vocoder> Vocoder::Create(Manager *mgr,
  94 + const OfflineTtsModelConfig &config) {
  95 + auto buffer = ReadFile(mgr, config.matcha.vocoder);
  96 + auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
  97 +
  98 + switch (model_type) {
  99 + case ModelType::kHifigan:
  100 + return std::make_unique<HifiganVocoder>(
  101 + config.num_threads, config.provider, config.matcha.vocoder);
  102 + case ModelType::kVocoos:
  103 + return std::make_unique<VocosVocoder>(config);
  104 + case ModelType::kUnknown:
  105 + SHERPA_ONNX_LOGE("Unknown model type in vocoder!");
  106 + return nullptr;
  107 + }
  108 +}
  109 +
  110 +#if __ANDROID_API__ >= 9
  111 +template std::unique_ptr<Vocoder> Vocoder::Create(
  112 + AAssetManager *mgr, const OfflineTtsModelConfig &config);
  113 +#endif
  114 +
  115 +#if __OHOS__
  116 +template std::unique_ptr<Vocoder> Vocoder::Create(
  117 + NativeResourceManager *mgr, const OfflineTtsModelConfig &config);
  118 +#endif
  119 +
  120 +} // namespace sherpa_onnx
  1 +// sherpa-onnx/csrc/vocoder.h
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_VOCODER_H_
  6 +#define SHERPA_ONNX_CSRC_VOCODER_H_
  7 +
  8 +#include <memory>
  9 +#include <string>
  10 +#include <vector>
  11 +
  12 +#include "onnxruntime_cxx_api.h" // NOLINT
  13 +#include "sherpa-onnx/csrc/offline-tts-model-config.h"
  14 +
  15 +namespace sherpa_onnx {
  16 +
  17 +class Vocoder {
  18 + public:
  19 + virtual ~Vocoder() = default;
  20 +
  21 + static std::unique_ptr<Vocoder> Create(const OfflineTtsModelConfig &config);
  22 +
  23 + template <typename Manager>
  24 + static std::unique_ptr<Vocoder> Create(Manager *mgr,
  25 + const OfflineTtsModelConfig &config);
  26 +
  27 + /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames).
  28 + * @return Return a float32 vector containing audio samples.
  29 + */
  30 + virtual std::vector<float> Run(Ort::Value mel) const = 0;
  31 +};
  32 +
  33 +} // namespace sherpa_onnx
  34 +
  35 +#endif // SHERPA_ONNX_CSRC_VOCODER_H_
  1 +// sherpa-onnx/csrc/vocos-vocoder.cc
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#include "sherpa-onnx/csrc/vocos-vocoder.h"
  6 +
  7 +#include <string>
  8 +#include <utility>
  9 +#include <vector>
  10 +
  11 +#if __ANDROID_API__ >= 9
  12 +#include "android/asset_manager.h"
  13 +#include "android/asset_manager_jni.h"
  14 +#endif
  15 +
  16 +#if __OHOS__
  17 +#include "rawfile/raw_file_manager.h"
  18 +#endif
  19 +
  20 +#include "kaldi-native-fbank/csrc/istft.h"
  21 +#include "sherpa-onnx/csrc/file-utils.h"
  22 +#include "sherpa-onnx/csrc/macros.h"
  23 +#include "sherpa-onnx/csrc/onnx-utils.h"
  24 +#include "sherpa-onnx/csrc/session.h"
  25 +
  26 +namespace sherpa_onnx {
  27 +
// Inverse-STFT parameters stored in the custom metadata of a Vocos ONNX
// model. Each field is filled from the metadata key of the same name and is
// later copied into the knf::StftConfig field of the same name.
//
// The integer fields default to 0 so that an instance never holds
// indeterminate values before the metadata has been read.
struct VocosModelMetaData {
  int32_t n_fft = 0;       // metadata key "n_fft"
  int32_t hop_length = 0;  // metadata key "hop_length"
  int32_t win_length = 0;  // metadata key "win_length"
  int32_t center = 0;      // metadata key "center"; stored as an integer
  int32_t normalized = 0;  // metadata key "normalized"; stored as an integer
  std::string window_type;  // metadata key "window_type"
  std::string pad_mode;     // metadata key "pad_mode"
};
  37 +
  38 +class VocosVocoder::Impl {
  39 + public:
  40 + explicit Impl(const OfflineTtsModelConfig &config)
  41 + : config_(config),
  42 + env_(ORT_LOGGING_LEVEL_ERROR),
  43 + sess_opts_(GetSessionOptions(config.num_threads, config.provider)),
  44 + allocator_{} {
  45 + auto buf = ReadFile(config.matcha.vocoder);
  46 + Init(buf.data(), buf.size());
  47 + }
  48 +
  49 + template <typename Manager>
  50 + explicit Impl(Manager *mgr, const OfflineTtsModelConfig &config)
  51 + : config_(config),
  52 + env_(ORT_LOGGING_LEVEL_ERROR),
  53 + sess_opts_(GetSessionOptions(config.num_threads, config.provider)),
  54 + allocator_{} {
  55 + auto buf = ReadFile(mgr, config.matcha.vocoder);
  56 + Init(buf.data(), buf.size());
  57 + }
  58 +
  59 + std::vector<float> Run(Ort::Value mel) const {
  60 + auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1,
  61 + output_names_ptr_.data(), output_names_ptr_.size());
  62 +
  63 + std::vector<int64_t> shape = out[0].GetTensorTypeAndShapeInfo().GetShape();
  64 +
  65 + if (shape[0] != 1) {
  66 + SHERPA_ONNX_LOGE("Support only batch size 1, given: %d",
  67 + static_cast<int32_t>(shape[0]));
  68 + SHERPA_ONNX_EXIT(-1);
  69 + }
  70 +
  71 + knf::StftResult stft_result;
  72 + stft_result.num_frames = shape[2];
  73 + stft_result.real.resize(shape[1] * shape[2]);
  74 + stft_result.imag.resize(shape[1] * shape[2]);
  75 +
  76 + // stft_result.real: (num_frames, n_fft/2+1), flattened in row major
  77 +
  78 + // mag.shape: (batch_size, n_fft/2+1, num_frames)
  79 + const float *p_mag = out[0].GetTensorData<float>();
  80 + const float *p_x = out[1].GetTensorData<float>();
  81 + const float *p_y = out[2].GetTensorData<float>();
  82 +
  83 + for (int32_t frame_index = 0; frame_index < static_cast<int32_t>(shape[2]);
  84 + ++frame_index) {
  85 + for (int32_t bin = 0; bin < static_cast<int32_t>(shape[1]); ++bin) {
  86 + stft_result.real[frame_index * shape[1] + bin] =
  87 + p_mag[bin * shape[2] + frame_index] *
  88 + p_x[bin * shape[2] + frame_index];
  89 + stft_result.imag[frame_index * shape[1] + bin] =
  90 + p_mag[bin * shape[2] + frame_index] *
  91 + p_y[bin * shape[2] + frame_index];
  92 + }
  93 + }
  94 +
  95 + knf::StftConfig stft_config;
  96 + stft_config.n_fft = meta_.n_fft;
  97 + stft_config.hop_length = meta_.hop_length;
  98 + stft_config.win_length = meta_.win_length;
  99 + stft_config.normalized = meta_.normalized;
  100 + stft_config.center = meta_.center;
  101 + stft_config.window_type = meta_.window_type;
  102 + stft_config.pad_mode = meta_.pad_mode;
  103 +
  104 + knf::IStft istft(stft_config);
  105 + return istft.Compute(stft_result);
  106 + }
  107 +
  108 + private:
  109 + void Init(void *model_data, size_t model_data_length) {
  110 + sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
  111 + sess_opts_);
  112 +
  113 + GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
  114 +
  115 + GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);
  116 +
  117 + // get meta data
  118 + Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
  119 + if (config_.debug) {
  120 + std::ostringstream os;
  121 + os << "---Vocos model---\n";
  122 + PrintModelMetadata(os, meta_data);
  123 +
  124 + os << "----------input names----------\n";
  125 + int32_t i = 0;
  126 + for (const auto &s : input_names_) {
  127 + os << i << " " << s << "\n";
  128 + ++i;
  129 + }
  130 + os << "----------output names----------\n";
  131 + i = 0;
  132 + for (const auto &s : output_names_) {
  133 + os << i << " " << s << "\n";
  134 + ++i;
  135 + }
  136 +
  137 +#if __OHOS__
  138 + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
  139 +#else
  140 + SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
  141 +#endif
  142 + }
  143 +
  144 + Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
  145 + SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft");
  146 + SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length");
  147 + SHERPA_ONNX_READ_META_DATA(meta_.win_length, "win_length");
  148 + SHERPA_ONNX_READ_META_DATA(meta_.center, "center");
  149 + SHERPA_ONNX_READ_META_DATA(meta_.normalized, "normalized");
  150 + SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type");
  151 + SHERPA_ONNX_READ_META_DATA_STR(meta_.pad_mode, "pad_mode");
  152 + }
  153 +
  154 + private:
  155 + OfflineTtsModelConfig config_;
  156 + VocosModelMetaData meta_;
  157 +
  158 + Ort::Env env_;
  159 + Ort::SessionOptions sess_opts_;
  160 + Ort::AllocatorWithDefaultOptions allocator_;
  161 +
  162 + std::unique_ptr<Ort::Session> sess_;
  163 +
  164 + std::vector<std::string> input_names_;
  165 + std::vector<const char *> input_names_ptr_;
  166 +
  167 + std::vector<std::string> output_names_;
  168 + std::vector<const char *> output_names_ptr_;
  169 +};
  170 +
  171 +VocosVocoder::VocosVocoder(const OfflineTtsModelConfig &config)
  172 + : impl_(std::make_unique<Impl>(config)) {}
  173 +
  174 +template <typename Manager>
  175 +VocosVocoder::VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config)
  176 + : impl_(std::make_unique<Impl>(mgr, config)) {}
  177 +
  178 +VocosVocoder::~VocosVocoder() = default;
  179 +
  180 +std::vector<float> VocosVocoder::Run(Ort::Value mel) const {
  181 + return impl_->Run(std::move(mel));
  182 +}
  183 +
  184 +#if __ANDROID_API__ >= 9
  185 +template VocosVocoder::VocosVocoder(AAssetManager *mgr,
  186 + const OfflineTtsModelConfig &config);
  187 +#endif
  188 +
  189 +#if __OHOS__
  190 +template VocosVocoder::VocosVocoder(NativeResourceManager *mgr,
  191 + const OfflineTtsModelConfig &config);
  192 +#endif
  193 +
  194 +} // namespace sherpa_onnx
  1 +// sherpa-onnx/csrc/vocos-vocoder.h
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_
  6 +#define SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_
  7 +
  8 +#include <memory>
  9 +#include <string>
  10 +#include <vector>
  11 +
  12 +#include "onnxruntime_cxx_api.h" // NOLINT
  13 +#include "sherpa-onnx/csrc/offline-tts-model-config.h"
  14 +#include "sherpa-onnx/csrc/vocoder.h"
  15 +
  16 +namespace sherpa_onnx {
  17 +
  18 +class VocosVocoder : public Vocoder {
  19 + public:
  20 + ~VocosVocoder() override;
  21 +
  22 + explicit VocosVocoder(const OfflineTtsModelConfig &config);
  23 +
  24 + template <typename Manager>
  25 + VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config);
  26 +
  27 + /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames).
  29 + * @return Return a float32 vector containing audio samples.
  29 + */
  30 + std::vector<float> Run(Ort::Value mel) const override;
  31 +
  32 + private:
  33 + class Impl;
  34 + std::unique_ptr<Impl> impl_;
  35 +};
  36 +
  37 +} // namespace sherpa_onnx
  38 +
  39 +#endif // SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_
@@ -17,8 +17,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then @@ -17,8 +17,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
17 rm matcha-icefall-en_US-ljspeech.tar.bz2 17 rm matcha-icefall-en_US-ljspeech.tar.bz2
18 fi 18 fi
19 19
20 -if [ ! -f ./hifigan_v2.onnx ]; then  
21 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 20 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  21 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
22 fi 22 fi
23 23
24 if [ ! -e ./tts-matcha-en ]; then 24 if [ ! -e ./tts-matcha-en ]; then
@@ -16,8 +16,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then @@ -16,8 +16,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
16 rm matcha-icefall-zh-baker.tar.bz2 16 rm matcha-icefall-zh-baker.tar.bz2
17 fi 17 fi
18 18
19 -if [ ! -f ./hifigan_v2.onnx ]; then  
20 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx 19 +if [ ! -f ./vocos-22khz-univ.onnx ]; then
  20 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
21 fi 21 fi
22 22
23 if [ ! -e ./tts-matcha-zh ]; then 23 if [ ! -e ./tts-matcha-zh ]; then
@@ -6,7 +6,7 @@ class MyClass { @@ -6,7 +6,7 @@ class MyClass {
6 6
7 func run() { 7 func run() {
8 let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx" 8 let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"
9 - let vocoder = "./hifigan_v2.onnx" 9 + let vocoder = "./vocos-22khz-univ.onnx"
10 let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt" 10 let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"
11 let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data" 11 let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"
12 let matcha = sherpaOnnxOfflineTtsMatchaModelConfig( 12 let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
@@ -6,7 +6,7 @@ class MyClass { @@ -6,7 +6,7 @@ class MyClass {
6 6
7 func run() { 7 func run() {
8 let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx" 8 let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"
9 - let vocoder = "./hifigan_v2.onnx" 9 + let vocoder = "./vocos-22khz-univ.onnx"
10 let lexicon = "./matcha-icefall-zh-baker/lexicon.txt" 10 let lexicon = "./matcha-icefall-zh-baker/lexicon.txt"
11 let tokens = "./matcha-icefall-zh-baker/tokens.txt" 11 let tokens = "./matcha-icefall-zh-baker/tokens.txt"
12 let dictDir = "./matcha-icefall-zh-baker/dict" 12 let dictDir = "./matcha-icefall-zh-baker/dict"