正在显示
62 个修改的文件
包含
558 行增加
和
162 行删除
| @@ -121,19 +121,19 @@ rm -rf kokoro-en-v0_19 | @@ -121,19 +121,19 @@ rm -rf kokoro-en-v0_19 | ||
| 121 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | 121 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 |
| 122 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 122 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 123 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 123 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 124 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 124 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 125 | 125 | ||
| 126 | node ./test_tts_non_streaming_matcha_icefall_en.js | 126 | node ./test_tts_non_streaming_matcha_icefall_en.js |
| 127 | -rm hifigan_v2.onnx | 127 | +rm vocos-22khz-univ.onnx |
| 128 | rm -rf matcha-icefall-en_US-ljspeech | 128 | rm -rf matcha-icefall-en_US-ljspeech |
| 129 | 129 | ||
| 130 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 | 130 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 |
| 131 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 131 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 132 | rm matcha-icefall-zh-baker.tar.bz2 | 132 | rm matcha-icefall-zh-baker.tar.bz2 |
| 133 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 133 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 134 | 134 | ||
| 135 | node ./test_tts_non_streaming_matcha_icefall_zh.js | 135 | node ./test_tts_non_streaming_matcha_icefall_zh.js |
| 136 | -rm hifigan_v2.onnx | 136 | +rm vocos-22khz-univ.onnx |
| 137 | rm -rf matcha-icefall-zh-baker | 137 | rm -rf matcha-icefall-zh-baker |
| 138 | ls -lh *.wav | 138 | ls -lh *.wav |
| 139 | 139 |
| @@ -42,12 +42,13 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -42,12 +42,13 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 42 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 42 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 43 | rm matcha-icefall-zh-baker.tar.bz2 | 43 | rm matcha-icefall-zh-baker.tar.bz2 |
| 44 | 44 | ||
| 45 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 45 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 46 | 46 | ||
| 47 | node ./test-offline-tts-matcha-zh.js | 47 | node ./test-offline-tts-matcha-zh.js |
| 48 | 48 | ||
| 49 | rm -rf matcha-icefall-zh-baker | 49 | rm -rf matcha-icefall-zh-baker |
| 50 | -rm hifigan_v2.onnx | 50 | +rm vocos-22khz-univ.onnx |
| 51 | + | ||
| 51 | 52 | ||
| 52 | echo "---" | 53 | echo "---" |
| 53 | 54 | ||
| @@ -55,12 +56,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -55,12 +56,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 55 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 56 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 56 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 57 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 57 | 58 | ||
| 58 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 59 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 59 | 60 | ||
| 60 | node ./test-offline-tts-matcha-en.js | 61 | node ./test-offline-tts-matcha-en.js |
| 61 | 62 | ||
| 62 | rm -rf matcha-icefall-en_US-ljspeech | 63 | rm -rf matcha-icefall-en_US-ljspeech |
| 63 | -rm hifigan_v2.onnx | 64 | +rm vocos-22khz-univ.onnx |
| 64 | 65 | ||
| 65 | echo "---" | 66 | echo "---" |
| 66 | 67 |
| @@ -50,11 +50,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -50,11 +50,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 50 | tar xvf matcha-tts-fa_en-musa.tar.bz2 | 50 | tar xvf matcha-tts-fa_en-musa.tar.bz2 |
| 51 | rm matcha-tts-fa_en-musa.tar.bz2 | 51 | rm matcha-tts-fa_en-musa.tar.bz2 |
| 52 | 52 | ||
| 53 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 53 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 54 | + | ||
| 54 | 55 | ||
| 55 | $EXE \ | 56 | $EXE \ |
| 56 | --matcha-acoustic-model=./matcha-tts-fa_en-musa/model.onnx \ | 57 | --matcha-acoustic-model=./matcha-tts-fa_en-musa/model.onnx \ |
| 57 | - --matcha-vocoder=./hifigan_v2.onnx \ | 58 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 58 | --matcha-tokens=./matcha-tts-fa_en-musa/tokens.txt \ | 59 | --matcha-tokens=./matcha-tts-fa_en-musa/tokens.txt \ |
| 59 | --matcha-data-dir=./matcha-tts-fa_en-musa/espeak-ng-data \ | 60 | --matcha-data-dir=./matcha-tts-fa_en-musa/espeak-ng-data \ |
| 60 | --output-filename=./tts/test-matcha-fa-en-musa.wav \ | 61 | --output-filename=./tts/test-matcha-fa-en-musa.wav \ |
| @@ -62,7 +63,7 @@ $EXE \ | @@ -62,7 +63,7 @@ $EXE \ | ||
| 62 | "How are you doing today? این یک نمونه ی تست فارسی است. This is a test." | 63 | "How are you doing today? این یک نمونه ی تست فارسی است. This is a test." |
| 63 | 64 | ||
| 64 | rm -rf matcha-tts-fa_en-musa | 65 | rm -rf matcha-tts-fa_en-musa |
| 65 | -rm hifigan_v2.onnx | 66 | +rm vocos-22khz-univ.onnx |
| 66 | ls -lh tts/*.wav | 67 | ls -lh tts/*.wav |
| 67 | 68 | ||
| 68 | log "------------------------------------------------------------" | 69 | log "------------------------------------------------------------" |
| @@ -72,11 +73,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -72,11 +73,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 72 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 73 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 73 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 74 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 74 | 75 | ||
| 75 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 76 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 77 | + | ||
| 76 | 78 | ||
| 77 | $EXE \ | 79 | $EXE \ |
| 78 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 80 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 79 | - --matcha-vocoder=./hifigan_v2.onnx \ | 81 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 80 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 82 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 81 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 83 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 82 | --num-threads=2 \ | 84 | --num-threads=2 \ |
| @@ -84,7 +86,7 @@ $EXE \ | @@ -84,7 +86,7 @@ $EXE \ | ||
| 84 | --debug=1 \ | 86 | --debug=1 \ |
| 85 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | 87 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 86 | 88 | ||
| 87 | -rm hifigan_v2.onnx | 89 | +rm vocos-22khz-univ.onnx |
| 88 | rm -rf matcha-icefall-en_US-ljspeech | 90 | rm -rf matcha-icefall-en_US-ljspeech |
| 89 | ls -lh tts/*.wav | 91 | ls -lh tts/*.wav |
| 90 | 92 | ||
| @@ -95,11 +97,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -95,11 +97,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 95 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 97 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 96 | rm matcha-icefall-zh-baker.tar.bz2 | 98 | rm matcha-icefall-zh-baker.tar.bz2 |
| 97 | 99 | ||
| 98 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 100 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 99 | 101 | ||
| 100 | $EXE \ | 102 | $EXE \ |
| 101 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 103 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 102 | - --matcha-vocoder=./hifigan_v2.onnx \ | 104 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 103 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 105 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 104 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ | 106 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 105 | --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ | 107 | --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ |
| @@ -110,7 +112,7 @@ $EXE \ | @@ -110,7 +112,7 @@ $EXE \ | ||
| 110 | 112 | ||
| 111 | $EXE \ | 113 | $EXE \ |
| 112 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 114 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 113 | - --matcha-vocoder=./hifigan_v2.onnx \ | 115 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 114 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 116 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 115 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ | 117 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 116 | --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ | 118 | --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ |
| @@ -119,7 +121,7 @@ $EXE \ | @@ -119,7 +121,7 @@ $EXE \ | ||
| 119 | --output-filename=./tts/matcha-baker-zh-2.wav \ | 121 | --output-filename=./tts/matcha-baker-zh-2.wav \ |
| 120 | "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" | 122 | "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" |
| 121 | 123 | ||
| 122 | -rm hifigan_v2.onnx | 124 | +rm vocos-22khz-univ.onnx |
| 123 | rm -rf matcha-icefall-zh-baker | 125 | rm -rf matcha-icefall-zh-baker |
| 124 | 126 | ||
| 125 | log "------------------------------------------------------------" | 127 | log "------------------------------------------------------------" |
| @@ -320,18 +320,18 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -320,18 +320,18 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 320 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 320 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 321 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 321 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 322 | 322 | ||
| 323 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 323 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 324 | 324 | ||
| 325 | python3 ./python-api-examples/offline-tts.py \ | 325 | python3 ./python-api-examples/offline-tts.py \ |
| 326 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 326 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 327 | - --matcha-vocoder=./hifigan_v2.onnx \ | 327 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 328 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 328 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 329 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 329 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 330 | --output-filename=./tts/test-matcha-ljspeech-en.wav \ | 330 | --output-filename=./tts/test-matcha-ljspeech-en.wav \ |
| 331 | --num-threads=2 \ | 331 | --num-threads=2 \ |
| 332 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | 332 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 333 | 333 | ||
| 334 | -rm hifigan_v2.onnx | 334 | +rm vocos-22khz-univ.onnx |
| 335 | rm -rf matcha-icefall-en_US-ljspeech | 335 | rm -rf matcha-icefall-en_US-ljspeech |
| 336 | 336 | ||
| 337 | log "matcha-baker-zh test" | 337 | log "matcha-baker-zh test" |
| @@ -340,11 +340,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -340,11 +340,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 340 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 340 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 341 | rm matcha-icefall-zh-baker.tar.bz2 | 341 | rm matcha-icefall-zh-baker.tar.bz2 |
| 342 | 342 | ||
| 343 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 343 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 344 | 344 | ||
| 345 | python3 ./python-api-examples/offline-tts.py \ | 345 | python3 ./python-api-examples/offline-tts.py \ |
| 346 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 346 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 347 | - --matcha-vocoder=./hifigan_v2.onnx \ | 347 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 348 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 348 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 349 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ | 349 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 350 | --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ | 350 | --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ |
| @@ -353,7 +353,7 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -353,7 +353,7 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 353 | "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" | 353 | "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" |
| 354 | 354 | ||
| 355 | rm -rf matcha-icefall-zh-baker | 355 | rm -rf matcha-icefall-zh-baker |
| 356 | -rm hifigan_v2.onnx | 356 | +rm vocos-22khz-univ.onnx |
| 357 | 357 | ||
| 358 | log "vits-ljs test" | 358 | log "vits-ljs test" |
| 359 | 359 |
| @@ -228,7 +228,7 @@ jobs: | @@ -228,7 +228,7 @@ jobs: | ||
| 228 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 228 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 229 | rm matcha-icefall-zh-baker.tar.bz2 | 229 | rm matcha-icefall-zh-baker.tar.bz2 |
| 230 | 230 | ||
| 231 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 231 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 232 | 232 | ||
| 233 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | 233 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH |
| 234 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | 234 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH |
| @@ -237,7 +237,7 @@ jobs: | @@ -237,7 +237,7 @@ jobs: | ||
| 237 | 237 | ||
| 238 | rm ./matcha-tts-zh-c-api | 238 | rm ./matcha-tts-zh-c-api |
| 239 | rm -rf matcha-icefall-* | 239 | rm -rf matcha-icefall-* |
| 240 | - rm hifigan_v2.onnx | 240 | + rm vocos-22khz-univ.onnx |
| 241 | 241 | ||
| 242 | - name: Test Matcha TTS (en) | 242 | - name: Test Matcha TTS (en) |
| 243 | shell: bash | 243 | shell: bash |
| @@ -252,7 +252,7 @@ jobs: | @@ -252,7 +252,7 @@ jobs: | ||
| 252 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 252 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 253 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 253 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 254 | 254 | ||
| 255 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 255 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 256 | 256 | ||
| 257 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | 257 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH |
| 258 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | 258 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH |
| @@ -261,7 +261,7 @@ jobs: | @@ -261,7 +261,7 @@ jobs: | ||
| 261 | 261 | ||
| 262 | rm ./matcha-tts-en-c-api | 262 | rm ./matcha-tts-en-c-api |
| 263 | rm -rf matcha-icefall-* | 263 | rm -rf matcha-icefall-* |
| 264 | - rm hifigan_v2.onnx | 264 | + rm vocos-22khz-univ.onnx |
| 265 | 265 | ||
| 266 | - uses: actions/upload-artifact@v4 | 266 | - uses: actions/upload-artifact@v4 |
| 267 | with: | 267 | with: |
| @@ -237,7 +237,7 @@ jobs: | @@ -237,7 +237,7 @@ jobs: | ||
| 237 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 237 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 238 | rm matcha-icefall-zh-baker.tar.bz2 | 238 | rm matcha-icefall-zh-baker.tar.bz2 |
| 239 | 239 | ||
| 240 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 240 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 241 | 241 | ||
| 242 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | 242 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH |
| 243 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | 243 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH |
| @@ -245,7 +245,7 @@ jobs: | @@ -245,7 +245,7 @@ jobs: | ||
| 245 | ./matcha-tts-zh-cxx-api | 245 | ./matcha-tts-zh-cxx-api |
| 246 | 246 | ||
| 247 | rm -rf matcha-icefall-* | 247 | rm -rf matcha-icefall-* |
| 248 | - rm hifigan_v2.onnx | 248 | + rm vocos-22khz-univ.onnx |
| 249 | rm matcha-tts-zh-cxx-api | 249 | rm matcha-tts-zh-cxx-api |
| 250 | 250 | ||
| 251 | - name: Test Matcha TTS (en) | 251 | - name: Test Matcha TTS (en) |
| @@ -262,7 +262,7 @@ jobs: | @@ -262,7 +262,7 @@ jobs: | ||
| 262 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 262 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 263 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 263 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 264 | 264 | ||
| 265 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 265 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 266 | 266 | ||
| 267 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | 267 | export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH |
| 268 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | 268 | export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH |
| @@ -271,7 +271,7 @@ jobs: | @@ -271,7 +271,7 @@ jobs: | ||
| 271 | 271 | ||
| 272 | rm matcha-tts-en-cxx-api | 272 | rm matcha-tts-en-cxx-api |
| 273 | rm -rf matcha-icefall-* | 273 | rm -rf matcha-icefall-* |
| 274 | - rm hifigan_v2.onnx | 274 | + rm vocos-22khz-univ.onnx |
| 275 | 275 | ||
| 276 | - uses: actions/upload-artifact@v4 | 276 | - uses: actions/upload-artifact@v4 |
| 277 | with: | 277 | with: |
| @@ -265,7 +265,7 @@ jobs: | @@ -265,7 +265,7 @@ jobs: | ||
| 265 | rm -rf kokoro-en-* | 265 | rm -rf kokoro-en-* |
| 266 | 266 | ||
| 267 | rm -rf matcha-icefall-* | 267 | rm -rf matcha-icefall-* |
| 268 | - rm hifigan_v2.onnx | 268 | + rm vocos-22khz-univ.onnx |
| 269 | 269 | ||
| 270 | ./run-non-streaming-tts-piper-en.sh | 270 | ./run-non-streaming-tts-piper-en.sh |
| 271 | rm -rf vits-piper-* | 271 | rm -rf vits-piper-* |
| @@ -262,7 +262,7 @@ class MainActivity : AppCompatActivity() { | @@ -262,7 +262,7 @@ class MainActivity : AppCompatActivity() { | ||
| 262 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker | 262 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker |
| 263 | // modelDir = "matcha-icefall-zh-baker" | 263 | // modelDir = "matcha-icefall-zh-baker" |
| 264 | // acousticModelName = "model-steps-3.onnx" | 264 | // acousticModelName = "model-steps-3.onnx" |
| 265 | - // vocoder = "hifigan_v2.onnx" | 265 | + // vocoder = "vocos-22khz-univ.onnx" |
| 266 | // lexicon = "lexicon.txt" | 266 | // lexicon = "lexicon.txt" |
| 267 | // dictDir = "matcha-icefall-zh-baker/dict" | 267 | // dictDir = "matcha-icefall-zh-baker/dict" |
| 268 | 268 | ||
| @@ -271,7 +271,7 @@ class MainActivity : AppCompatActivity() { | @@ -271,7 +271,7 @@ class MainActivity : AppCompatActivity() { | ||
| 271 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker | 271 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker |
| 272 | // modelDir = "matcha-icefall-en_US-ljspeech" | 272 | // modelDir = "matcha-icefall-en_US-ljspeech" |
| 273 | // acousticModelName = "model-steps-3.onnx" | 273 | // acousticModelName = "model-steps-3.onnx" |
| 274 | - // vocoder = "hifigan_v2.onnx" | 274 | + // vocoder = "vocos-22khz-univ.onnx" |
| 275 | // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" | 275 | // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" |
| 276 | 276 | ||
| 277 | // Example 9 | 277 | // Example 9 |
| @@ -131,7 +131,7 @@ object TtsEngine { | @@ -131,7 +131,7 @@ object TtsEngine { | ||
| 131 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker | 131 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker |
| 132 | // modelDir = "matcha-icefall-zh-baker" | 132 | // modelDir = "matcha-icefall-zh-baker" |
| 133 | // acousticModelName = "model-steps-3.onnx" | 133 | // acousticModelName = "model-steps-3.onnx" |
| 134 | - // vocoder = "hifigan_v2.onnx" | 134 | + // vocoder = "vocos-22khz-univ.onnx" |
| 135 | // lexicon = "lexicon.txt" | 135 | // lexicon = "lexicon.txt" |
| 136 | // dictDir = "matcha-icefall-zh-baker/dict" | 136 | // dictDir = "matcha-icefall-zh-baker/dict" |
| 137 | // lang = "zho" | 137 | // lang = "zho" |
| @@ -141,7 +141,7 @@ object TtsEngine { | @@ -141,7 +141,7 @@ object TtsEngine { | ||
| 141 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker | 141 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker |
| 142 | // modelDir = "matcha-icefall-en_US-ljspeech" | 142 | // modelDir = "matcha-icefall-en_US-ljspeech" |
| 143 | // acousticModelName = "model-steps-3.onnx" | 143 | // acousticModelName = "model-steps-3.onnx" |
| 144 | - // vocoder = "hifigan_v2.onnx" | 144 | + // vocoder = "vocos-22khz-univ.onnx" |
| 145 | // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" | 145 | // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" |
| 146 | // lang = "eng" | 146 | // lang = "eng" |
| 147 | 147 |
| @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 13 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 13 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 14 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 14 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 15 | 15 | ||
| 16 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 16 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 17 | 17 | ||
| 18 | ./matcha-tts-en-c-api | 18 | ./matcha-tts-en-c-api |
| 19 | 19 | ||
| @@ -40,7 +40,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -40,7 +40,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 40 | config.model.matcha.acoustic_model = | 40 | config.model.matcha.acoustic_model = |
| 41 | "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; | 41 | "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; |
| 42 | 42 | ||
| 43 | - config.model.matcha.vocoder = "./hifigan_v2.onnx"; | 43 | + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; |
| 44 | 44 | ||
| 45 | config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; | 45 | config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; |
| 46 | 46 |
| @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 13 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 13 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 14 | rm matcha-icefall-zh-baker.tar.bz2 | 14 | rm matcha-icefall-zh-baker.tar.bz2 |
| 15 | 15 | ||
| 16 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 16 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 17 | 17 | ||
| 18 | ./matcha-tts-zh-c-api | 18 | ./matcha-tts-zh-c-api |
| 19 | 19 | ||
| @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 39 | memset(&config, 0, sizeof(config)); | 39 | memset(&config, 0, sizeof(config)); |
| 40 | config.model.matcha.acoustic_model = | 40 | config.model.matcha.acoustic_model = |
| 41 | "./matcha-icefall-zh-baker/model-steps-3.onnx"; | 41 | "./matcha-icefall-zh-baker/model-steps-3.onnx"; |
| 42 | - config.model.matcha.vocoder = "./hifigan_v2.onnx"; | 42 | + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; |
| 43 | config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; | 43 | config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; |
| 44 | config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; | 44 | config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; |
| 45 | config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; | 45 | config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; |
| @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 13 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 13 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 14 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 14 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 15 | 15 | ||
| 16 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 16 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 17 | 17 | ||
| 18 | ./matcha-tts-en-cxx-api | 18 | ./matcha-tts-en-cxx-api |
| 19 | 19 | ||
| @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 39 | config.model.matcha.acoustic_model = | 39 | config.model.matcha.acoustic_model = |
| 40 | "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; | 40 | "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; |
| 41 | 41 | ||
| 42 | - config.model.matcha.vocoder = "./hifigan_v2.onnx"; | 42 | + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; |
| 43 | 43 | ||
| 44 | config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; | 44 | config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; |
| 45 | 45 |
| @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 13 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 13 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 14 | rm matcha-icefall-zh-baker.tar.bz2 | 14 | rm matcha-icefall-zh-baker.tar.bz2 |
| 15 | 15 | ||
| 16 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 16 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 17 | 17 | ||
| 18 | ./matcha-tts-zh-cxx-api | 18 | ./matcha-tts-zh-cxx-api |
| 19 | 19 | ||
| @@ -37,7 +37,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -37,7 +37,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 37 | OfflineTtsConfig config; | 37 | OfflineTtsConfig config; |
| 38 | config.model.matcha.acoustic_model = | 38 | config.model.matcha.acoustic_model = |
| 39 | "./matcha-icefall-zh-baker/model-steps-3.onnx"; | 39 | "./matcha-icefall-zh-baker/model-steps-3.onnx"; |
| 40 | - config.model.matcha.vocoder = "./hifigan_v2.onnx"; | 40 | + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; |
| 41 | config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; | 41 | config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; |
| 42 | config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; | 42 | config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; |
| 43 | config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; | 43 | config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; |
| @@ -14,14 +14,14 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -14,14 +14,14 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 14 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 14 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 15 | fi | 15 | fi |
| 16 | 16 | ||
| 17 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 18 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 17 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 18 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 19 | fi | 19 | fi |
| 20 | 20 | ||
| 21 | dart run \ | 21 | dart run \ |
| 22 | ./bin/matcha-en.dart \ | 22 | ./bin/matcha-en.dart \ |
| 23 | --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 23 | --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 24 | - --vocoder ./hifigan_v2.onnx \ | 24 | + --vocoder ./vocos-22khz-univ.onnx \ |
| 25 | --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \ | 25 | --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 26 | --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 26 | --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 27 | --sid 0 \ | 27 | --sid 0 \ |
| @@ -13,14 +13,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -13,14 +13,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 13 | rm matcha-icefall-zh-baker.tar.bz2 | 13 | rm matcha-icefall-zh-baker.tar.bz2 |
| 14 | fi | 14 | fi |
| 15 | 15 | ||
| 16 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 17 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 16 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 17 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 18 | fi | 18 | fi |
| 19 | 19 | ||
| 20 | dart run \ | 20 | dart run \ |
| 21 | ./bin/matcha-zh.dart \ | 21 | ./bin/matcha-zh.dart \ |
| 22 | --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ | 22 | --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 23 | - --vocoder ./hifigan_v2.onnx \ | 23 | + --vocoder ./vocos-22khz-univ.onnx \ |
| 24 | --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ | 24 | --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ |
| 25 | --tokens ./matcha-icefall-zh-baker/tokens.txt \ | 25 | --tokens ./matcha-icefall-zh-baker/tokens.txt \ |
| 26 | --dict-dir ./matcha-icefall-zh-baker/dict \ | 26 | --dict-dir ./matcha-icefall-zh-baker/dict \ |
| @@ -33,7 +33,7 @@ dart run \ | @@ -33,7 +33,7 @@ dart run \ | ||
| 33 | dart run \ | 33 | dart run \ |
| 34 | ./bin/matcha-zh.dart \ | 34 | ./bin/matcha-zh.dart \ |
| 35 | --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ | 35 | --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 36 | - --vocoder ./hifigan_v2.onnx \ | 36 | + --vocoder ./vocos-22khz-univ.onnx \ |
| 37 | --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ | 37 | --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ |
| 38 | --tokens ./matcha-icefall-zh-baker/tokens.txt \ | 38 | --tokens ./matcha-icefall-zh-baker/tokens.txt \ |
| 39 | --dict-dir ./matcha-icefall-zh-baker/dict \ | 39 | --dict-dir ./matcha-icefall-zh-baker/dict \ |
| @@ -92,11 +92,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -92,11 +92,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 92 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 92 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 93 | rm matcha-icefall-zh-baker.tar.bz2 | 93 | rm matcha-icefall-zh-baker.tar.bz2 |
| 94 | 94 | ||
| 95 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 95 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 96 | 96 | ||
| 97 | dotnet run \ | 97 | dotnet run \ |
| 98 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 98 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 99 | - --matcha-vocoder=./hifigan_v2.onnx \ | 99 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 100 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 100 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 101 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ | 101 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 102 | --dict-dir=./matcha-icefall-zh-baker/dict \ | 102 | --dict-dir=./matcha-icefall-zh-baker/dict \ |
| @@ -111,11 +111,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -111,11 +111,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 111 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 111 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 112 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 112 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 113 | 113 | ||
| 114 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 114 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 115 | 115 | ||
| 116 | dotnet run \ | 116 | dotnet run \ |
| 117 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 117 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 118 | - --matcha-vocoder=./hifigan_v2.onnx \ | 118 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 119 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 119 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 120 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 120 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 121 | --debug=1 \ | 121 | --debug=1 \ |
| @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 12 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 12 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 13 | fi | 13 | fi |
| 14 | 14 | ||
| 15 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 16 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 15 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 16 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 17 | fi | 17 | fi |
| 18 | 18 | ||
| 19 | dotnet run \ | 19 | dotnet run \ |
| 20 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 20 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 21 | - --matcha-vocoder=./hifigan_v2.onnx \ | 21 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 22 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 22 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 23 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 23 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 24 | --debug=1 \ | 24 | --debug=1 \ |
| @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 10 | rm matcha-icefall-zh-baker.tar.bz2 | 10 | rm matcha-icefall-zh-baker.tar.bz2 |
| 11 | fi | 11 | fi |
| 12 | 12 | ||
| 13 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 14 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 13 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 14 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 15 | fi | 15 | fi |
| 16 | 16 | ||
| 17 | 17 | ||
| 18 | dotnet run \ | 18 | dotnet run \ |
| 19 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 19 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 20 | - --matcha-vocoder=./hifigan_v2.onnx \ | 20 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 21 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 21 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 22 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ | 22 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 23 | --dict-dir=./matcha-icefall-zh-baker/dict \ | 23 | --dict-dir=./matcha-icefall-zh-baker/dict \ |
| @@ -85,11 +85,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -85,11 +85,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 85 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 85 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 86 | rm matcha-icefall-zh-baker.tar.bz2 | 86 | rm matcha-icefall-zh-baker.tar.bz2 |
| 87 | 87 | ||
| 88 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 88 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 89 | 89 | ||
| 90 | dotnet run \ | 90 | dotnet run \ |
| 91 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 91 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 92 | - --matcha-vocoder=./hifigan_v2.onnx \ | 92 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 93 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 93 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 94 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ | 94 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 95 | --dict-dir=./matcha-icefall-zh-baker/dict \ | 95 | --dict-dir=./matcha-icefall-zh-baker/dict \ |
| @@ -104,11 +104,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -104,11 +104,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 104 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 104 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 105 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 105 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 106 | 106 | ||
| 107 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 107 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 108 | 108 | ||
| 109 | dotnet run \ | 109 | dotnet run \ |
| 110 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 110 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 111 | - --matcha-vocoder=./hifigan_v2.onnx \ | 111 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 112 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 112 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 113 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 113 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 114 | --debug=1 \ | 114 | --debug=1 \ |
| @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 12 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 12 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 13 | fi | 13 | fi |
| 14 | 14 | ||
| 15 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 16 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 15 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 16 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 17 | fi | 17 | fi |
| 18 | 18 | ||
| 19 | dotnet run \ | 19 | dotnet run \ |
| 20 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 20 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 21 | - --matcha-vocoder=./hifigan_v2.onnx \ | 21 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 22 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 22 | --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 23 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 23 | --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 24 | --debug=1 \ | 24 | --debug=1 \ |
| @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 10 | rm matcha-icefall-zh-baker.tar.bz2 | 10 | rm matcha-icefall-zh-baker.tar.bz2 |
| 11 | fi | 11 | fi |
| 12 | 12 | ||
| 13 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 14 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 13 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 14 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 15 | fi | 15 | fi |
| 16 | 16 | ||
| 17 | 17 | ||
| 18 | dotnet run \ | 18 | dotnet run \ |
| 19 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 19 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 20 | - --matcha-vocoder=./hifigan_v2.onnx \ | 20 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 21 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 21 | --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 22 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ | 22 | --tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 23 | --dict-dir=./matcha-icefall-zh-baker/dict \ | 23 | --dict-dir=./matcha-icefall-zh-baker/dict \ |
| @@ -12,8 +12,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -12,8 +12,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 12 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 12 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 13 | fi | 13 | fi |
| 14 | 14 | ||
| 15 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 16 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 15 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 16 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 17 | fi | 17 | fi |
| 18 | 18 | ||
| 19 | go mod tidy | 19 | go mod tidy |
| @@ -21,7 +21,7 @@ go build | @@ -21,7 +21,7 @@ go build | ||
| 21 | 21 | ||
| 22 | ./non-streaming-tts \ | 22 | ./non-streaming-tts \ |
| 23 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 23 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 24 | - --matcha-vocoder=./hifigan_v2.onnx \ | 24 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 25 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 25 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 26 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 26 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 27 | --debug=1 \ | 27 | --debug=1 \ |
| @@ -11,8 +11,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -11,8 +11,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 11 | rm matcha-icefall-zh-baker.tar.bz2 | 11 | rm matcha-icefall-zh-baker.tar.bz2 |
| 12 | fi | 12 | fi |
| 13 | 13 | ||
| 14 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 15 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 14 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 15 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 16 | fi | 16 | fi |
| 17 | 17 | ||
| 18 | go mod tidy | 18 | go mod tidy |
| @@ -20,7 +20,7 @@ go build | @@ -20,7 +20,7 @@ go build | ||
| 20 | 20 | ||
| 21 | ./non-streaming-tts \ | 21 | ./non-streaming-tts \ |
| 22 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 22 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 23 | - --matcha-vocoder=./hifigan_v2.onnx \ | 23 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 24 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 24 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 25 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ | 25 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 26 | --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ | 26 | --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ |
| @@ -159,7 +159,7 @@ function initTts(context: Context): OfflineTts { | @@ -159,7 +159,7 @@ function initTts(context: Context): OfflineTts { | ||
| 159 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker | 159 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker |
| 160 | // modelDir = 'matcha-icefall-zh-baker'; | 160 | // modelDir = 'matcha-icefall-zh-baker'; |
| 161 | // acousticModelName = 'model-steps-3.onnx'; | 161 | // acousticModelName = 'model-steps-3.onnx'; |
| 162 | - // vocoder = 'hifigan_v2.onnx'; | 162 | + // vocoder = 'vocos-22khz-univ.onnx'; |
| 163 | // lexicon = 'lexicon.txt'; | 163 | // lexicon = 'lexicon.txt'; |
| 164 | // dictDir = 'dict'; | 164 | // dictDir = 'dict'; |
| 165 | // ruleFsts = `date.fst,phone.fst,number.fst`; | 165 | // ruleFsts = `date.fst,phone.fst,number.fst`; |
| @@ -169,7 +169,7 @@ function initTts(context: Context): OfflineTts { | @@ -169,7 +169,7 @@ function initTts(context: Context): OfflineTts { | ||
| 169 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker | 169 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker |
| 170 | // modelDir = 'matcha-icefall-en_US-ljspeech'; | 170 | // modelDir = 'matcha-icefall-en_US-ljspeech'; |
| 171 | // acousticModelName = 'model-steps-3.onnx'; | 171 | // acousticModelName = 'model-steps-3.onnx'; |
| 172 | - // vocoder = 'hifigan_v2.onnx'; | 172 | + // vocoder = 'vocos-22khz-univ.onnx'; |
| 173 | // dataDir = 'espeak-ng-data'; | 173 | // dataDir = 'espeak-ng-data'; |
| 174 | 174 | ||
| 175 | // Example 10 | 175 | // Example 10 |
| @@ -131,7 +131,7 @@ func getTtsFor_matcha_icefall_zh_baker() -> SherpaOnnxOfflineTtsWrapper { | @@ -131,7 +131,7 @@ func getTtsFor_matcha_icefall_zh_baker() -> SherpaOnnxOfflineTtsWrapper { | ||
| 131 | // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker | 131 | // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker |
| 132 | 132 | ||
| 133 | let acousticModel = getResource("model-steps-3", "onnx") | 133 | let acousticModel = getResource("model-steps-3", "onnx") |
| 134 | - let vocoder = getResource("hifigan_v2", "onnx") | 134 | + let vocoder = getResource("vocos-22khz-univ", "onnx") |
| 135 | 135 | ||
| 136 | let tokens = getResource("tokens", "txt") | 136 | let tokens = getResource("tokens", "txt") |
| 137 | let lexicon = getResource("lexicon", "txt") | 137 | let lexicon = getResource("lexicon", "txt") |
| @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaEn { | @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaEn { | ||
| 10 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker | 10 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker |
| 11 | // to download model files | 11 | // to download model files |
| 12 | String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; | 12 | String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; |
| 13 | - String vocoder = "./hifigan_v2.onnx"; | 13 | + String vocoder = "./vocos-22khz-univ.onnx"; |
| 14 | String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; | 14 | String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; |
| 15 | String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"; | 15 | String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"; |
| 16 | String text = | 16 | String text = |
| @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaZh { | @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaZh { | ||
| 10 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker | 10 | // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker |
| 11 | // to download model files | 11 | // to download model files |
| 12 | String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"; | 12 | String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"; |
| 13 | - String vocoder = "./hifigan_v2.onnx"; | 13 | + String vocoder = "./vocos-22khz-univ.onnx"; |
| 14 | String tokens = "./matcha-icefall-zh-baker/tokens.txt"; | 14 | String tokens = "./matcha-icefall-zh-baker/tokens.txt"; |
| 15 | String lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; | 15 | String lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; |
| 16 | String dictDir = "./matcha-icefall-zh-baker/dict"; | 16 | String dictDir = "./matcha-icefall-zh-baker/dict"; |
| @@ -35,8 +35,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -35,8 +35,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 35 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 35 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 36 | fi | 36 | fi |
| 37 | 37 | ||
| 38 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 39 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 38 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 39 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 40 | fi | 40 | fi |
| 41 | 41 | ||
| 42 | java \ | 42 | java \ |
| @@ -34,8 +34,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -34,8 +34,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 34 | rm matcha-icefall-zh-baker.tar.bz2 | 34 | rm matcha-icefall-zh-baker.tar.bz2 |
| 35 | fi | 35 | fi |
| 36 | 36 | ||
| 37 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 38 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 37 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 38 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 39 | fi | 39 | fi |
| 40 | 40 | ||
| 41 | java \ | 41 | java \ |
| @@ -111,8 +111,8 @@ function testTts() { | @@ -111,8 +111,8 @@ function testTts() { | ||
| 111 | rm matcha-icefall-zh-baker.tar.bz2 | 111 | rm matcha-icefall-zh-baker.tar.bz2 |
| 112 | fi | 112 | fi |
| 113 | 113 | ||
| 114 | - if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 115 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 114 | + if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 115 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 116 | fi | 116 | fi |
| 117 | 117 | ||
| 118 | if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then | 118 | if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then |
| @@ -58,7 +58,7 @@ fun testMatcha() { | @@ -58,7 +58,7 @@ fun testMatcha() { | ||
| 58 | model=OfflineTtsModelConfig( | 58 | model=OfflineTtsModelConfig( |
| 59 | matcha=OfflineTtsMatchaModelConfig( | 59 | matcha=OfflineTtsMatchaModelConfig( |
| 60 | acousticModel="./matcha-icefall-zh-baker/model-steps-3.onnx", | 60 | acousticModel="./matcha-icefall-zh-baker/model-steps-3.onnx", |
| 61 | - vocoder="./hifigan_v2.onnx", | 61 | + vocoder="./vocos-22khz-univ.onnx", |
| 62 | tokens="./matcha-icefall-zh-baker/tokens.txt", | 62 | tokens="./matcha-icefall-zh-baker/tokens.txt", |
| 63 | lexicon="./matcha-icefall-zh-baker/lexicon.txt", | 63 | lexicon="./matcha-icefall-zh-baker/lexicon.txt", |
| 64 | dictDir="./matcha-icefall-zh-baker/dict", | 64 | dictDir="./matcha-icefall-zh-baker/dict", |
| @@ -513,9 +513,9 @@ void CNonStreamingTextToSpeechDlg::Init() { | @@ -513,9 +513,9 @@ void CNonStreamingTextToSpeechDlg::Init() { | ||
| 513 | "(c) Switch to the directory matcha-icefall-zh-baker\r\n" | 513 | "(c) Switch to the directory matcha-icefall-zh-baker\r\n" |
| 514 | "(d) Rename model-steps-3.onnx to model.onnx\r\n" | 514 | "(d) Rename model-steps-3.onnx to model.onnx\r\n" |
| 515 | "(e) Download a vocoder model from \r\n" | 515 | "(e) Download a vocoder model from \r\n" |
| 516 | - " https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx\r\n" | ||
| 517 | - "(f) Rename hifigan_v2.onnx to hifigan.onnx\r\n" | ||
| 518 | - "(g) Remember to put hifigan.onnx in the directory matcha-icefall-zh-baker\r\n" | 516 | + " https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx\r\n" |
| 517 | + "(f) Rename vocos-22khz-univ.onnx to vocos.onnx\r\n" | ||
| 518 | + "(g) Remember to put vocos.onnx in the directory matcha-icefall-zh-baker\r\n" | ||
| 519 | "(h) Copy the current exe to the directory matcha-icefall-zh-baker\r\n" | 519 | "(h) Copy the current exe to the directory matcha-icefall-zh-baker\r\n" |
| 520 | "(i) Done! You can now run the exe in the directory matcha-icefall-zh-baker\r\n"; | 520 | "(i) Done! You can now run the exe in the directory matcha-icefall-zh-baker\r\n"; |
| 521 | 521 | ||
| @@ -540,10 +540,16 @@ void CNonStreamingTextToSpeechDlg::Init() { | @@ -540,10 +540,16 @@ void CNonStreamingTextToSpeechDlg::Init() { | ||
| 540 | config.model.kokoro.dict_dir = "./dict"; | 540 | config.model.kokoro.dict_dir = "./dict"; |
| 541 | config.model.kokoro.lexicon = "./lexicon-us-en.txt,./lexicon-zh.txt"; | 541 | config.model.kokoro.lexicon = "./lexicon-us-en.txt,./lexicon-zh.txt"; |
| 542 | } | 542 | } |
| 543 | - } else if (Exists("./hifigan.onnx")) { | 543 | + } else if (Exists("./hifigan.onnx") || Exists("./vocos.onnx")) { |
| 544 | // it is a matcha tts model | 544 | // it is a matcha tts model |
| 545 | config.model.matcha.acoustic_model = "./model.onnx"; | 545 | config.model.matcha.acoustic_model = "./model.onnx"; |
| 546 | - config.model.matcha.vocoder = "./hifigan.onnx"; | 546 | + |
| 547 | + if (Exists("./hifigan.onnx")) { | ||
| 548 | + config.model.matcha.vocoder = "./hifigan.onnx"; | ||
| 549 | + } else if (Exists("./vocos.onnx")) { | ||
| 550 | + config.model.matcha.vocoder = "./vocos.onnx"; | ||
| 551 | + } | ||
| 552 | + | ||
| 547 | config.model.matcha.tokens = "./tokens.txt"; | 553 | config.model.matcha.tokens = "./tokens.txt"; |
| 548 | 554 | ||
| 549 | if (Exists("./espeak-ng-data/phontab")) { | 555 | if (Exists("./espeak-ng-data/phontab")) { |
| @@ -400,7 +400,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -400,7 +400,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 400 | tar xf matcha-icefall-en_US-ljspeech.tar.bz2 | 400 | tar xf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 401 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 401 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 402 | 402 | ||
| 403 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 403 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 404 | 404 | ||
| 405 | node ./test_tts_non_streaming_matcha_icefall_en.js | 405 | node ./test_tts_non_streaming_matcha_icefall_en.js |
| 406 | ``` | 406 | ``` |
| @@ -411,7 +411,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -411,7 +411,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 411 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 411 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 412 | rm matcha-icefall-zh-baker.tar.bz2 | 412 | rm matcha-icefall-zh-baker.tar.bz2 |
| 413 | 413 | ||
| 414 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 414 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 415 | 415 | ||
| 416 | node ./test_tts_non_streaming_matcha_icefall_zh.js | 416 | node ./test_tts_non_streaming_matcha_icefall_zh.js |
| 417 | ``` | 417 | ``` |
| @@ -9,7 +9,7 @@ function createOfflineTts() { | @@ -9,7 +9,7 @@ function createOfflineTts() { | ||
| 9 | model: { | 9 | model: { |
| 10 | matcha: { | 10 | matcha: { |
| 11 | acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', | 11 | acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', |
| 12 | - vocoder: './hifigan_v2.onnx', | 12 | + vocoder: './vocos-22khz-univ.onnx', |
| 13 | lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', | 13 | lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', |
| 14 | tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', | 14 | tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', |
| 15 | dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', | 15 | dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', |
| @@ -9,7 +9,7 @@ function createOfflineTts() { | @@ -9,7 +9,7 @@ function createOfflineTts() { | ||
| 9 | model: { | 9 | model: { |
| 10 | matcha: { | 10 | matcha: { |
| 11 | acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', | 11 | acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', |
| 12 | - vocoder: './hifigan_v2.onnx', | 12 | + vocoder: './vocos-22khz-univ.onnx', |
| 13 | lexicon: './matcha-icefall-zh-baker/lexicon.txt', | 13 | lexicon: './matcha-icefall-zh-baker/lexicon.txt', |
| 14 | tokens: './matcha-icefall-zh-baker/tokens.txt', | 14 | tokens: './matcha-icefall-zh-baker/tokens.txt', |
| 15 | dictDir: './matcha-icefall-zh-baker/dict', | 15 | dictDir: './matcha-icefall-zh-baker/dict', |
| @@ -83,7 +83,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -83,7 +83,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 83 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 83 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 84 | rm matcha-icefall-zh-baker.tar.bz2 | 84 | rm matcha-icefall-zh-baker.tar.bz2 |
| 85 | 85 | ||
| 86 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 86 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 87 | 87 | ||
| 88 | node ./test-offline-tts-matcha-zh.js | 88 | node ./test-offline-tts-matcha-zh.js |
| 89 | ``` | 89 | ``` |
| @@ -101,7 +101,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | @@ -101,7 +101,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i | ||
| 101 | tar xf matcha-icefall-en_US-ljspeech.tar.bz2 | 101 | tar xf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 102 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 102 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 103 | 103 | ||
| 104 | -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 104 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 105 | 105 | ||
| 106 | node ./test-offline-tts-matcha-en.js | 106 | node ./test-offline-tts-matcha-en.js |
| 107 | ``` | 107 | ``` |
| @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); | @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); | ||
| 5 | function createOfflineTts() { | 5 | function createOfflineTts() { |
| 6 | let offlineTtsMatchaModelConfig = { | 6 | let offlineTtsMatchaModelConfig = { |
| 7 | acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', | 7 | acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', |
| 8 | - vocoder: './hifigan_v2.onnx', | 8 | + vocoder: './vocos-22khz-univ.onnx', |
| 9 | lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', | 9 | lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', |
| 10 | tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', | 10 | tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', |
| 11 | dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', | 11 | dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', |
| @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); | @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); | ||
| 5 | function createOfflineTts() { | 5 | function createOfflineTts() { |
| 6 | let offlineTtsMatchaModelConfig = { | 6 | let offlineTtsMatchaModelConfig = { |
| 7 | acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', | 7 | acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', |
| 8 | - vocoder: './hifigan_v2.onnx', | 8 | + vocoder: './vocos-22khz-univ.onnx', |
| 9 | lexicon: './matcha-icefall-zh-baker/lexicon.txt', | 9 | lexicon: './matcha-icefall-zh-baker/lexicon.txt', |
| 10 | tokens: './matcha-icefall-zh-baker/tokens.txt', | 10 | tokens: './matcha-icefall-zh-baker/tokens.txt', |
| 11 | dictDir: './matcha-icefall-zh-baker/dict', | 11 | dictDir: './matcha-icefall-zh-baker/dict', |
| @@ -115,7 +115,7 @@ var | @@ -115,7 +115,7 @@ var | ||
| 115 | Config: TSherpaOnnxOfflineTtsConfig; | 115 | Config: TSherpaOnnxOfflineTtsConfig; |
| 116 | begin | 116 | begin |
| 117 | Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; | 117 | Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; |
| 118 | - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; | 118 | + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; |
| 119 | Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; | 119 | Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; |
| 120 | Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; | 120 | Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; |
| 121 | Config.Model.NumThreads := 1; | 121 | Config.Model.NumThreads := 1; |
| @@ -21,7 +21,7 @@ var | @@ -21,7 +21,7 @@ var | ||
| 21 | Config: TSherpaOnnxOfflineTtsConfig; | 21 | Config: TSherpaOnnxOfflineTtsConfig; |
| 22 | begin | 22 | begin |
| 23 | Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; | 23 | Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; |
| 24 | - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; | 24 | + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; |
| 25 | Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; | 25 | Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; |
| 26 | Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; | 26 | Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; |
| 27 | Config.Model.NumThreads := 1; | 27 | Config.Model.NumThreads := 1; |
| @@ -115,7 +115,7 @@ var | @@ -115,7 +115,7 @@ var | ||
| 115 | Config: TSherpaOnnxOfflineTtsConfig; | 115 | Config: TSherpaOnnxOfflineTtsConfig; |
| 116 | begin | 116 | begin |
| 117 | Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx'; | 117 | Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx'; |
| 118 | - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; | 118 | + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; |
| 119 | Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; | 119 | Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; |
| 120 | Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; | 120 | Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; |
| 121 | Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; | 121 | Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; |
| @@ -21,7 +21,7 @@ var | @@ -21,7 +21,7 @@ var | ||
| 21 | Config: TSherpaOnnxOfflineTtsConfig; | 21 | Config: TSherpaOnnxOfflineTtsConfig; |
| 22 | begin | 22 | begin |
| 23 | Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx'; | 23 | Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx'; |
| 24 | - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; | 24 | + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; |
| 25 | Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; | 25 | Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; |
| 26 | Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; | 26 | Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; |
| 27 | Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; | 27 | Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; |
| @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 33 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 33 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 34 | fi | 34 | fi |
| 35 | 35 | ||
| 36 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 37 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 36 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 37 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 38 | fi | 38 | fi |
| 39 | 39 | ||
| 40 | fpc \ | 40 | fpc \ |
| @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 33 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 33 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 34 | fi | 34 | fi |
| 35 | 35 | ||
| 36 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 37 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 36 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 37 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 38 | fi | 38 | fi |
| 39 | 39 | ||
| 40 | fpc \ | 40 | fpc \ |
| @@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 32 | rm matcha-icefall-zh-baker.tar.bz2 | 32 | rm matcha-icefall-zh-baker.tar.bz2 |
| 33 | fi | 33 | fi |
| 34 | 34 | ||
| 35 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 36 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 35 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 36 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 37 | fi | 37 | fi |
| 38 | 38 | ||
| 39 | fpc \ | 39 | fpc \ |
| @@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 32 | rm matcha-icefall-zh-baker.tar.bz2 | 32 | rm matcha-icefall-zh-baker.tar.bz2 |
| 33 | fi | 33 | fi |
| 34 | 34 | ||
| 35 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 36 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 35 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 36 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 37 | fi | 37 | fi |
| 38 | 38 | ||
| 39 | fpc \ | 39 | fpc \ |
| @@ -59,11 +59,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -59,11 +59,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 59 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 59 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 60 | rm matcha-icefall-zh-baker.tar.bz2 | 60 | rm matcha-icefall-zh-baker.tar.bz2 |
| 61 | 61 | ||
| 62 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 62 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 63 | 63 | ||
| 64 | python3 ./python-api-examples/offline-tts-play.py \ | 64 | python3 ./python-api-examples/offline-tts-play.py \ |
| 65 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 65 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 66 | - --matcha-vocoder=./hifigan_v2.onnx \ | 66 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 67 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 67 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 68 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ | 68 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 69 | --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ | 69 | --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ |
| @@ -77,11 +77,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -77,11 +77,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 77 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 77 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 78 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 78 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 79 | 79 | ||
| 80 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 80 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 81 | 81 | ||
| 82 | python3 ./python-api-examples/offline-tts-play.py \ | 82 | python3 ./python-api-examples/offline-tts-play.py \ |
| 83 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 83 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 84 | - --matcha-vocoder=./hifigan_v2.onnx \ | 84 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 85 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 85 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 86 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 86 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 87 | --output-filename=./test-matcha-ljspeech-en.wav \ | 87 | --output-filename=./test-matcha-ljspeech-en.wav \ |
| @@ -60,11 +60,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -60,11 +60,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 60 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 60 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| 61 | rm matcha-icefall-zh-baker.tar.bz2 | 61 | rm matcha-icefall-zh-baker.tar.bz2 |
| 62 | 62 | ||
| 63 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 63 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 64 | 64 | ||
| 65 | python3 ./python-api-examples/offline-tts.py \ | 65 | python3 ./python-api-examples/offline-tts.py \ |
| 66 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | 66 | --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ |
| 67 | - --matcha-vocoder=./hifigan_v2.onnx \ | 67 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 68 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | 68 | --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ |
| 69 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ | 69 | --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ |
| 70 | --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ | 70 | --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ |
| @@ -78,11 +78,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | @@ -78,11 +78,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m | ||
| 78 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 78 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| 79 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 79 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 80 | 80 | ||
| 81 | -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 81 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx |
| 82 | 82 | ||
| 83 | python3 ./python-api-examples/offline-tts.py \ | 83 | python3 ./python-api-examples/offline-tts.py \ |
| 84 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | 84 | --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ |
| 85 | - --matcha-vocoder=./hifigan_v2.onnx \ | 85 | + --matcha-vocoder=./vocos-22khz-univ.onnx \ |
| 86 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | 86 | --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ |
| 87 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | 87 | --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ |
| 88 | --output-filename=./test-matcha-ljspeech-en.wav \ | 88 | --output-filename=./test-matcha-ljspeech-en.wav \ |
| @@ -395,7 +395,7 @@ def get_matcha_models() -> List[TtsModel]: | @@ -395,7 +395,7 @@ def get_matcha_models() -> List[TtsModel]: | ||
| 395 | s = [f"{m.model_dir}/{r}" for r in rule_fsts] | 395 | s = [f"{m.model_dir}/{r}" for r in rule_fsts] |
| 396 | m.rule_fsts = ",".join(s) | 396 | m.rule_fsts = ",".join(s) |
| 397 | m.dict_dir = m.model_dir + "/dict" | 397 | m.dict_dir = m.model_dir + "/dict" |
| 398 | - m.vocoder = "hifigan_v2.onnx" | 398 | + m.vocoder = "vocos-22khz-univ.onnx" |
| 399 | 399 | ||
| 400 | english_persian_models = [ | 400 | english_persian_models = [ |
| 401 | TtsModel( | 401 | TtsModel( |
| @@ -416,7 +416,7 @@ def get_matcha_models() -> List[TtsModel]: | @@ -416,7 +416,7 @@ def get_matcha_models() -> List[TtsModel]: | ||
| 416 | ] | 416 | ] |
| 417 | for m in english_persian_models: | 417 | for m in english_persian_models: |
| 418 | m.data_dir = f"{m.model_dir}/espeak-ng-data" | 418 | m.data_dir = f"{m.model_dir}/espeak-ng-data" |
| 419 | - m.vocoder = "hifigan_v2.onnx" | 419 | + m.vocoder = "vocos-22khz-univ.onnx" |
| 420 | 420 | ||
| 421 | return chinese_models + english_persian_models | 421 | return chinese_models + english_persian_models |
| 422 | 422 |
| @@ -20,8 +20,8 @@ if [ ! -f male/tokens.txt ]; then | @@ -20,8 +20,8 @@ if [ ! -f male/tokens.txt ]; then | ||
| 20 | curl -SL --output male/tokens.txt https://huggingface.co/mah92/Musa-FA_EN-Matcha-TTS-Model/resolve/main/tokens_sherpa_with_fa.txt | 20 | curl -SL --output male/tokens.txt https://huggingface.co/mah92/Musa-FA_EN-Matcha-TTS-Model/resolve/main/tokens_sherpa_with_fa.txt |
| 21 | fi | 21 | fi |
| 22 | 22 | ||
| 23 | -if [ ! -f hifigan_v2.onnx ]; then | ||
| 24 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 23 | +if [ ! -f vocos-22khz-univ.onnx ]; then |
| 24 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 25 | fi | 25 | fi |
| 26 | 26 | ||
| 27 | if [ ! -f .add-meta-data.done ]; then | 27 | if [ ! -f .add-meta-data.done ]; then |
| @@ -31,14 +31,14 @@ fi | @@ -31,14 +31,14 @@ fi | ||
| 31 | 31 | ||
| 32 | python3 ./test.py \ | 32 | python3 ./test.py \ |
| 33 | --am ./female/model.onnx \ | 33 | --am ./female/model.onnx \ |
| 34 | - --vocoder ./hifigan_v2.onnx \ | 34 | + --vocoder ./vocos-22khz-univ.onnx \ |
| 35 | --tokens ./female/tokens.txt \ | 35 | --tokens ./female/tokens.txt \ |
| 36 | --text "This is a test. این یک نمونه ی تست فارسی است." \ | 36 | --text "This is a test. این یک نمونه ی تست فارسی است." \ |
| 37 | --out-wav "./female-en-fa.wav" | 37 | --out-wav "./female-en-fa.wav" |
| 38 | 38 | ||
| 39 | python3 ./test.py \ | 39 | python3 ./test.py \ |
| 40 | --am ./male/model.onnx \ | 40 | --am ./male/model.onnx \ |
| 41 | - --vocoder ./hifigan_v2.onnx \ | 41 | + --vocoder ./vocos-22khz-univ.onnx \ |
| 42 | --tokens ./male/tokens.txt \ | 42 | --tokens ./male/tokens.txt \ |
| 43 | --text "This is a test. این یک نمونه ی تست فارسی است." \ | 43 | --text "This is a test. این یک نمونه ی تست فارسی است." \ |
| 44 | --out-wav "./male-en-fa.wav" | 44 | --out-wav "./male-en-fa.wav" |
| @@ -183,6 +183,8 @@ if(SHERPA_ONNX_ENABLE_TTS) | @@ -183,6 +183,8 @@ if(SHERPA_ONNX_ENABLE_TTS) | ||
| 183 | offline-tts-vits-model.cc | 183 | offline-tts-vits-model.cc |
| 184 | offline-tts.cc | 184 | offline-tts.cc |
| 185 | piper-phonemize-lexicon.cc | 185 | piper-phonemize-lexicon.cc |
| 186 | + vocoder.cc | ||
| 187 | + vocos-vocoder.cc | ||
| 186 | ) | 188 | ) |
| 187 | endif() | 189 | endif() |
| 188 | 190 |
| @@ -45,11 +45,21 @@ class HifiganVocoder::Impl { | @@ -45,11 +45,21 @@ class HifiganVocoder::Impl { | ||
| 45 | Init(buf.data(), buf.size()); | 45 | Init(buf.data(), buf.size()); |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | - Ort::Value Run(Ort::Value mel) const { | 48 | + std::vector<float> Run(Ort::Value mel) const { |
| 49 | auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1, | 49 | auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1, |
| 50 | output_names_ptr_.data(), output_names_ptr_.size()); | 50 | output_names_ptr_.data(), output_names_ptr_.size()); |
| 51 | 51 | ||
| 52 | - return std::move(out[0]); | 52 | + std::vector<int64_t> audio_shape = |
| 53 | + out[0].GetTensorTypeAndShapeInfo().GetShape(); | ||
| 54 | + | ||
| 55 | + int64_t total = 1; | ||
| 56 | + // The output shape may be (1, 1, total) or (1, total) or (total,) | ||
| 57 | + for (auto i : audio_shape) { | ||
| 58 | + total *= i; | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + const float *p = out[0].GetTensorData<float>(); | ||
| 62 | + return {p, p + total}; | ||
| 53 | } | 63 | } |
| 54 | 64 | ||
| 55 | private: | 65 | private: |
| @@ -88,7 +98,7 @@ HifiganVocoder::HifiganVocoder(Manager *mgr, int32_t num_threads, | @@ -88,7 +98,7 @@ HifiganVocoder::HifiganVocoder(Manager *mgr, int32_t num_threads, | ||
| 88 | 98 | ||
| 89 | HifiganVocoder::~HifiganVocoder() = default; | 99 | HifiganVocoder::~HifiganVocoder() = default; |
| 90 | 100 | ||
| 91 | -Ort::Value HifiganVocoder::Run(Ort::Value mel) const { | 101 | +std::vector<float> HifiganVocoder::Run(Ort::Value mel) const { |
| 92 | return impl_->Run(std::move(mel)); | 102 | return impl_->Run(std::move(mel)); |
| 93 | } | 103 | } |
| 94 | 104 |
| @@ -7,14 +7,16 @@ | @@ -7,14 +7,16 @@ | ||
| 7 | 7 | ||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <string> | 9 | #include <string> |
| 10 | +#include <vector> | ||
| 10 | 11 | ||
| 11 | #include "onnxruntime_cxx_api.h" // NOLINT | 12 | #include "onnxruntime_cxx_api.h" // NOLINT |
| 13 | +#include "sherpa-onnx/csrc/vocoder.h" | ||
| 12 | 14 | ||
| 13 | namespace sherpa_onnx { | 15 | namespace sherpa_onnx { |
| 14 | 16 | ||
| 15 | -class HifiganVocoder { | 17 | +class HifiganVocoder : public Vocoder { |
| 16 | public: | 18 | public: |
| 17 | - ~HifiganVocoder(); | 19 | + ~HifiganVocoder() override; |
| 18 | 20 | ||
| 19 | HifiganVocoder(int32_t num_threads, const std::string &provider, | 21 | HifiganVocoder(int32_t num_threads, const std::string &provider, |
| 20 | const std::string &model); | 22 | const std::string &model); |
| @@ -26,7 +28,7 @@ class HifiganVocoder { | @@ -26,7 +28,7 @@ class HifiganVocoder { | ||
| 26 | /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). | 28 | /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). |
| 27 | - * @return Return a float32 tensor of shape (batch_size, num_samples). | 29 | + * @return Return a float32 vector containing the audio samples. |
| 28 | */ | 30 | */ |
| 29 | - Ort::Value Run(Ort::Value mel) const; | 31 | + std::vector<float> Run(Ort::Value mel) const override; |
| 30 | 32 | ||
| 31 | private: | 33 | private: |
| 32 | class Impl; | 34 | class Impl; |
| @@ -13,7 +13,6 @@ | @@ -13,7 +13,6 @@ | ||
| 13 | #include "fst/extensions/far/far.h" | 13 | #include "fst/extensions/far/far.h" |
| 14 | #include "kaldifst/csrc/kaldi-fst-io.h" | 14 | #include "kaldifst/csrc/kaldi-fst-io.h" |
| 15 | #include "kaldifst/csrc/text-normalizer.h" | 15 | #include "kaldifst/csrc/text-normalizer.h" |
| 16 | -#include "sherpa-onnx/csrc/hifigan-vocoder.h" | ||
| 17 | #include "sherpa-onnx/csrc/jieba-lexicon.h" | 16 | #include "sherpa-onnx/csrc/jieba-lexicon.h" |
| 18 | #include "sherpa-onnx/csrc/lexicon.h" | 17 | #include "sherpa-onnx/csrc/lexicon.h" |
| 19 | #include "sherpa-onnx/csrc/macros.h" | 18 | #include "sherpa-onnx/csrc/macros.h" |
| @@ -25,6 +24,7 @@ | @@ -25,6 +24,7 @@ | ||
| 25 | #include "sherpa-onnx/csrc/onnx-utils.h" | 24 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 26 | #include "sherpa-onnx/csrc/piper-phonemize-lexicon.h" | 25 | #include "sherpa-onnx/csrc/piper-phonemize-lexicon.h" |
| 27 | #include "sherpa-onnx/csrc/text-utils.h" | 26 | #include "sherpa-onnx/csrc/text-utils.h" |
| 27 | +#include "sherpa-onnx/csrc/vocoder.h" | ||
| 28 | 28 | ||
| 29 | namespace sherpa_onnx { | 29 | namespace sherpa_onnx { |
| 30 | 30 | ||
| @@ -33,9 +33,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | @@ -33,9 +33,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | ||
| 33 | explicit OfflineTtsMatchaImpl(const OfflineTtsConfig &config) | 33 | explicit OfflineTtsMatchaImpl(const OfflineTtsConfig &config) |
| 34 | : config_(config), | 34 | : config_(config), |
| 35 | model_(std::make_unique<OfflineTtsMatchaModel>(config.model)), | 35 | model_(std::make_unique<OfflineTtsMatchaModel>(config.model)), |
| 36 | - vocoder_(std::make_unique<HifiganVocoder>( | ||
| 37 | - config.model.num_threads, config.model.provider, | ||
| 38 | - config.model.matcha.vocoder)) { | 36 | + vocoder_(Vocoder::Create(config.model)) { |
| 39 | InitFrontend(); | 37 | InitFrontend(); |
| 40 | 38 | ||
| 41 | if (!config.rule_fsts.empty()) { | 39 | if (!config.rule_fsts.empty()) { |
| @@ -92,9 +90,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | @@ -92,9 +90,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | ||
| 92 | OfflineTtsMatchaImpl(Manager *mgr, const OfflineTtsConfig &config) | 90 | OfflineTtsMatchaImpl(Manager *mgr, const OfflineTtsConfig &config) |
| 93 | : config_(config), | 91 | : config_(config), |
| 94 | model_(std::make_unique<OfflineTtsMatchaModel>(mgr, config.model)), | 92 | model_(std::make_unique<OfflineTtsMatchaModel>(mgr, config.model)), |
| 95 | - vocoder_(std::make_unique<HifiganVocoder>( | ||
| 96 | - mgr, config.model.num_threads, config.model.provider, | ||
| 97 | - config.model.matcha.vocoder)) { | 93 | + vocoder_(Vocoder::Create(mgr, config.model)) { |
| 98 | InitFrontend(mgr); | 94 | InitFrontend(mgr); |
| 99 | 95 | ||
| 100 | if (!config.rule_fsts.empty()) { | 96 | if (!config.rule_fsts.empty()) { |
| @@ -382,22 +378,11 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | @@ -382,22 +378,11 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | ||
| 382 | memory_info, x.data(), x.size(), x_shape.data(), x_shape.size()); | 378 | memory_info, x.data(), x.size(), x_shape.data(), x_shape.size()); |
| 383 | 379 | ||
| 384 | Ort::Value mel = model_->Run(std::move(x_tensor), sid, speed); | 380 | Ort::Value mel = model_->Run(std::move(x_tensor), sid, speed); |
| 385 | - Ort::Value audio = vocoder_->Run(std::move(mel)); | ||
| 386 | - | ||
| 387 | - std::vector<int64_t> audio_shape = | ||
| 388 | - audio.GetTensorTypeAndShapeInfo().GetShape(); | ||
| 389 | - | ||
| 390 | - int64_t total = 1; | ||
| 391 | - // The output shape may be (1, 1, total) or (1, total) or (total,) | ||
| 392 | - for (auto i : audio_shape) { | ||
| 393 | - total *= i; | ||
| 394 | - } | ||
| 395 | - | ||
| 396 | - const float *p = audio.GetTensorData<float>(); | ||
| 397 | 381 | ||
| 398 | GeneratedAudio ans; | 382 | GeneratedAudio ans; |
| 383 | + | ||
| 384 | + ans.samples = vocoder_->Run(std::move(mel)); | ||
| 399 | ans.sample_rate = model_->GetMetaData().sample_rate; | 385 | ans.sample_rate = model_->GetMetaData().sample_rate; |
| 400 | - ans.samples = std::vector<float>(p, p + total); | ||
| 401 | 386 | ||
| 402 | float silence_scale = config_.silence_scale; | 387 | float silence_scale = config_.silence_scale; |
| 403 | if (silence_scale != 1) { | 388 | if (silence_scale != 1) { |
| @@ -410,7 +395,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | @@ -410,7 +395,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { | ||
| 410 | private: | 395 | private: |
| 411 | OfflineTtsConfig config_; | 396 | OfflineTtsConfig config_; |
| 412 | std::unique_ptr<OfflineTtsMatchaModel> model_; | 397 | std::unique_ptr<OfflineTtsMatchaModel> model_; |
| 413 | - std::unique_ptr<HifiganVocoder> vocoder_; | 398 | + std::unique_ptr<Vocoder> vocoder_; |
| 414 | std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_; | 399 | std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_; |
| 415 | std::unique_ptr<OfflineTtsFrontend> frontend_; | 400 | std::unique_ptr<OfflineTtsFrontend> frontend_; |
| 416 | }; | 401 | }; |
sherpa-onnx/csrc/vocoder.cc
0 → 100644
| 1 | +// sherpa-onnx/csrc/vocoder.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "sherpa-onnx/csrc/vocoder.h" | ||
| 6 | + | ||
| 7 | +#include <cstddef> | ||
| 8 | +#include <cstdint> | ||
| 9 | +#include <memory> | ||
| 10 | +#include <sstream> | ||
| 11 | +#include <string> | ||
| 12 | + | ||
| 13 | +#if __ANDROID_API__ >= 9 | ||
| 14 | +#include "android/asset_manager.h" | ||
| 15 | +#include "android/asset_manager_jni.h" | ||
| 16 | +#endif | ||
| 17 | + | ||
| 18 | +#if __OHOS__ | ||
| 19 | +#include "rawfile/raw_file_manager.h" | ||
| 20 | +#endif | ||
| 21 | + | ||
| 22 | +#include "sherpa-onnx/csrc/file-utils.h" | ||
| 23 | +#include "sherpa-onnx/csrc/hifigan-vocoder.h" | ||
| 24 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 25 | +#include "sherpa-onnx/csrc/onnx-utils.h" | ||
| 26 | +#include "sherpa-onnx/csrc/vocos-vocoder.h" | ||
| 27 | + | ||
| 28 | +namespace sherpa_onnx { | ||
| 29 | + | ||
| 30 | +namespace { | ||
| 31 | + | ||
| 32 | +// Vocoder kinds that Vocoder::Create() knows how to instantiate. | ||
| 33 | +enum class ModelType : std::uint8_t { | ||
| 34 | +  kHifigan, | ||
| 35 | +  kVocos, | ||
| 36 | +  kUnknown, | ||
| 37 | +}; | ||
| 38 | + | ||
| 39 | +}  // namespace | ||
| 40 | + | ||
| 41 | +// Determines which kind of vocoder a serialized ONNX model contains. | ||
| 42 | +// | ||
| 43 | +// A temporary single-threaded session is created only to read the custom | ||
| 44 | +// metadata entry "model_type". | ||
| 45 | +// | ||
| 46 | +// @param model_data Buffer holding the serialized model. | ||
| 47 | +// @param model_data_length Size of model_data in bytes. | ||
| 48 | +// @param debug If true, the model metadata is logged. | ||
| 49 | +// @return kHifigan or kVocos on success; kUnknown (after logging an error) if | ||
| 50 | +//         "model_type" is missing or has an unsupported value. | ||
| 51 | +static ModelType GetModelType(char *model_data, size_t model_data_length, | ||
| 52 | +                              bool debug) { | ||
| 53 | +  Ort::Env env(ORT_LOGGING_LEVEL_ERROR); | ||
| 54 | +  Ort::SessionOptions sess_opts; | ||
| 55 | +  sess_opts.SetIntraOpNumThreads(1); | ||
| 56 | +  sess_opts.SetInterOpNumThreads(1); | ||
| 57 | + | ||
| 58 | +  auto sess = std::make_unique<Ort::Session>(env, model_data, model_data_length, | ||
| 59 | +                                             sess_opts); | ||
| 60 | + | ||
| 61 | +  Ort::ModelMetadata meta_data = sess->GetModelMetadata(); | ||
| 62 | +  if (debug) { | ||
| 63 | +    std::ostringstream os; | ||
| 64 | +    PrintModelMetadata(os, meta_data); | ||
| 65 | +#if __OHOS__ | ||
| 66 | +    SHERPA_ONNX_LOGE("%{public}s", os.str().c_str()); | ||
| 67 | +#else | ||
| 68 | +    SHERPA_ONNX_LOGE("%s", os.str().c_str()); | ||
| 69 | +#endif | ||
| 70 | +  } | ||
| 71 | + | ||
| 72 | +  Ort::AllocatorWithDefaultOptions allocator; | ||
| 73 | +  auto model_type = | ||
| 74 | +      LookupCustomModelMetaData(meta_data, "model_type", allocator); | ||
| 75 | +  if (model_type.empty()) { | ||
| 76 | +    SHERPA_ONNX_LOGE( | ||
| 77 | +        "No model_type in the metadata!\n" | ||
| 78 | +        "Please make sure you are using the vocoder from " | ||
| 79 | +        "https://github.com/k2-fsa/sherpa-onnx/releases/tag/vocoder-models"); | ||
| 80 | +    return ModelType::kUnknown; | ||
| 81 | +  } | ||
| 82 | + | ||
| 83 | +  if (model_type == "hifigan") { | ||
| 84 | +    return ModelType::kHifigan; | ||
| 85 | +  } else if (model_type == "vocos") { | ||
| 86 | +    return ModelType::kVocos; | ||
| 87 | +  } else { | ||
| 88 | +    SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str()); | ||
| 89 | +    return ModelType::kUnknown; | ||
| 90 | +  } | ||
| 91 | +} | ||
| 92 | + | ||
| 93 | +// Creates the vocoder matching the "model_type" metadata of the model file | ||
| 94 | +// config.matcha.vocoder. Returns nullptr if the type cannot be determined. | ||
| 95 | +std::unique_ptr<Vocoder> Vocoder::Create(const OfflineTtsModelConfig &config) { | ||
| 96 | +  auto buffer = ReadFile(config.matcha.vocoder); | ||
| 97 | +  auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug); | ||
| 98 | + | ||
| 99 | +  switch (model_type) { | ||
| 100 | +    case ModelType::kHifigan: | ||
| 101 | +      return std::make_unique<HifiganVocoder>( | ||
| 102 | +          config.num_threads, config.provider, config.matcha.vocoder); | ||
| 103 | +    case ModelType::kVocos: | ||
| 104 | +      return std::make_unique<VocosVocoder>(config); | ||
| 105 | +    case ModelType::kUnknown: | ||
| 106 | +      SHERPA_ONNX_LOGE("Unknown model type in vocoder!"); | ||
| 107 | +      return nullptr; | ||
| 108 | +  } | ||
| 109 | + | ||
| 110 | +  // Only reachable with an out-of-range enum value; ensures that control | ||
| 111 | +  // never flows off the end of a non-void function. | ||
| 112 | +  return nullptr; | ||
| 113 | +} | ||
| 114 | + | ||
| 115 | +// Same as above, but the model is read through the platform resource manager | ||
| 116 | +// `mgr`, which is also forwarded to the concrete vocoder so that it loads the | ||
| 117 | +// model the same way instead of treating the path as a regular file. | ||
| 118 | +template <typename Manager> | ||
| 119 | +std::unique_ptr<Vocoder> Vocoder::Create(Manager *mgr, | ||
| 120 | +                                         const OfflineTtsModelConfig &config) { | ||
| 121 | +  auto buffer = ReadFile(mgr, config.matcha.vocoder); | ||
| 122 | +  auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug); | ||
| 123 | + | ||
| 124 | +  switch (model_type) { | ||
| 125 | +    case ModelType::kHifigan: | ||
| 126 | +      return std::make_unique<HifiganVocoder>( | ||
| 127 | +          mgr, config.num_threads, config.provider, config.matcha.vocoder); | ||
| 128 | +    case ModelType::kVocos: | ||
| 129 | +      return std::make_unique<VocosVocoder>(mgr, config); | ||
| 130 | +    case ModelType::kUnknown: | ||
| 131 | +      SHERPA_ONNX_LOGE("Unknown model type in vocoder!"); | ||
| 132 | +      return nullptr; | ||
| 133 | +  } | ||
| 134 | + | ||
| 135 | +  // Only reachable with an out-of-range enum value. | ||
| 136 | +  return nullptr; | ||
| 137 | +} | ||
| 138 | + | ||
| 139 | +#if __ANDROID_API__ >= 9 | ||
| 140 | +template std::unique_ptr<Vocoder> Vocoder::Create( | ||
| 141 | +    AAssetManager *mgr, const OfflineTtsModelConfig &config); | ||
| 142 | +#endif | ||
| 143 | + | ||
| 144 | +#if __OHOS__ | ||
| 145 | +template std::unique_ptr<Vocoder> Vocoder::Create( | ||
| 146 | +    NativeResourceManager *mgr, const OfflineTtsModelConfig &config); | ||
| 147 | +#endif | ||
| 148 | + | ||
| 149 | +}  // namespace sherpa_onnx |
sherpa-onnx/csrc/vocoder.h
0 → 100644
| 1 | +// sherpa-onnx/csrc/vocoder.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifndef SHERPA_ONNX_CSRC_VOCODER_H_ | ||
| 6 | +#define SHERPA_ONNX_CSRC_VOCODER_H_ | ||
| 7 | + | ||
| 8 | +#include <memory> | ||
| 9 | +#include <string> | ||
| 10 | +#include <vector> | ||
| 11 | + | ||
| 12 | +#include "onnxruntime_cxx_api.h" // NOLINT | ||
| 13 | +#include "sherpa-onnx/csrc/offline-tts-model-config.h" | ||
| 14 | + | ||
| 15 | +namespace sherpa_onnx { | ||
| 16 | + | ||
| 17 | +class Vocoder { | ||
| 18 | + public: | ||
| 19 | + virtual ~Vocoder() = default; | ||
| 20 | + | ||
| 21 | + static std::unique_ptr<Vocoder> Create(const OfflineTtsModelConfig &config); | ||
| 22 | + | ||
| 23 | + template <typename Manager> | ||
| 24 | + static std::unique_ptr<Vocoder> Create(Manager *mgr, | ||
| 25 | + const OfflineTtsModelConfig &config); | ||
| 26 | + | ||
| 27 | + /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). | ||
| 28 | + * @return Return a float32 vector containing audio samples. | ||
| 29 | + */ | ||
| 30 | + virtual std::vector<float> Run(Ort::Value mel) const = 0; | ||
| 31 | +}; | ||
| 32 | + | ||
| 33 | +} // namespace sherpa_onnx | ||
| 34 | + | ||
| 35 | +#endif // SHERPA_ONNX_CSRC_VOCODER_H_ |
sherpa-onnx/csrc/vocos-vocoder.cc
0 → 100644
| 1 | +// sherpa-onnx/csrc/vocos-vocoder.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "sherpa-onnx/csrc/vocos-vocoder.h" | ||
| 6 | + | ||
| 7 | +#include <string> | ||
| 8 | +#include <utility> | ||
| 9 | +#include <vector> | ||
| 10 | + | ||
| 11 | +#if __ANDROID_API__ >= 9 | ||
| 12 | +#include "android/asset_manager.h" | ||
| 13 | +#include "android/asset_manager_jni.h" | ||
| 14 | +#endif | ||
| 15 | + | ||
| 16 | +#if __OHOS__ | ||
| 17 | +#include "rawfile/raw_file_manager.h" | ||
| 18 | +#endif | ||
| 19 | + | ||
| 20 | +#include "kaldi-native-fbank/csrc/istft.h" | ||
| 21 | +#include "sherpa-onnx/csrc/file-utils.h" | ||
| 22 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 23 | +#include "sherpa-onnx/csrc/onnx-utils.h" | ||
| 24 | +#include "sherpa-onnx/csrc/session.h" | ||
| 25 | + | ||
| 26 | +namespace sherpa_onnx { | ||
| 27 | + | ||
| 28 | +struct VocosModelMetaData {  // NOTE(review): presumably the STFT settings of the vocos model; the code that fills it is outside this view - confirm | ||
| 29 | + int32_t n_fft;  // no default initializer: indeterminate until assigned | ||
| 30 | + int32_t hop_length; | ||
| 31 | + int32_t win_length; | ||
| 32 | + int32_t center;  // NOTE(review): looks like a boolean flag stored as int32_t - confirm | ||
| 33 | + int32_t normalized;  // NOTE(review): looks like a boolean flag stored as int32_t - confirm | ||
| 34 | + std::string window_type; | ||
| 35 | + std::string pad_mode; | ||
| 36 | +}; | ||
| 37 | + | ||
| 38 | +class VocosVocoder::Impl { | ||
| 39 | + public: | ||
| 40 | + explicit Impl(const OfflineTtsModelConfig &config) | ||
| 41 | + : config_(config), | ||
| 42 | + env_(ORT_LOGGING_LEVEL_ERROR), | ||
| 43 | + sess_opts_(GetSessionOptions(config.num_threads, config.provider)), | ||
| 44 | + allocator_{} { | ||
| 45 | + auto buf = ReadFile(config.matcha.vocoder); | ||
| 46 | + Init(buf.data(), buf.size()); | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + template <typename Manager> | ||
| 50 | + explicit Impl(Manager *mgr, const OfflineTtsModelConfig &config) | ||
| 51 | + : config_(config), | ||
| 52 | + env_(ORT_LOGGING_LEVEL_ERROR), | ||
| 53 | + sess_opts_(GetSessionOptions(config.num_threads, config.provider)), | ||
| 54 | + allocator_{} { | ||
| 55 | + auto buf = ReadFile(mgr, config.matcha.vocoder); | ||
| 56 | + Init(buf.data(), buf.size()); | ||
| 57 | + } | ||
| 58 | + | ||
| 59 | + std::vector<float> Run(Ort::Value mel) const { | ||
| 60 | + auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1, | ||
| 61 | + output_names_ptr_.data(), output_names_ptr_.size()); | ||
| 62 | + | ||
| 63 | + std::vector<int64_t> shape = out[0].GetTensorTypeAndShapeInfo().GetShape(); | ||
| 64 | + | ||
| 65 | + if (shape[0] != 1) { | ||
| 66 | + SHERPA_ONNX_LOGE("Support only batch size 1, given: %d", | ||
| 67 | + static_cast<int32_t>(shape[0])); | ||
| 68 | + SHERPA_ONNX_EXIT(-1); | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + knf::StftResult stft_result; | ||
| 72 | + stft_result.num_frames = shape[2]; | ||
| 73 | + stft_result.real.resize(shape[1] * shape[2]); | ||
| 74 | + stft_result.imag.resize(shape[1] * shape[2]); | ||
| 75 | + | ||
| 76 | + // stft_result.real: (num_frames, n_fft/2+1), flattened in row major | ||
| 77 | + | ||
| 78 | + // mag.shape: (batch_size, n_fft/2+1, num_frames) | ||
| 79 | + const float *p_mag = out[0].GetTensorData<float>(); | ||
| 80 | + const float *p_x = out[1].GetTensorData<float>(); | ||
| 81 | + const float *p_y = out[2].GetTensorData<float>(); | ||
| 82 | + | ||
| 83 | + for (int32_t frame_index = 0; frame_index < static_cast<int32_t>(shape[2]); | ||
| 84 | + ++frame_index) { | ||
| 85 | + for (int32_t bin = 0; bin < static_cast<int32_t>(shape[1]); ++bin) { | ||
| 86 | + stft_result.real[frame_index * shape[1] + bin] = | ||
| 87 | + p_mag[bin * shape[2] + frame_index] * | ||
| 88 | + p_x[bin * shape[2] + frame_index]; | ||
| 89 | + stft_result.imag[frame_index * shape[1] + bin] = | ||
| 90 | + p_mag[bin * shape[2] + frame_index] * | ||
| 91 | + p_y[bin * shape[2] + frame_index]; | ||
| 92 | + } | ||
| 93 | + } | ||
| 94 | + | ||
| 95 | + knf::StftConfig stft_config; | ||
| 96 | + stft_config.n_fft = meta_.n_fft; | ||
| 97 | + stft_config.hop_length = meta_.hop_length; | ||
| 98 | + stft_config.win_length = meta_.win_length; | ||
| 99 | + stft_config.normalized = meta_.normalized; | ||
| 100 | + stft_config.center = meta_.center; | ||
| 101 | + stft_config.window_type = meta_.window_type; | ||
| 102 | + stft_config.pad_mode = meta_.pad_mode; | ||
| 103 | + | ||
| 104 | + knf::IStft istft(stft_config); | ||
| 105 | + return istft.Compute(stft_result); | ||
| 106 | + } | ||
| 107 | + | ||
| 108 | + private: | ||
| 109 | + void Init(void *model_data, size_t model_data_length) { | ||
| 110 | + sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length, | ||
| 111 | + sess_opts_); | ||
| 112 | + | ||
| 113 | + GetInputNames(sess_.get(), &input_names_, &input_names_ptr_); | ||
| 114 | + | ||
| 115 | + GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_); | ||
| 116 | + | ||
| 117 | + // get meta data | ||
| 118 | + Ort::ModelMetadata meta_data = sess_->GetModelMetadata(); | ||
| 119 | + if (config_.debug) { | ||
| 120 | + std::ostringstream os; | ||
| 121 | + os << "---Vocos model---\n"; | ||
| 122 | + PrintModelMetadata(os, meta_data); | ||
| 123 | + | ||
| 124 | + os << "----------input names----------\n"; | ||
| 125 | + int32_t i = 0; | ||
| 126 | + for (const auto &s : input_names_) { | ||
| 127 | + os << i << " " << s << "\n"; | ||
| 128 | + ++i; | ||
| 129 | + } | ||
| 130 | + os << "----------output names----------\n"; | ||
| 131 | + i = 0; | ||
| 132 | + for (const auto &s : output_names_) { | ||
| 133 | + os << i << " " << s << "\n"; | ||
| 134 | + ++i; | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | +#if __OHOS__ | ||
| 138 | + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str()); | ||
| 139 | +#else | ||
| 140 | + SHERPA_ONNX_LOGE("%s\n", os.str().c_str()); | ||
| 141 | +#endif | ||
| 142 | + } | ||
| 143 | + | ||
| 144 | + Ort::AllocatorWithDefaultOptions allocator; // used in the macro below | ||
| 145 | + SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft"); | ||
| 146 | + SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length"); | ||
| 147 | + SHERPA_ONNX_READ_META_DATA(meta_.win_length, "win_length"); | ||
| 148 | + SHERPA_ONNX_READ_META_DATA(meta_.center, "center"); | ||
| 149 | + SHERPA_ONNX_READ_META_DATA(meta_.normalized, "normalized"); | ||
| 150 | + SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type"); | ||
| 151 | + SHERPA_ONNX_READ_META_DATA_STR(meta_.pad_mode, "pad_mode"); | ||
| 152 | + } | ||
| 153 | + | ||
| 154 | + private: | ||
| 155 | + OfflineTtsModelConfig config_; | ||
| 156 | + VocosModelMetaData meta_; | ||
| 157 | + | ||
| 158 | + Ort::Env env_; | ||
| 159 | + Ort::SessionOptions sess_opts_; | ||
| 160 | + Ort::AllocatorWithDefaultOptions allocator_; | ||
| 161 | + | ||
| 162 | + std::unique_ptr<Ort::Session> sess_; | ||
| 163 | + | ||
| 164 | + std::vector<std::string> input_names_; | ||
| 165 | + std::vector<const char *> input_names_ptr_; | ||
| 166 | + | ||
| 167 | + std::vector<std::string> output_names_; | ||
| 168 | + std::vector<const char *> output_names_ptr_; | ||
| 169 | +}; | ||
| 170 | + | ||
| 171 | +VocosVocoder::VocosVocoder(const OfflineTtsModelConfig &config) | ||
| 172 | + : impl_(std::make_unique<Impl>(config)) {} | ||
| 173 | + | ||
| 174 | +template <typename Manager> | ||
| 175 | +VocosVocoder::VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config) | ||
| 176 | + : impl_(std::make_unique<Impl>(mgr, config)) {} | ||
| 177 | + | ||
| 178 | +VocosVocoder::~VocosVocoder() = default; | ||
| 179 | + | ||
| 180 | +std::vector<float> VocosVocoder::Run(Ort::Value mel) const { | ||
| 181 | + return impl_->Run(std::move(mel)); | ||
| 182 | +} | ||
| 183 | + | ||
| 184 | +#if __ANDROID_API__ >= 9 | ||
| 185 | +template VocosVocoder::VocosVocoder(AAssetManager *mgr, | ||
| 186 | + const OfflineTtsModelConfig &config); | ||
| 187 | +#endif | ||
| 188 | + | ||
| 189 | +#if __OHOS__ | ||
| 190 | +template VocosVocoder::VocosVocoder(NativeResourceManager *mgr, | ||
| 191 | + const OfflineTtsModelConfig &config); | ||
| 192 | +#endif | ||
| 193 | + | ||
| 194 | +} // namespace sherpa_onnx |
sherpa-onnx/csrc/vocos-vocoder.h
0 → 100644
| 1 | +// sherpa-onnx/csrc/vocos-vocoder.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifndef SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_ | ||
| 6 | +#define SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_ | ||
| 7 | + | ||
| 8 | +#include <memory> | ||
| 9 | +#include <string> | ||
| 10 | +#include <vector> | ||
| 11 | + | ||
| 12 | +#include "onnxruntime_cxx_api.h" // NOLINT | ||
| 13 | +#include "sherpa-onnx/csrc/offline-tts-model-config.h" | ||
| 14 | +#include "sherpa-onnx/csrc/vocoder.h" | ||
| 15 | + | ||
| 16 | +namespace sherpa_onnx { | ||
| 17 | + | ||
| 18 | +class VocosVocoder : public Vocoder { | ||
| 19 | + public: | ||
| 20 | + ~VocosVocoder() override; | ||
| 21 | + | ||
| 22 | + explicit VocosVocoder(const OfflineTtsModelConfig &config); | ||
| 23 | + | ||
| 24 | + template <typename Manager> | ||
| 25 | + VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config); | ||
| 26 | + | ||
| 27 | + /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). | ||
| 28 | + * @return Return a float32 vector containing audio samples. | ||
| 29 | + */ | ||
| 30 | + std::vector<float> Run(Ort::Value mel) const override; | ||
| 31 | + | ||
| 32 | + private: | ||
| 33 | + class Impl; | ||
| 34 | + std::unique_ptr<Impl> impl_; | ||
| 35 | +}; | ||
| 36 | + | ||
| 37 | +} // namespace sherpa_onnx | ||
| 38 | + | ||
| 39 | +#endif // SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_ |
| @@ -17,8 +17,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | @@ -17,8 +17,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 17 | rm matcha-icefall-en_US-ljspeech.tar.bz2 | 17 | rm matcha-icefall-en_US-ljspeech.tar.bz2 |
| 18 | fi | 18 | fi |
| 19 | 19 | ||
| 20 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 21 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 20 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 21 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 22 | fi | 22 | fi |
| 23 | 23 | ||
| 24 | if [ ! -e ./tts-matcha-en ]; then | 24 | if [ ! -e ./tts-matcha-en ]; then |
| @@ -16,8 +16,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | @@ -16,8 +16,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 16 | rm matcha-icefall-zh-baker.tar.bz2 | 16 | rm matcha-icefall-zh-baker.tar.bz2 |
| 17 | fi | 17 | fi |
| 18 | 18 | ||
| 19 | -if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 20 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | 19 | +if [ ! -f ./vocos-22khz-univ.onnx ]; then |
| 20 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx | ||
| 21 | fi | 21 | fi |
| 22 | 22 | ||
| 23 | if [ ! -e ./tts-matcha-zh ]; then | 23 | if [ ! -e ./tts-matcha-zh ]; then |
| @@ -6,7 +6,7 @@ class MyClass { | @@ -6,7 +6,7 @@ class MyClass { | ||
| 6 | 6 | ||
| 7 | func run() { | 7 | func run() { |
| 8 | let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx" | 8 | let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx" |
| 9 | - let vocoder = "./hifigan_v2.onnx" | 9 | + let vocoder = "./vocos-22khz-univ.onnx" |
| 10 | let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt" | 10 | let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt" |
| 11 | let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data" | 11 | let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data" |
| 12 | let matcha = sherpaOnnxOfflineTtsMatchaModelConfig( | 12 | let matcha = sherpaOnnxOfflineTtsMatchaModelConfig( |
| @@ -6,7 +6,7 @@ class MyClass { | @@ -6,7 +6,7 @@ class MyClass { | ||
| 6 | 6 | ||
| 7 | func run() { | 7 | func run() { |
| 8 | let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx" | 8 | let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx" |
| 9 | - let vocoder = "./hifigan_v2.onnx" | 9 | + let vocoder = "./vocos-22khz-univ.onnx" |
| 10 | let lexicon = "./matcha-icefall-zh-baker/lexicon.txt" | 10 | let lexicon = "./matcha-icefall-zh-baker/lexicon.txt" |
| 11 | let tokens = "./matcha-icefall-zh-baker/tokens.txt" | 11 | let tokens = "./matcha-icefall-zh-baker/tokens.txt" |
| 12 | let dictDir = "./matcha-icefall-zh-baker/dict" | 12 | let dictDir = "./matcha-icefall-zh-baker/dict" |
-
请 注册 或 登录 后发表评论