正在显示 4 个修改的文件，包含 144 行增加和 16 行删除
| @@ -328,6 +328,25 @@ log "Offline TTS test" | @@ -328,6 +328,25 @@ log "Offline TTS test" | ||
| 328 | # test waves are saved in ./tts | 328 | # test waves are saved in ./tts |
| 329 | mkdir ./tts | 329 | mkdir ./tts |
| 330 | 330 | ||
| 331 | +log "test kitten tts" | ||
| 332 | + | ||
| 333 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 334 | +tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 335 | +rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 336 | + | ||
| 337 | +python3 ./python-api-examples/offline-tts.py \ | ||
| 338 | + --debug=1 \ | ||
| 339 | + --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \ | ||
| 340 | + --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \ | ||
| 341 | + --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \ | ||
| 342 | + --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \ | ||
| 343 | + --num-threads=2 \ | ||
| 344 | + --sid=0 \ | ||
| 345 | + --output-filename="./tts/kitten-0.wav" \ | ||
| 346 | + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | ||
| 347 | + | ||
| 348 | +rm -rf kitten-nano-en-v0_1-fp16 | ||
| 349 | + | ||
| 331 | log "kokoro-multi-lang-v1_0 test" | 350 | log "kokoro-multi-lang-v1_0 test" |
| 332 | 351 | ||
| 333 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 | 352 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 |
| @@ -11,7 +11,7 @@ while the model is still generating. | @@ -11,7 +11,7 @@ while the model is still generating. | ||
| 11 | 11 | ||
| 12 | Usage: | 12 | Usage: |
| 13 | 13 | ||
| 14 | -Example (1/7) | 14 | +Example (1/8) |
| 15 | 15 | ||
| 16 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | 16 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 |
| 17 | tar xf vits-piper-en_US-amy-low.tar.bz2 | 17 | tar xf vits-piper-en_US-amy-low.tar.bz2 |
| @@ -23,7 +23,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | @@ -23,7 +23,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | ||
| 23 | --output-filename=./generated.wav \ | 23 | --output-filename=./generated.wav \ |
| 24 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | 24 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 25 | 25 | ||
| 26 | -Example (2/7) | 26 | +Example (2/8) |
| 27 | 27 | ||
| 28 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 | 28 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 |
| 29 | tar xvf vits-zh-aishell3.tar.bz2 | 29 | tar xvf vits-zh-aishell3.tar.bz2 |
| @@ -37,7 +37,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | @@ -37,7 +37,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | ||
| 37 | --output-filename=./liubei-21.wav \ | 37 | --output-filename=./liubei-21.wav \ |
| 38 | "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334" | 38 | "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334" |
| 39 | 39 | ||
| 40 | -Example (3/7) | 40 | +Example (3/8) |
| 41 | 41 | ||
| 42 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2 | 42 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2 |
| 43 | tar xvf sherpa-onnx-vits-zh-ll.tar.bz2 | 43 | tar xvf sherpa-onnx-vits-zh-ll.tar.bz2 |
| @@ -53,7 +53,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | @@ -53,7 +53,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | ||
| 53 | --output-filename=./test-2.wav \ | 53 | --output-filename=./test-2.wav \ |
| 54 | "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。" | 54 | "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。" |
| 55 | 55 | ||
| 56 | -Example (4/7) | 56 | +Example (4/8) |
| 57 | 57 | ||
| 58 | curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 | 58 | curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 |
| 59 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 59 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| @@ -71,7 +71,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | @@ -71,7 +71,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | ||
| 71 | --output-filename=./test-matcha.wav \ | 71 | --output-filename=./test-matcha.wav \ |
| 72 | "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" | 72 | "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" |
| 73 | 73 | ||
| 74 | -Example (5/7) | 74 | +Example (5/8) |
| 75 | 75 | ||
| 76 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | 76 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 |
| 77 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 77 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| @@ -88,7 +88,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | @@ -88,7 +88,7 @@ python3 ./python-api-examples/offline-tts-play.py \ | ||
| 88 | --num-threads=2 \ | 88 | --num-threads=2 \ |
| 89 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | 89 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 90 | 90 | ||
| 91 | -Example (6/7) | 91 | +Example (6/8) |
| 92 | 92 | ||
| 93 | (This version of kokoro supports only English) | 93 | (This version of kokoro supports only English) |
| 94 | 94 | ||
| @@ -105,9 +105,9 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -105,9 +105,9 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 105 | --num-threads=2 \ | 105 | --num-threads=2 \ |
| 106 | --sid=10 \ | 106 | --sid=10 \ |
| 107 | --output-filename="./kokoro-10.wav" \ | 107 | --output-filename="./kokoro-10.wav" \ |
| 108 | - "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar." | 108 | + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 109 | 109 | ||
| 110 | -Example (7/7) | 110 | +Example (7/8) |
| 111 | 111 | ||
| 112 | (This version of kokoro supports English, Chinese, etc.) | 112 | (This version of kokoro supports English, Chinese, etc.) |
| 113 | 113 | ||
| @@ -128,6 +128,23 @@ python3 ./python-api-examples/offline-tts-play.py \ | @@ -128,6 +128,23 @@ python3 ./python-api-examples/offline-tts-play.py \ | ||
| 128 | --output-filename="./kokoro-18-zh-en.wav" \ | 128 | --output-filename="./kokoro-18-zh-en.wav" \ |
| 129 | "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" | 129 | "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" |
| 130 | 130 | ||
| 131 | +Example (8/8) | ||
| 132 | + | ||
| 133 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 134 | +tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 135 | +rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 136 | + | ||
| 137 | +python3 ./python-api-examples/offline-tts-play.py \ | ||
| 138 | + --debug=1 \ | ||
| 139 | + --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \ | ||
| 140 | + --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \ | ||
| 141 | + --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \ | ||
| 142 | + --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \ | ||
| 143 | + --num-threads=2 \ | ||
| 144 | + --sid=0 \ | ||
| 145 | + --output-filename="./kitten-0.wav" \ | ||
| 146 | + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | ||
| 147 | + | ||
| 131 | You can find more models at | 148 | You can find more models at |
| 132 | https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | 149 | https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models |
| 133 | 150 | ||
| @@ -285,6 +302,36 @@ def add_kokoro_args(parser): | @@ -285,6 +302,36 @@ def add_kokoro_args(parser): | ||
| 285 | ) | 302 | ) |
| 286 | 303 | ||
| 287 | 304 | ||
| 305 | +def add_kitten_args(parser): | ||
| 306 | + parser.add_argument( | ||
| 307 | + "--kitten-model", | ||
| 308 | + type=str, | ||
| 309 | + default="", | ||
| 310 | + help="Path to model.onnx for kitten", | ||
| 311 | + ) | ||
| 312 | + | ||
| 313 | + parser.add_argument( | ||
| 314 | + "--kitten-voices", | ||
| 315 | + type=str, | ||
| 316 | + default="", | ||
| 317 | + help="Path to voices.bin for kitten", | ||
| 318 | + ) | ||
| 319 | + | ||
| 320 | + parser.add_argument( | ||
| 321 | + "--kitten-tokens", | ||
| 322 | + type=str, | ||
| 323 | + default="", | ||
| 324 | + help="Path to tokens.txt for kitten", | ||
| 325 | + ) | ||
| 326 | + | ||
| 327 | + parser.add_argument( | ||
| 328 | + "--kitten-data-dir", | ||
| 329 | + type=str, | ||
| 330 | + default="", | ||
| 331 | + help="Path to the dict directory of espeak-ng.", | ||
| 332 | + ) | ||
| 333 | + | ||
| 334 | + | ||
| 288 | def get_args(): | 335 | def get_args(): |
| 289 | parser = argparse.ArgumentParser( | 336 | parser = argparse.ArgumentParser( |
| 290 | formatter_class=argparse.ArgumentDefaultsHelpFormatter | 337 | formatter_class=argparse.ArgumentDefaultsHelpFormatter |
| @@ -293,6 +340,7 @@ def get_args(): | @@ -293,6 +340,7 @@ def get_args(): | ||
| 293 | add_vits_args(parser) | 340 | add_vits_args(parser) |
| 294 | add_matcha_args(parser) | 341 | add_matcha_args(parser) |
| 295 | add_kokoro_args(parser) | 342 | add_kokoro_args(parser) |
| 343 | + add_kitten_args(parser) | ||
| 296 | 344 | ||
| 297 | parser.add_argument( | 345 | parser.add_argument( |
| 298 | "--tts-rule-fsts", | 346 | "--tts-rule-fsts", |
| @@ -499,6 +547,12 @@ def main(): | @@ -499,6 +547,12 @@ def main(): | ||
| 499 | dict_dir=args.kokoro_dict_dir, | 547 | dict_dir=args.kokoro_dict_dir, |
| 500 | lexicon=args.kokoro_lexicon, | 548 | lexicon=args.kokoro_lexicon, |
| 501 | ), | 549 | ), |
| 550 | + kitten=sherpa_onnx.OfflineTtsKittenModelConfig( | ||
| 551 | + model=args.kitten_model, | ||
| 552 | + voices=args.kitten_voices, | ||
| 553 | + tokens=args.kitten_tokens, | ||
| 554 | + data_dir=args.kitten_data_dir, | ||
| 555 | + ), | ||
| 502 | provider=args.provider, | 556 | provider=args.provider, |
| 503 | debug=args.debug, | 557 | debug=args.debug, |
| 504 | num_threads=args.num_threads, | 558 | num_threads=args.num_threads, |
| @@ -12,7 +12,7 @@ generated audio. | @@ -12,7 +12,7 @@ generated audio. | ||
| 12 | 12 | ||
| 13 | Usage: | 13 | Usage: |
| 14 | 14 | ||
| 15 | -Example (1/7) | 15 | +Example (1/8) |
| 16 | 16 | ||
| 17 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | 17 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 |
| 18 | tar xf vits-piper-en_US-amy-low.tar.bz2 | 18 | tar xf vits-piper-en_US-amy-low.tar.bz2 |
| @@ -24,7 +24,7 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -24,7 +24,7 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 24 | --output-filename=./generated.wav \ | 24 | --output-filename=./generated.wav \ |
| 25 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | 25 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 26 | 26 | ||
| 27 | -Example (2/7) | 27 | +Example (2/8) |
| 28 | 28 | ||
| 29 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 | 29 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| 30 | tar xvf vits-icefall-zh-aishell3.tar.bz2 | 30 | tar xvf vits-icefall-zh-aishell3.tar.bz2 |
| @@ -38,7 +38,7 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -38,7 +38,7 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 38 | --output-filename=./liubei-21.wav \ | 38 | --output-filename=./liubei-21.wav \ |
| 39 | "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334" | 39 | "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334" |
| 40 | 40 | ||
| 41 | -Example (3/7) | 41 | +Example (3/8) |
| 42 | 42 | ||
| 43 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2 | 43 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2 |
| 44 | tar xvf sherpa-onnx-vits-zh-ll.tar.bz2 | 44 | tar xvf sherpa-onnx-vits-zh-ll.tar.bz2 |
| @@ -54,7 +54,7 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -54,7 +54,7 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 54 | --output-filename=./test-2.wav \ | 54 | --output-filename=./test-2.wav \ |
| 55 | "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。" | 55 | "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。" |
| 56 | 56 | ||
| 57 | -Example (4/7) | 57 | +Example (4/8) |
| 58 | 58 | ||
| 59 | curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 | 59 | curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 |
| 60 | tar xvf matcha-icefall-zh-baker.tar.bz2 | 60 | tar xvf matcha-icefall-zh-baker.tar.bz2 |
| @@ -72,7 +72,7 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -72,7 +72,7 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 72 | --output-filename=./test-matcha.wav \ | 72 | --output-filename=./test-matcha.wav \ |
| 73 | "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" | 73 | "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" |
| 74 | 74 | ||
| 75 | -Example (5/7) | 75 | +Example (5/8) |
| 76 | 76 | ||
| 77 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | 77 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 |
| 78 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 | 78 | tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 |
| @@ -89,7 +89,7 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -89,7 +89,7 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 89 | --num-threads=2 \ | 89 | --num-threads=2 \ |
| 90 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | 90 | "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 91 | 91 | ||
| 92 | -Example (6/7) | 92 | +Example (6/8) |
| 93 | 93 | ||
| 94 | (This version of kokoro supports only English) | 94 | (This version of kokoro supports only English) |
| 95 | 95 | ||
| @@ -106,9 +106,9 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -106,9 +106,9 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 106 | --num-threads=2 \ | 106 | --num-threads=2 \ |
| 107 | --sid=10 \ | 107 | --sid=10 \ |
| 108 | --output-filename="./kokoro-10.wav" \ | 108 | --output-filename="./kokoro-10.wav" \ |
| 109 | - "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be a statesman, a businessman, an official, or a scholar." | 109 | + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." |
| 110 | 110 | ||
| 111 | -Example (7/7) | 111 | +Example (7/8) |
| 112 | 112 | ||
| 113 | (This version of kokoro supports English, Chinese, etc.) | 113 | (This version of kokoro supports English, Chinese, etc.) |
| 114 | 114 | ||
| @@ -129,6 +129,23 @@ python3 ./python-api-examples/offline-tts.py \ | @@ -129,6 +129,23 @@ python3 ./python-api-examples/offline-tts.py \ | ||
| 129 | --output-filename="./kokoro-18-zh-en.wav" \ | 129 | --output-filename="./kokoro-18-zh-en.wav" \ |
| 130 | "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" | 130 | "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" |
| 131 | 131 | ||
| 132 | +Example (8/8) | ||
| 133 | + | ||
| 134 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 135 | +tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 136 | +rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 137 | + | ||
| 138 | +python3 ./python-api-examples/offline-tts.py \ | ||
| 139 | + --debug=1 \ | ||
| 140 | + --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \ | ||
| 141 | + --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \ | ||
| 142 | + --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \ | ||
| 143 | + --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \ | ||
| 144 | + --num-threads=2 \ | ||
| 145 | + --sid=0 \ | ||
| 146 | + --output-filename="./kitten-0.wav" \ | ||
| 147 | + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." | ||
| 148 | + | ||
| 132 | You can find more models at | 149 | You can find more models at |
| 133 | https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | 150 | https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models |
| 134 | 151 | ||
| @@ -272,6 +289,36 @@ def add_kokoro_args(parser): | @@ -272,6 +289,36 @@ def add_kokoro_args(parser): | ||
| 272 | ) | 289 | ) |
| 273 | 290 | ||
| 274 | 291 | ||
| 292 | +def add_kitten_args(parser): | ||
| 293 | + parser.add_argument( | ||
| 294 | + "--kitten-model", | ||
| 295 | + type=str, | ||
| 296 | + default="", | ||
| 297 | + help="Path to model.onnx for kitten", | ||
| 298 | + ) | ||
| 299 | + | ||
| 300 | + parser.add_argument( | ||
| 301 | + "--kitten-voices", | ||
| 302 | + type=str, | ||
| 303 | + default="", | ||
| 304 | + help="Path to voices.bin for kitten", | ||
| 305 | + ) | ||
| 306 | + | ||
| 307 | + parser.add_argument( | ||
| 308 | + "--kitten-tokens", | ||
| 309 | + type=str, | ||
| 310 | + default="", | ||
| 311 | + help="Path to tokens.txt for kitten", | ||
| 312 | + ) | ||
| 313 | + | ||
| 314 | + parser.add_argument( | ||
| 315 | + "--kitten-data-dir", | ||
| 316 | + type=str, | ||
| 317 | + default="", | ||
| 318 | + help="Path to the dict directory of espeak-ng.", | ||
| 319 | + ) | ||
| 320 | + | ||
| 321 | + | ||
| 275 | def get_args(): | 322 | def get_args(): |
| 276 | parser = argparse.ArgumentParser( | 323 | parser = argparse.ArgumentParser( |
| 277 | formatter_class=argparse.ArgumentDefaultsHelpFormatter | 324 | formatter_class=argparse.ArgumentDefaultsHelpFormatter |
| @@ -280,6 +327,7 @@ def get_args(): | @@ -280,6 +327,7 @@ def get_args(): | ||
| 280 | add_vits_args(parser) | 327 | add_vits_args(parser) |
| 281 | add_matcha_args(parser) | 328 | add_matcha_args(parser) |
| 282 | add_kokoro_args(parser) | 329 | add_kokoro_args(parser) |
| 330 | + add_kitten_args(parser) | ||
| 283 | 331 | ||
| 284 | parser.add_argument( | 332 | parser.add_argument( |
| 285 | "--tts-rule-fsts", | 333 | "--tts-rule-fsts", |
| @@ -382,6 +430,12 @@ def main(): | @@ -382,6 +430,12 @@ def main(): | ||
| 382 | dict_dir=args.kokoro_dict_dir, | 430 | dict_dir=args.kokoro_dict_dir, |
| 383 | lexicon=args.kokoro_lexicon, | 431 | lexicon=args.kokoro_lexicon, |
| 384 | ), | 432 | ), |
| 433 | + kitten=sherpa_onnx.OfflineTtsKittenModelConfig( | ||
| 434 | + model=args.kitten_model, | ||
| 435 | + voices=args.kitten_voices, | ||
| 436 | + tokens=args.kitten_tokens, | ||
| 437 | + data_dir=args.kitten_data_dir, | ||
| 438 | + ), | ||
| 385 | provider=args.provider, | 439 | provider=args.provider, |
| 386 | debug=args.debug, | 440 | debug=args.debug, |
| 387 | num_threads=args.num_threads, | 441 | num_threads=args.num_threads, |
| @@ -44,6 +44,7 @@ from _sherpa_onnx import ( | @@ -44,6 +44,7 @@ from _sherpa_onnx import ( | ||
| 44 | OfflineTransducerModelConfig, | 44 | OfflineTransducerModelConfig, |
| 45 | OfflineTts, | 45 | OfflineTts, |
| 46 | OfflineTtsConfig, | 46 | OfflineTtsConfig, |
| 47 | + OfflineTtsKittenModelConfig, | ||
| 47 | OfflineTtsKokoroModelConfig, | 48 | OfflineTtsKokoroModelConfig, |
| 48 | OfflineTtsMatchaModelConfig, | 49 | OfflineTtsMatchaModelConfig, |
| 49 | OfflineTtsModelConfig, | 50 | OfflineTtsModelConfig, |
-
请注册或登录后发表评论