Fangjun Kuang
Committed by GitHub

Add Kokoro TTS to MFC examples (#1760)

1 // Microsoft Visual C++ generated resource script. 1 // Microsoft Visual C++ generated resource script.
@@ -419,7 +419,7 @@ bool Exists(const std::string &filename) { @@ -419,7 +419,7 @@ bool Exists(const std::string &filename) {
419 void CNonStreamingTextToSpeechDlg::InitHint() { 419 void CNonStreamingTextToSpeechDlg::InitHint() {
420 AppendLineToMultilineEditCtrl(my_hint_, "Speaker ID: Used only for multi-speaker models. Example value: 0"); 420 AppendLineToMultilineEditCtrl(my_hint_, "Speaker ID: Used only for multi-speaker models. Example value: 0");
421 AppendLineToMultilineEditCtrl(my_hint_, "Speed: Larger -> Faster in speech speed. Example value: 1.0"); 421 AppendLineToMultilineEditCtrl(my_hint_, "Speed: Larger -> Faster in speech speed. Example value: 1.0");
422 - AppendLineToMultilineEditCtrl(my_hint_, "\r\n\r\nPlease input your text and click the button Generate"); 422 + AppendLineToMultilineEditCtrl(my_hint_, "\r\nPlease input your text and click the button Generate");
423 423
424 } 424 }
425 425
@@ -430,7 +430,7 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -430,7 +430,7 @@ void CNonStreamingTextToSpeechDlg::Init() {
430 output_filename_.SetWindowText(Utf8ToUtf16("./generated.wav").c_str()); 430 output_filename_.SetWindowText(Utf8ToUtf16("./generated.wav").c_str());
431 431
432 bool ok = true; 432 bool ok = true;
433 - std::string error_message = "--------------------"; 433 + std::string error_message = "--------------------\r\n";
434 if (!Exists("./model.onnx")) { 434 if (!Exists("./model.onnx")) {
435 error_message += "Cannot find ./model.onnx\r\n"; 435 error_message += "Cannot find ./model.onnx\r\n";
436 ok = false; 436 ok = false;
@@ -447,17 +447,64 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -447,17 +447,64 @@ void CNonStreamingTextToSpeechDlg::Init() {
447 generate_btn_.EnableWindow(FALSE); 447 generate_btn_.EnableWindow(FALSE);
448 error_message += 448 error_message +=
449 "\r\nPlease refer to\r\n" 449 "\r\nPlease refer to\r\n"
450 - "https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models"; 450 + "https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models"
  451 + "\r\nor\r\n"
  452 + "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models";
  453 +
451 error_message += "\r\nto download models.\r\n"; 454 error_message += "\r\nto download models.\r\n";
452 - error_message += "\r\nWe give an example below\r\n\r\n"; 455 + error_message += "\r\nWe give several examples below\r\n";
  456 + error_message += " 1. Use a Kokoro TTS model\r\n";
  457 + error_message += " 2. Use a VITS Piper TTS model\r\n";
  458 + error_message += " 3. Use a VITS Chinese TTS model\r\n";
  459 + error_message += " 4. Use a Matcha TTS model\r\n";
  460 + error_message += "\r\n";
  461 + error_message +=
  462 + "----------1. Use a Kokoro TTS model----------\r\n"
  463 + "(a) Download the model from \r\n"
  464 + " https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2\r\n"
  465 + "(b) Uncompress it and you will get a directory kokoro-en-v0_19\r\n"
  466 + "(c) Switch to the directory kokoro-en-v0_19\r\n"
  467 + "(d) Copy the current exe to the directory kokoro-en-v0_19\r\n"
  468 + "(e) Done! You can now run the exe in the directory kokoro-en-v0_19\r\n";
  469 +
  470 + error_message += "\r\n";
  471 +
  472 + error_message +=
  473 + "----------2. Use a VITS Piper TTS model----------\r\n"
  474 + "(a) Download the model from \r\n"
  475 + " https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2\r\n"
  476 + "(b) Uncompress it and you will get a directory vits-piper-en_US-amy-low\r\n"
  477 + "(c) Switch to the directory vits-piper-en_US-amy-low \r\n"
  478 + "(d) Rename en_US-amy-low.onnx to model.onnx\r\n"
  479 + "(e) Copy the current exe to the directory vits-piper-en_US-amy-low\r\n"
  480 + "(f) Done! You can now run the exe in the directory vits-piper-en_US-amy-low\r\n";
  481 +
  482 + error_message += "\r\n";
  483 +
453 error_message += 484 error_message +=
454 - "1. Download vits-piper-en_US-amy-low.tar.bz2 from the following URL\r\n\r\n"  
455 - "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2\r\n\r\n"  
456 - "2. Uncompress it and you will get a directory vits-piper-en_US-amy-low \r\n\r\n"  
457 - "3. Switch to the directory vits-piper-en_US-amy-low \r\n\r\n"  
458 - "4. Rename en_US-amy-low.onnx to model.onnx \r\n\r\n"  
459 - "5. Copy the current exe to the directory vits-piper-en_US-amy-low\r\n\r\n"  
460 - "6. Done! You can now run the exe in the directory vits-piper-en_US-amy-low\r\n\r\n"; 485 + "----------3. Use a VITS Chinese TTS model----------\r\n"
  486 + "(a) Download the model from \r\n"
  487 + " https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2\r\n"
  488 + "(b) Uncompress it and you will get a directory sherpa-onnx-vits-zh-ll\r\n"
  489 + "(c) Switch to the directory sherpa-onnx-vits-zh-ll\r\n"
  490 + "(d) Copy the current exe to the directory sherpa-onnx-vits-zh-ll\r\n"
  491 + "(e) Done! You can now run the exe in the directory sherpa-onnx-vits-zh-ll\r\n";
  492 +
  493 + error_message += "\r\n";
  494 +
  495 + error_message +=
  496 + "----------4. Use a Matcha TTS model----------\r\n"
  497 + "(a) Download the model from \r\n"
  498 + " https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2\r\n"
  499 + "(b) Uncompress it and you will get a directory matcha-icefall-zh-baker\r\n"
  500 + "(c) Switch to the directory matcha-icefall-zh-baker\r\n"
  501 + "(d) Rename model-steps-3.onnx to model.onnx\r\n"
  502 + "(e) Download a vocoder model from \r\n"
  503 + " https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx\r\n"
  504 + "(f) Rename hifigan_v2.onnx to hifigan.onnx\r\n"
  505 + "(g) Remember to put hifigan.onnx in the directory matcha-icefall-zh-baker\r\n"
  506 + "(h) Copy the current exe to the directory matcha-icefall-zh-baker\r\n"
  507 + "(i) Done! You can now run the exe in the directory matcha-icefall-zh-baker\r\n";
461 508
462 AppendLineToMultilineEditCtrl(my_hint_, error_message); 509 AppendLineToMultilineEditCtrl(my_hint_, error_message);
463 return; 510 return;
@@ -467,18 +514,48 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -467,18 +514,48 @@ void CNonStreamingTextToSpeechDlg::Init() {
467 SherpaOnnxOfflineTtsConfig config; 514 SherpaOnnxOfflineTtsConfig config;
468 memset(&config, 0, sizeof(config)); 515 memset(&config, 0, sizeof(config));
469 config.model.debug = 0; 516 config.model.debug = 0;
470 - config.model.num_threads = 2; 517 + config.model.num_threads = 4;
471 config.model.provider = "cpu"; 518 config.model.provider = "cpu";
  519 +
  520 + if (Exists("./voices.bin")) {
  521 + // it is a kokoro tts model
  522 + config.model.kokoro.model = "./model.onnx";
  523 + config.model.kokoro.voices = "./voices.bin";
  524 + config.model.kokoro.tokens = "./tokens.txt";
  525 + config.model.kokoro.data_dir = "./espeak-ng-data";
  526 + } else if (Exists("./hifigan.onnx")) {
  527 + // it is a matcha tts model
  528 + config.model.matcha.acoustic_model = "./model.onnx";
  529 + config.model.matcha.vocoder = "./hifigan.onnx";
  530 + config.model.matcha.tokens = "./tokens.txt";
  531 +
  532 + if (Exists("./espeak-ng-data/phontab")) {
  533 + config.model.matcha.data_dir = "./espeak-ng-data";
  534 + }
  535 +
  536 + if(Exists("./lexicon.txt")) {
  537 + config.model.matcha.lexicon = "./lexicon.txt";
  538 + }
  539 +
  540 + if (Exists("./dict/jieba.dict.utf8")) {
  541 + config.model.matcha.dict_dir = "./dict";
  542 + }
  543 + } else {
  544 + // it is a vits tts model
472 config.model.vits.model = "./model.onnx"; 545 config.model.vits.model = "./model.onnx";
  546 + config.model.vits.tokens = "./tokens.txt";
473 if (Exists("./espeak-ng-data/phontab")) { 547 if (Exists("./espeak-ng-data/phontab")) {
474 config.model.vits.data_dir = "./espeak-ng-data"; 548 config.model.vits.data_dir = "./espeak-ng-data";
475 - } else if (Exists("./lexicon.txt")) { 549 + }
  550 +
  551 + if (Exists("./lexicon.txt")) {
476 config.model.vits.lexicon = "./lexicon.txt"; 552 config.model.vits.lexicon = "./lexicon.txt";
477 } 553 }
478 554
479 if (Exists("./dict/jieba.dict.utf8")) { 555 if (Exists("./dict/jieba.dict.utf8")) {
480 config.model.vits.dict_dir = "./dict"; 556 config.model.vits.dict_dir = "./dict";
481 } 557 }
  558 + }
482 559
483 if (Exists("./phone.fst") && Exists("./date.fst") && Exists("./number.fst")) { 560 if (Exists("./phone.fst") && Exists("./date.fst") && Exists("./number.fst")) {
484 config.rule_fsts = "./phone.fst,./date.fst,number.fst"; 561 config.rule_fsts = "./phone.fst,./date.fst,number.fst";
@@ -488,8 +565,6 @@ void CNonStreamingTextToSpeechDlg::Init() { @@ -488,8 +565,6 @@ void CNonStreamingTextToSpeechDlg::Init() {
488 config.rule_fars = "./rule.far"; 565 config.rule_fars = "./rule.far";
489 } 566 }
490 567
491 - config.model.vits.tokens = "./tokens.txt";  
492 -  
493 tts_ = SherpaOnnxCreateOfflineTts(&config); 568 tts_ = SherpaOnnxCreateOfflineTts(&config);
494 } 569 }
495 570