Fangjun Kuang
Committed by GitHub

describe how to add new words for MeloTTS models (#1209)

@@ -87,6 +87,8 @@ jobs: @@ -87,6 +87,8 @@ jobs:
87 87
88 git status 88 git status
89 89
  90 + git diff
  91 +
90 git commit -m "add models" 92 git commit -m "add models"
91 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en main || true 93 git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en main || true
92 94
@@ -78,10 +78,52 @@ def generate_tokens(symbol_list): @@ -78,10 +78,52 @@ def generate_tokens(symbol_list):
78 f.write(f"{s} {i}\n") 78 f.write(f"{s} {i}\n")
79 79
80 80
  81 +def add_new_english_words(lexicon):
  82 + """
  83 + Args:
  84 + lexicon:
  85 + Please modify it in-place.
  86 + """
  87 +
  88 + # Please have a look at
  89 + # https://github.com/myshell-ai/MeloTTS/blob/main/melo/text/cmudict.rep
  90 +
  91 + # We give several examples below about how to add new words
  92 +
  93 + # Example 1. Add a new word kaldi
  94 +
  95 + # cmudict.rep does not contain the word kaldi,
  96 + # so if we add the following line to it
  97 + #
  98 + # KALDI K AH0 - L D IH0
  99 + #
  100 + # then we need to change the lexicon like below
  101 + lexicon["kaldi"] = [["K", "AH0"], ["L", "D", "IH0"]]
  102 + #
  103 + # K AH0 and L D IH0 are separated by a dash "-", so
  104 + # ["K", "AH0"] is in one list and ["L", "D", "IH0"] is in a separate list
  105 +
  106 + # Note: Either kaldi or KALDI is fine. You can use either lowercase or
  107 + # uppercase or both
  108 +
  109 + # Example 2. Add a new word SF
  110 + #
  111 + # If we add the following line to cmudict.rep
  112 + #
  113 + # SF EH1 S - EH1 F
  114 + #
  115 + # then we need to change the lexicon like below:
  116 + lexicon["SF"] = [["EH1", "S"], ["EH1", "F"]]
  117 +
  118 + # Please add your new words here
  119 +
  120 + # No need to return lexicon since it is changed in-place
  121 +
  122 +
81 def generate_lexicon(): 123 def generate_lexicon():
82 word_dict = pinyin_dict.pinyin_dict 124 word_dict = pinyin_dict.pinyin_dict
83 phrases = phrases_dict.phrases_dict 125 phrases = phrases_dict.phrases_dict
84 - eng_dict["kaldi"] = [["K", "AH0"], ["L", "D", "IH0"]] 126 + add_new_english_words(eng_dict)
85 with open("lexicon.txt", "w", encoding="utf-8") as f: 127 with open("lexicon.txt", "w", encoding="utf-8") as f:
86 for word in eng_dict: 128 for word in eng_dict:
87 phones, tones = refine_syllables(eng_dict[word]) 129 phones, tones = refine_syllables(eng_dict[word])