Fangjun Kuang
Committed by GitHub

Fix generating Chinese lexicon for Kokoro TTS 1.0 (#1888)

@@ -2,10 +2,21 @@ @@ -2,10 +2,21 @@
2 # Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) 2 # Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
3 3
4 import json 4 import json
5 -from pypinyin import phrases_dict, pinyin_dict  
6 -from misaki import zh  
7 from typing import List, Tuple 5 from typing import List, Tuple
8 6
  7 +from misaki import zh
  8 +from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict
  9 +
  10 +user_dict = {
  11 + "还田": [["huan2"], ["tian2"]],
  12 + "行长": [["hang2"], ["zhang3"]],
  13 + "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]],
  14 +}
  15 +
  16 +load_phrases_dict(user_dict)
  17 +
  18 +phrases_dict.phrases_dict.update(**user_dict)
  19 +
9 20
10 def generate_english_lexicon(kind: str): 21 def generate_english_lexicon(kind: str):
11 assert kind in ("us", "gb"), kind 22 assert kind in ("us", "gb"), kind
@@ -59,11 +70,13 @@ def generate_chinese_lexicon(): @@ -59,11 +70,13 @@ def generate_chinese_lexicon():
59 if not (0x4E00 <= key <= 0x9FFF): 70 if not (0x4E00 <= key <= 0x9FFF):
60 continue 71 continue
61 w = chr(key) 72 w = chr(key)
62 - tokens: str = g2p(w) 73 + tokens: str = g2p.word2ipa(w)
  74 + tokens = tokens.replace(chr(815), "")
63 lexicon.append((w, tokens)) 75 lexicon.append((w, tokens))
64 76
65 for key in phrases: 77 for key in phrases:
66 - tokens: str = g2p(key) 78 + tokens: str = g2p.word2ipa(key)
  79 + tokens = tokens.replace(chr(815), "")
67 lexicon.append((key, tokens)) 80 lexicon.append((key, tokens))
68 return lexicon 81 return lexicon
69 82
@@ -114,11 +114,6 @@ if [ ! -f ./lexicon-zh.txt ]; then @@ -114,11 +114,6 @@ if [ ! -f ./lexicon-zh.txt ]; then
114 ./generate_lexicon.py 114 ./generate_lexicon.py
115 fi 115 fi
116 116
117 -grep '还钱' ./lexicon-zh.txt  
118 -sed -i.bak 's/还钱 x a i/还钱 x w a/' ./lexicon-zh.txt  
119 -rm -v ./lexicon-zh.txt.bak  
120 -grep '还钱' ./lexicon-zh.txt  
121 -  
122 if [ ! -f ./voices.bin ]; then 117 if [ ! -f ./voices.bin ]; then
123 ./generate_voices_bin.py 118 ./generate_voices_bin.py
124 fi 119 fi