Committed by GitHub
Fix generating Chinese lexicon for Kokoro TTS 1.0 (#1888)
正在显示 2 个修改的文件，包含 17 行增加和 9 行删除
| @@ -2,10 +2,21 @@ | @@ -2,10 +2,21 @@ | ||
| 2 | # Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | 2 | # Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) |
| 3 | 3 | ||
| 4 | import json | 4 | import json |
| 5 | -from pypinyin import phrases_dict, pinyin_dict | ||
| 6 | -from misaki import zh | ||
| 7 | from typing import List, Tuple | 5 | from typing import List, Tuple |
| 8 | 6 | ||
| 7 | +from misaki import zh | ||
| 8 | +from pypinyin import load_phrases_dict, phrases_dict, pinyin_dict | ||
| 9 | + | ||
| 10 | +user_dict = { | ||
| 11 | + "还田": [["huan2"], ["tian2"]], | ||
| 12 | + "行长": [["hang2"], ["zhang3"]], | ||
| 13 | + "银行行长": [["yin2"], ["hang2"], ["hang2"], ["zhang3"]], | ||
| 14 | +} | ||
| 15 | + | ||
| 16 | +load_phrases_dict(user_dict) | ||
| 17 | + | ||
| 18 | +phrases_dict.phrases_dict.update(**user_dict) | ||
| 19 | + | ||
| 9 | 20 | ||
| 10 | def generate_english_lexicon(kind: str): | 21 | def generate_english_lexicon(kind: str): |
| 11 | assert kind in ("us", "gb"), kind | 22 | assert kind in ("us", "gb"), kind |
| @@ -59,11 +70,13 @@ def generate_chinese_lexicon(): | @@ -59,11 +70,13 @@ def generate_chinese_lexicon(): | ||
| 59 | if not (0x4E00 <= key <= 0x9FFF): | 70 | if not (0x4E00 <= key <= 0x9FFF): |
| 60 | continue | 71 | continue |
| 61 | w = chr(key) | 72 | w = chr(key) |
| 62 | - tokens: str = g2p(w) | 73 | + tokens: str = g2p.word2ipa(w) |
| 74 | + tokens = tokens.replace(chr(815), "") | ||
| 63 | lexicon.append((w, tokens)) | 75 | lexicon.append((w, tokens)) |
| 64 | 76 | ||
| 65 | for key in phrases: | 77 | for key in phrases: |
| 66 | - tokens: str = g2p(key) | 78 | + tokens: str = g2p.word2ipa(key) |
| 79 | + tokens = tokens.replace(chr(815), "") | ||
| 67 | lexicon.append((key, tokens)) | 80 | lexicon.append((key, tokens)) |
| 68 | return lexicon | 81 | return lexicon |
| 69 | 82 |
| @@ -114,11 +114,6 @@ if [ ! -f ./lexicon-zh.txt ]; then | @@ -114,11 +114,6 @@ if [ ! -f ./lexicon-zh.txt ]; then | ||
| 114 | ./generate_lexicon.py | 114 | ./generate_lexicon.py |
| 115 | fi | 115 | fi |
| 116 | 116 | ||
| 117 | -grep '还钱' ./lexicon-zh.txt | ||
| 118 | -sed -i.bak 's/还钱 x a i/还钱 x w a/' ./lexicon-zh.txt | ||
| 119 | -rm -v ./lexicon-zh.txt.bak | ||
| 120 | -grep '还钱' ./lexicon-zh.txt | ||
| 121 | - | ||
| 122 | if [ ! -f ./voices.bin ]; then | 117 | if [ ! -f ./voices.bin ]; then |
| 123 | ./generate_voices_bin.py | 118 | ./generate_voices_bin.py |
| 124 | fi | 119 | fi |
-
请 注册 或 登录 后发表评论