正在显示
1 个修改的文件
包含
10 行增加
和
1 行删除
| @@ -60,7 +60,16 @@ void SymbolTable::Init(std::istream &is) { | @@ -60,7 +60,16 @@ void SymbolTable::Init(std::istream &is) { | ||
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | assert(!sym.empty()); | 62 | assert(!sym.empty()); |
| 63 | - assert(sym2id_.count(sym) == 0); | 63 | + |
| 64 | + // For byte-level BPE, ▁ is replaced with a space (ASCII 0x20), which | ||
| 65 | + // collides with the symbol for the real byte 0x20, so the duplicate-symbol | ||
| 66 | + // check below is skipped when sym is " ". | ||
| 67 | + // | ||
| 68 | + // Note: Only id2sym_ matters as we use it to convert ID to symbols. | ||
| 69 | + if (sym != " ") { | ||
| 70 | + assert(sym2id_.count(sym) == 0); | ||
| 71 | + } | ||
| 72 | + | ||
| 64 | assert(id2sym_.count(id) == 0); | 73 | assert(id2sym_.count(id) == 0); |
| 65 | 74 | ||
| 66 | sym2id_.insert({sym, id}); | 75 | sym2id_.insert({sym, id}); |
-
请 注册 或 登录 后发表评论