Committed by
GitHub
Fix the last character not being recognized for streaming paraformer models. (#799)
正在显示
2 个修改的文件
包含
27 行增加
和
0 行删除
| @@ -113,6 +113,20 @@ as the device_name. | @@ -113,6 +113,20 @@ as the device_name. | ||
| 113 | 113 | ||
| 114 | bool is_endpoint = recognizer.IsEndpoint(stream.get()); | 114 | bool is_endpoint = recognizer.IsEndpoint(stream.get()); |
| 115 | 115 | ||
| 116 | + if (is_endpoint && !config.model_config.paraformer.encoder.empty()) { | ||
| 117 | + // Streaming paraformer models have a large right chunk size, so on | ||
| 118 | + // endpointing we pad the stream with one second of zero-valued samples; | ||
| 119 | + // otherwise the last character may not be recognized | ||
| 120 | + std::vector<float> tail_paddings( | ||
| 121 | + static_cast<int>(1.0 * expected_sample_rate)); | ||
| 122 | + stream->AcceptWaveform(expected_sample_rate, tail_paddings.data(), | ||
| 123 | + tail_paddings.size()); | ||
| 124 | + while (recognizer.IsReady(stream.get())) { | ||
| 125 | + recognizer.DecodeStream(stream.get()); | ||
| 126 | + } | ||
| 127 | + text = recognizer.GetResult(stream.get()).text; | ||
| 128 | + } | ||
| 129 | + | ||
| 116 | if (!text.empty() && last_text != text) { | 130 | if (!text.empty() && last_text != text) { |
| 117 | last_text = text; | 131 | last_text = text; |
| 118 | 132 |
| @@ -157,6 +157,19 @@ for a list of pre-trained models to download. | @@ -157,6 +157,19 @@ for a list of pre-trained models to download. | ||
| 157 | auto text = recognizer.GetResult(s.get()).text; | 157 | auto text = recognizer.GetResult(s.get()).text; |
| 158 | bool is_endpoint = recognizer.IsEndpoint(s.get()); | 158 | bool is_endpoint = recognizer.IsEndpoint(s.get()); |
| 159 | 159 | ||
| 160 | + if (is_endpoint && !config.model_config.paraformer.encoder.empty()) { | ||
| 161 | + // Streaming paraformer models have a large right chunk size, so on | ||
| 162 | + // endpointing we pad the stream with one second of zero-valued samples; | ||
| 163 | + // otherwise the last character may not be recognized | ||
| 164 | + std::vector<float> tail_paddings(static_cast<int>(1.0 * mic_sample_rate)); | ||
| 165 | + s->AcceptWaveform(mic_sample_rate, tail_paddings.data(), | ||
| 166 | + tail_paddings.size()); | ||
| 167 | + while (recognizer.IsReady(s.get())) { | ||
| 168 | + recognizer.DecodeStream(s.get()); | ||
| 169 | + } | ||
| 170 | + text = recognizer.GetResult(s.get()).text; | ||
| 171 | + } | ||
| 172 | + | ||
| 160 | if (!text.empty() && last_text != text) { | 173 | if (!text.empty() && last_text != text) { |
| 161 | last_text = text; | 174 | last_text = text; |
| 162 | 175 |
-
请 注册 或 登录 后发表评论