Fangjun Kuang
Committed by GitHub

Fix the last character not being recognized for streaming paraformer models. (#799)

@@ -113,6 +113,20 @@ as the device_name. @@ -113,6 +113,20 @@ as the device_name.
113 113
114 bool is_endpoint = recognizer.IsEndpoint(stream.get()); 114 bool is_endpoint = recognizer.IsEndpoint(stream.get());
115 115
  116 + if (is_endpoint && !config.model_config.paraformer.encoder.empty()) {
  117 + // For streaming paraformer models, which have a large right chunk size,
  118 + // we need to pad the stream with silence on endpointing so that the
  119 + // last character can be recognized
  120 + std::vector<float> tail_paddings(
  121 + static_cast<int>(1.0 * expected_sample_rate));
  122 + stream->AcceptWaveform(expected_sample_rate, tail_paddings.data(),
  123 + tail_paddings.size());
  124 + while (recognizer.IsReady(stream.get())) {
  125 + recognizer.DecodeStream(stream.get());
  126 + }
  127 + text = recognizer.GetResult(stream.get()).text;
  128 + }
  129 +
116 if (!text.empty() && last_text != text) { 130 if (!text.empty() && last_text != text) {
117 last_text = text; 131 last_text = text;
118 132
@@ -157,6 +157,19 @@ for a list of pre-trained models to download. @@ -157,6 +157,19 @@ for a list of pre-trained models to download.
157 auto text = recognizer.GetResult(s.get()).text; 157 auto text = recognizer.GetResult(s.get()).text;
158 bool is_endpoint = recognizer.IsEndpoint(s.get()); 158 bool is_endpoint = recognizer.IsEndpoint(s.get());
159 159
  160 + if (is_endpoint && !config.model_config.paraformer.encoder.empty()) {
  161 + // For streaming paraformer models, which have a large right chunk size,
  162 + // we need to pad the stream with silence on endpointing so that the
  163 + // last character can be recognized
  164 + std::vector<float> tail_paddings(static_cast<int>(1.0 * mic_sample_rate));
  165 + s->AcceptWaveform(mic_sample_rate, tail_paddings.data(),
  166 + tail_paddings.size());
  167 + while (recognizer.IsReady(s.get())) {
  168 + recognizer.DecodeStream(s.get());
  169 + }
  170 + text = recognizer.GetResult(s.get()).text;
  171 + }
  172 +
160 if (!text.empty() && last_text != text) { 173 if (!text.empty() && last_text != text) {
161 last_text = text; 174 last_text = text;
162 175