Fangjun Kuang
Committed by GitHub

Fix Python two-pass ASR examples (#1230)

@@ -335,11 +335,10 @@ def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer: @@ -335,11 +335,10 @@ def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer:
335 335
336 def run_second_pass( 336 def run_second_pass(
337 recognizer: sherpa_onnx.OfflineRecognizer, 337 recognizer: sherpa_onnx.OfflineRecognizer,
338 - sample_buffers: List[np.ndarray], 338 + samples: np.ndarray,
339 sample_rate: int, 339 sample_rate: int,
340 ): 340 ):
341 stream = recognizer.create_stream() 341 stream = recognizer.create_stream()
342 - samples = np.concatenate(sample_buffers)  
343 stream.accept_waveform(sample_rate, samples) 342 stream.accept_waveform(sample_rate, samples)
344 343
345 recognizer.decode_stream(stream) 344 recognizer.decode_stream(stream)
@@ -407,14 +406,20 @@ def main(): @@ -407,14 +406,20 @@ def main():
407 406
408 if is_endpoint: 407 if is_endpoint:
409 if result: 408 if result:
  409 + samples = np.concatenate(sample_buffers)
  410 + # There are internal sample buffers inside the streaming
  411 + # feature extractor, so we cannot send all samples to
  412 + # the 2nd pass. Here 8000 is just an empirical value
  413 + # that should work for most streaming models in sherpa-onnx
  414 + sample_buffers = [samples[-8000:]]
  415 + samples = samples[:-8000]
410 result = run_second_pass( 416 result = run_second_pass(
411 recognizer=second_recognizer, 417 recognizer=second_recognizer,
412 - sample_buffers=sample_buffers, 418 + samples=samples,
413 sample_rate=sample_rate, 419 sample_rate=sample_rate,
414 ) 420 )
415 result = result.lower().strip() 421 result = result.lower().strip()
416 422
417 - sample_buffers = []  
418 print( 423 print(
419 "\r{}:{}".format(segment_id, " " * len(last_result)), 424 "\r{}:{}".format(segment_id, " " * len(last_result)),
420 end="", 425 end="",
@@ -18,8 +18,8 @@ The input text can contain English words. @@ -18,8 +18,8 @@ The input text can contain English words.
18 18
19 Usage: 19 Usage:
20 20
21 -Please download the model from:  
22 -https://huggingface.co/frankyoujian/Edge-Punct-Casing/resolve/main/sherpa-onnx-cnn-bilstm-unigram-bpe-en.7z 21 +Please download the model from:
  22 +https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
23 23
24 ./bin/Release/sherpa-onnx-online-punctuation \ 24 ./bin/Release/sherpa-onnx-online-punctuation \
25 --cnn-bilstm=/path/to/model.onnx \ 25 --cnn-bilstm=/path/to/model.onnx \