Fangjun Kuang
Committed by GitHub

Avoid adding tail padding for VAD in generate-subtitles.py (#1674)

@@ -516,28 +516,22 @@ def main(): @@ -516,28 +516,22 @@ def main():
516 516
517 is_eof = False 517 is_eof = False
518 # TODO(fangjun): Support multithreads 518 # TODO(fangjun): Support multithreads
519 - while True: 519 + while not is_eof:
520 # *2 because int16_t has two bytes 520 # *2 because int16_t has two bytes
521 data = process.stdout.read(frames_per_read * 2) 521 data = process.stdout.read(frames_per_read * 2)
522 if not data: 522 if not data:
523 - if is_eof:  
524 - break 523 + vad.flush()
525 is_eof = True 524 is_eof = True
526 - # pad 1 second at the end of the file for the VAD  
527 - data = np.zeros(1 * args.sample_rate, dtype=np.int16)  
528 -  
529 - samples = np.frombuffer(data, dtype=np.int16)  
530 - samples = samples.astype(np.float32) / 32768 525 + else:
  526 + samples = np.frombuffer(data, dtype=np.int16)
  527 + samples = samples.astype(np.float32) / 32768
531 528
532 - num_processed_samples += samples.shape[0] 529 + num_processed_samples += samples.shape[0]
533 530
534 - buffer = np.concatenate([buffer, samples])  
535 - while len(buffer) > window_size:  
536 - vad.accept_waveform(buffer[:window_size])  
537 - buffer = buffer[window_size:]  
538 -  
539 - if is_eof:  
540 - vad.flush() 531 + buffer = np.concatenate([buffer, samples])
  532 + while len(buffer) > window_size:
  533 + vad.accept_waveform(buffer[:window_size])
  534 + buffer = buffer[window_size:]
541 535
542 streams = [] 536 streams = []
543 segments = [] 537 segments = []