正在显示
2 个修改的文件
包含
11 行增加
和
6 行删除
| @@ -335,11 +335,10 @@ def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | @@ -335,11 +335,10 @@ def create_second_pass_recognizer(args) -> sherpa_onnx.OfflineRecognizer: | ||
| 335 | 335 | ||
| 336 | def run_second_pass( | 336 | def run_second_pass( |
| 337 | recognizer: sherpa_onnx.OfflineRecognizer, | 337 | recognizer: sherpa_onnx.OfflineRecognizer, |
| 338 | - sample_buffers: List[np.ndarray], | 338 | + samples: np.ndarray, |
| 339 | sample_rate: int, | 339 | sample_rate: int, |
| 340 | ): | 340 | ): |
| 341 | stream = recognizer.create_stream() | 341 | stream = recognizer.create_stream() |
| 342 | - samples = np.concatenate(sample_buffers) | ||
| 343 | stream.accept_waveform(sample_rate, samples) | 342 | stream.accept_waveform(sample_rate, samples) |
| 344 | 343 | ||
| 345 | recognizer.decode_stream(stream) | 344 | recognizer.decode_stream(stream) |
| @@ -407,14 +406,20 @@ def main(): | @@ -407,14 +406,20 @@ def main(): | ||
| 407 | 406 | ||
| 408 | if is_endpoint: | 407 | if is_endpoint: |
| 409 | if result: | 408 | if result: |
| 409 | + samples = np.concatenate(sample_buffers) | ||
| 410 | + # There are internal sample buffers inside the streaming | ||
| 411 | + # feature extractor, so we cannot send all samples to | ||
| 412 | + # the 2nd pass. Here 8000 is just an empirical value | ||
| 413 | + # that should work for most streaming models in sherpa-onnx | ||
| 414 | + sample_buffers = [samples[-8000:]] | ||
| 415 | + samples = samples[:-8000] | ||
| 410 | result = run_second_pass( | 416 | result = run_second_pass( |
| 411 | recognizer=second_recognizer, | 417 | recognizer=second_recognizer, |
| 412 | - sample_buffers=sample_buffers, | 418 | + samples=samples, |
| 413 | sample_rate=sample_rate, | 419 | sample_rate=sample_rate, |
| 414 | ) | 420 | ) |
| 415 | result = result.lower().strip() | 421 | result = result.lower().strip() |
| 416 | 422 | ||
| 417 | - sample_buffers = [] | ||
| 418 | print( | 423 | print( |
| 419 | "\r{}:{}".format(segment_id, " " * len(last_result)), | 424 | "\r{}:{}".format(segment_id, " " * len(last_result)), |
| 420 | end="", | 425 | end="", |
| @@ -18,8 +18,8 @@ The input text can contain English words. | @@ -18,8 +18,8 @@ The input text can contain English words. | ||
| 18 | 18 | ||
| 19 | Usage: | 19 | Usage: |
| 20 | 20 | ||
| 21 | -Please download the model from: | ||
| 22 | -https://huggingface.co/frankyoujian/Edge-Punct-Casing/resolve/main/sherpa-onnx-cnn-bilstm-unigram-bpe-en.7z | 21 | +Please download the model from: |
| 22 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 | ||
| 23 | 23 | ||
| 24 | ./bin/Release/sherpa-onnx-online-punctuation \ | 24 | ./bin/Release/sherpa-onnx-online-punctuation \ |
| 25 | --cnn-bilstm=/path/to/model.onnx \ | 25 | --cnn-bilstm=/path/to/model.onnx \ |
-
请 注册 或 登录 后发表评论