Committed by
GitHub
Add python-api-examples: speech-recognition-from-microphone.py (#46)
正在显示
1 个修改的文件
包含
69 行增加
和
0 行删除
| 1 | +#!/usr/bin/env python3 | ||
| 2 | + | ||
| 3 | +# Real-time speech recognition from a microphone with sherpa-onnx Python API | ||
| 4 | +# | ||
| 5 | +# Please refer to | ||
| 6 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 7 | +# to download pre-trained models | ||
| 8 | + | ||
| 9 | +import sys | ||
| 10 | + | ||
| 11 | +try: | ||
| 12 | + import sounddevice as sd | ||
| 13 | +except ImportError as e: | ||
| 14 | + print("Please install sounddevice first. You can use") | ||
| 15 | + print() | ||
| 16 | + print(" pip install sounddevice") | ||
| 17 | + print() | ||
| 18 | + print("to install it") | ||
| 19 | + sys.exit(-1) | ||
| 20 | + | ||
| 21 | +import sherpa_onnx | ||
| 22 | + | ||
| 23 | + | ||
def create_recognizer():
    """Build the streaming recognizer used by this example.

    The model files are read from ``./sherpa-onnx-lstm-en-2023-02-17``,
    relative to the current working directory. Replace the paths below to
    use a different model; download links are listed at
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html

    Returns:
        The ``sherpa_onnx.OnlineRecognizer`` built from those files.
    """
    options = {
        "tokens": "./sherpa-onnx-lstm-en-2023-02-17/tokens.txt",
        "encoder": "./sherpa-onnx-lstm-en-2023-02-17/encoder-epoch-99-avg-1.onnx",
        "decoder": "./sherpa-onnx-lstm-en-2023-02-17/decoder-epoch-99-avg-1.onnx",
        "joiner": "./sherpa-onnx-lstm-en-2023-02-17/joiner-epoch-99-avg-1.onnx",
        "num_threads": 4,
        "sample_rate": 16000,
        "feature_dim": 80,
    }
    return sherpa_onnx.OnlineRecognizer(**options)
| 38 | + | ||
| 39 | + | ||
def main():
    """Recognize speech from the microphone until interrupted.

    Audio is read in 100 ms chunks from a mono float32 ``sd.InputStream``
    (opened without an explicit device), fed to a recognizer stream, and
    the recognition result is printed every time it changes.

    The loop never returns on its own; the caller is expected to stop it,
    e.g. by catching ``KeyboardInterrupt``.
    """
    recognizer = create_recognizer()
    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    last_result = ""
    stream = recognizer.create_stream()
    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        # Prompt only after the recognizer has been created and the input
        # stream is open, so speech uttered right after the prompt is
        # actually captured.
        print("Started! Please speak")
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)  # flatten to a 1-D array
            stream.accept_waveform(sample_rate, samples)
            # Decode everything the recognizer reports as ready before
            # fetching the current result.
            while recognizer.is_ready(stream):
                recognizer.decode_stream(stream)
            result = recognizer.get_result(stream)
            # Print only when the result changed since the last chunk.
            if last_result != result:
                last_result = result
                print(result)
| 58 | + | ||
| 59 | + | ||
if __name__ == "__main__":
    # Show the available audio devices and the one that will be used,
    # then run the recognition loop until the user presses Ctrl + C.
    devices = sd.query_devices()
    if len(devices) == 0:
        # Indexing an empty device list below would raise IndexError.
        print("No audio devices found")
        sys.exit(-1)
    print(devices)

    default_input_device_idx = sd.default.device[0]
    if default_input_device_idx < 0:
        # A negative index would silently pick a device counted from the
        # end of the list instead of reporting that there is no default
        # input device.
        print("No default input device found")
        sys.exit(-1)
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
-
请 注册 或 登录 后发表评论