Add python-api-examples: speech-recognition-from-microphone.py (#46)

Yifan Yang · GitHub
Commit 7ece27cd3082f199cf14cc45740c37a038f6796b 7ece27cd 1 parent c5d314b4
python-api-examples/speech-recognition-from-microphone.py
--- a/python-api-examples/speech-recognition-from-microphone.py 0 → 100644
查看文件 @7ece27c
+++ b/python-api-examples/speech-recognition-from-microphone.py 0 → 100644
查看文件 @7ece27c
+ #!/usr/bin/env python3
+ 
+ # Real-time speech recognition from a microphone with sherpa-onnx Python API
+ #
+ # Please refer to
+ # https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+ # to download pre-trained models
+ 
+ import sys
+ 
+ try:
+     import sounddevice as sd
+ except ImportError as e:
+     print("Please install sounddevice first. You can use")
+     print()
+     print("  pip install sounddevice")
+     print()
+     print("to install it")
+     sys.exit(-1)
+ 
+ import sherpa_onnx
+ 
+ 
+ def create_recognizer():
+     # Please replace the model files if needed.
+     # See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+     # for download links.
+     recognizer = sherpa_onnx.OnlineRecognizer(
+         tokens="./sherpa-onnx-lstm-en-2023-02-17/tokens.txt",
+         encoder="./sherpa-onnx-lstm-en-2023-02-17/encoder-epoch-99-avg-1.onnx",
+         decoder="./sherpa-onnx-lstm-en-2023-02-17/decoder-epoch-99-avg-1.onnx",
+         joiner="./sherpa-onnx-lstm-en-2023-02-17/joiner-epoch-99-avg-1.onnx",
+         num_threads=4,
+         sample_rate=16000,
+         feature_dim=80,
+     )
+     return recognizer
+ 
+ 
+ def main():
+     print("Started! Please speak")
+     recognizer = create_recognizer()
+     sample_rate = 16000
+     samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
+     last_result = ""
+     stream = recognizer.create_stream()
+     with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
+         while True:
+             samples, _ = s.read(samples_per_read)  # a blocking read
+             samples = samples.reshape(-1)
+             stream.accept_waveform(sample_rate, samples)
+             while recognizer.is_ready(stream):
+                 recognizer.decode_stream(stream)
+             result = recognizer.get_result(stream)
+             if last_result != result:
+                 last_result = result
+                 print(result)
+ 
+ 
+ if __name__ == "__main__":
+     devices = sd.query_devices()
+     print(devices)
+     default_input_device_idx = sd.default.device[0]
+     print(f'Use default device: {devices[default_input_device_idx]["name"]}')
+ 
+     try:
+         main()
+     except KeyboardInterrupt:
+         print("\nCaught Ctrl + C. Exiting")