Yifan Yang
Committed by GitHub

Add python-api-examples: speech-recognition-from-microphone.py (#46)

  1 +#!/usr/bin/env python3
  2 +
  3 +# Real-time speech recognition from a microphone with sherpa-onnx Python API
  4 +#
  5 +# Please refer to
  6 +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  7 +# to download pre-trained models
  8 +
  9 +import sys
  10 +
  11 +try:
  12 + import sounddevice as sd
  13 +except ImportError as e:
  14 + print("Please install sounddevice first. You can use")
  15 + print()
  16 + print(" pip install sounddevice")
  17 + print()
  18 + print("to install it")
  19 + sys.exit(-1)
  20 +
  21 +import sherpa_onnx
  22 +
  23 +
def create_recognizer():
    """Create the ``sherpa_onnx.OnlineRecognizer`` used by this example.

    The model files are read from ``./sherpa-onnx-lstm-en-2023-02-17``.
    Replace the paths below to use a different model; download links are at
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html

    Returns:
        The recognizer, configured for 16 kHz input, 80-dimensional
        features and 4 threads.
    """
    model_files = {
        "tokens": "./sherpa-onnx-lstm-en-2023-02-17/tokens.txt",
        "encoder": "./sherpa-onnx-lstm-en-2023-02-17/encoder-epoch-99-avg-1.onnx",
        "decoder": "./sherpa-onnx-lstm-en-2023-02-17/decoder-epoch-99-avg-1.onnx",
        "joiner": "./sherpa-onnx-lstm-en-2023-02-17/joiner-epoch-99-avg-1.onnx",
    }
    return sherpa_onnx.OnlineRecognizer(
        **model_files,
        num_threads=4,
        sample_rate=16000,
        feature_dim=80,
    )
  38 +
  39 +
def main():
    """Recognize speech from the default microphone until interrupted.

    Reads 100 ms chunks of single-channel float32 audio at 16 kHz from the
    input device, feeds each chunk to the recognizer, and prints the
    recognition result whenever it differs from the previously printed one.

    The loop has no exit condition of its own; the caller is expected to
    stop it, e.g. by catching ``KeyboardInterrupt``.
    """
    recognizer = create_recognizer()
    # Same value as the sample_rate given to the recognizer in
    # create_recognizer(); keep the two in sync.
    sample_rate = 16000
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    last_result = ""
    stream = recognizer.create_stream()
    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
        # Prompt the user only after the model is loaded and the microphone
        # is open, so that nothing spoken right after the prompt is lost.
        print("Started! Please speak")
        while True:
            samples, _ = s.read(samples_per_read)  # a blocking read
            samples = samples.reshape(-1)  # flatten to a 1-D array
            stream.accept_waveform(sample_rate, samples)
            # Decode for as long as the recognizer reports the stream ready.
            while recognizer.is_ready(stream):
                recognizer.decode_stream(stream)
            result = recognizer.get_result(stream)
            # Print only when the result changed, to avoid repeated lines.
            if last_result != result:
                last_result = result
                print(result)
  58 +
  59 +
if __name__ == "__main__":
    # Show the available audio devices so the user can see which one is used.
    devices = sd.query_devices()
    if len(devices) == 0:
        # Without this check, indexing the empty device list below would
        # fail with an IndexError traceback instead of a clear message.
        print("No microphone devices found")
        sys.exit(-1)

    print(devices)
    default_input_device_idx = sd.default.device[0]
    print(f'Use default device: {devices[default_input_device_idx]["name"]}')

    try:
        main()
    except KeyboardInterrupt:
        # main() loops forever; Ctrl + C is the intended way to stop it.
        print("\nCaught Ctrl + C. Exiting")