// Program.cs
// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to do keyword spotting from a microphone with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
//
// 2. Now run it
//
// dotnet run

using PortAudioSharp;
using SherpaOnnx;
using System.Runtime.InteropServices;

/// <summary>
/// Keyword-spotting demo: captures audio from the default input device with
/// PortAudio and feeds it to a sherpa-onnx <c>KeywordSpotter</c>, printing every
/// keyword that is detected.
/// </summary>
class KeywordSpotterDemo
{
  /// <summary>
  /// Configures the keyword spotter, opens the default input device and then
  /// decodes incoming audio in an endless loop.
  /// </summary>
  /// <param name="args">Command-line arguments (not used).</param>
  static void Main(string[] args)
  {
    // Every model asset lives in the directory extracted from the release tarball.
    var modelDir = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";

    var config = new KeywordSpotterConfig();
    config.FeatConfig.SampleRate = 16000;
    config.FeatConfig.FeatureDim = 80;

    config.ModelConfig.Transducer.Encoder = $"{modelDir}/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Decoder = $"{modelDir}/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Joiner = $"{modelDir}/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";

    config.ModelConfig.Tokens = $"{modelDir}/tokens.txt";
    config.ModelConfig.Provider = "cpu";
    config.ModelConfig.NumThreads = 1;
    config.ModelConfig.Debug = 1;
    config.KeywordsFile = $"{modelDir}/test_wavs/test_keywords.txt";

    // The same rate is used for feature extraction, for the capture stream and
    // for every AcceptWaveform() call, so read it once.
    var sampleRate = config.FeatConfig.SampleRate;

    var kws = new KeywordSpotter(config);

    Console.WriteLine("----------Use pre-defined keywords----------");

    var s = kws.CreateStream();

    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();

    // List every audio device so the user can see what is available.
    int deviceCount = PortAudio.DeviceCount;
    Console.WriteLine($"Number of devices: {deviceCount}");
    for (int i = 0; i < deviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      var deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max input channels: {deviceInfo.maxInputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }

    int deviceIndex = PortAudio.DefaultInputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default input device found");
      Environment.Exit(1);
    }

    var info = PortAudio.GetDeviceInfo(deviceIndex);

    Console.WriteLine();
    Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

    // Mono, 32-bit float capture: the callback below copies the buffer
    // straight into a float[] for AcceptWaveform().
    var param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowInputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    // Capture callback: copies the captured frames out of native memory and
    // hands them to the keyword-spotting stream.
    // NOTE(review): PortAudio presumably invokes this on its own audio thread
    // while the loop below decodes on the main thread - confirm that the
    // sherpa-onnx stream tolerates this concurrent use.
    PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
        uint frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      var samples = new float[frameCount];
      Marshal.Copy(input, samples, 0, (int)frameCount);

      s.AcceptWaveform(sampleRate, samples);

      return StreamCallbackResult.Continue;
    };

    // Input-only stream (no output parameters). framesPerBuffer is 0, which
    // per the PortAudio documentation leaves the buffer size unspecified.
    var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: sampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: callback,
        userData: IntPtr.Zero
        );

    Console.WriteLine(param);
    Console.WriteLine("Started! Please speak");

    stream.Start();

    // Runs until the process is terminated: drain everything that is ready to
    // be decoded, then sleep briefly while more audio is captured.
    while (true)
    {
      while (kws.IsReady(s))
      {
        kws.Decode(s);

        var result = kws.GetResult(s);
        if (!string.IsNullOrEmpty(result.Keyword))
        {
          // Remember to call Reset() right after detecting a keyword
          kws.Reset(s);

          Console.WriteLine($"Detected: {result.Keyword}");
        }
      }

      Thread.Sleep(200); // ms
    }
  }
}