Program.cs 4.2 KB
// Copyright (c)  2024  Xiaomi Corporation
//
// This file shows how to do keyword spotting with sherpa-onnx.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
//
// 2. Now run it
//
// dotnet run

using SherpaOnnx;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System;

using PortAudioSharp;

class KeywordSpotterDemo
{
  static void Main(string[] args)
  {
    var config = new KeywordSpotterConfig();
    config.FeatConfig.SampleRate = 16000;
    config.FeatConfig.FeatureDim = 80;

    config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
    config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";

    config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";
    config.ModelConfig.Provider = "cpu";
    config.ModelConfig.NumThreads = 1;
    config.ModelConfig.Debug = 1;
    config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt";

    var kws = new KeywordSpotter(config);

    var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";

    WaveReader waveReader = new WaveReader(filename);

    Console.WriteLine("----------Use pre-defined keywords----------");

    OnlineStream s = kws.CreateStream();

    Console.WriteLine(PortAudio.VersionInfo.versionText);
    PortAudio.Initialize();

    Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
    for (int i = 0; i != PortAudio.DeviceCount; ++i)
    {
      Console.WriteLine($" Device {i}");
      DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
      Console.WriteLine($"   Name: {deviceInfo.name}");
      Console.WriteLine($"   Max input channels: {deviceInfo.maxInputChannels}");
      Console.WriteLine($"   Default sample rate: {deviceInfo.defaultSampleRate}");
    }
    int deviceIndex = PortAudio.DefaultInputDevice;
    if (deviceIndex == PortAudio.NoDevice)
    {
      Console.WriteLine("No default input device found");
      Environment.Exit(1);
    }

    DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);

    Console.WriteLine();
    Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

    StreamParameters param = new StreamParameters();
    param.device = deviceIndex;
    param.channelCount = 1;
    param.sampleFormat = SampleFormat.Float32;
    param.suggestedLatency = info.defaultLowInputLatency;
    param.hostApiSpecificStreamInfo = IntPtr.Zero;

    PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
        UInt32 frameCount,
        ref StreamCallbackTimeInfo timeInfo,
        StreamCallbackFlags statusFlags,
        IntPtr userData
        ) =>
    {
      float[] samples = new float[frameCount];
      Marshal.Copy(input, samples, 0, (Int32)frameCount);

      s.AcceptWaveform(config.FeatConfig.SampleRate, samples);

      return StreamCallbackResult.Continue;
    };

    PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
        framesPerBuffer: 0,
        streamFlags: StreamFlags.ClipOff,
        callback: callback,
        userData: IntPtr.Zero
        );

    Console.WriteLine(param);
    Console.WriteLine("Started! Please speak");

    stream.Start();

    while (true)
    {
      while (kws.IsReady(s))
      {
        kws.Decode(s);
      }

      var result = kws.GetResult(s);
      if (result.Keyword != "")
      {
        Console.WriteLine("Detected: {0}", result.Keyword);
      }

      Thread.Sleep(200); // ms
    }

    PortAudio.Terminate();
  }
}