Fangjun Kuang
Committed by GitHub

Add microphone example for .Net keyword spotting (#1120)

  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +//
  3 +// This file shows how to do keyword spotting from a microphone with sherpa-onnx.
  4 +//
  5 +// 1. Download a model from
  6 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
  7 +//
  8 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  9 +// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  10 +//
  11 +// 2. Now run it
  12 +//
  13 +// dotnet run
  14 +
  15 +using SherpaOnnx;
  16 +using System.Collections.Generic;
  17 +using System.Runtime.InteropServices;
  18 +using System;
  19 +
  20 +using PortAudioSharp;
  21 +
  22 +class KeywordSpotterDemo
  23 +{
  24 + static void Main(string[] args)
  25 + {
  26 + var config = new KeywordSpotterConfig();
  27 + config.FeatConfig.SampleRate = 16000;
  28 + config.FeatConfig.FeatureDim = 80;
  29 +
  30 + config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
  31 + config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
  32 + config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
  33 +
  34 + config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";
  35 + config.ModelConfig.Provider = "cpu";
  36 + config.ModelConfig.NumThreads = 1;
  37 + config.ModelConfig.Debug = 1;
  38 + config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt";
  39 +
  40 + var kws = new KeywordSpotter(config);
  41 +
  42 + var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
  43 +
  44 + WaveReader waveReader = new WaveReader(filename);
  45 +
  46 + Console.WriteLine("----------Use pre-defined keywords----------");
  47 +
  48 + OnlineStream s = kws.CreateStream();
  49 +
  50 + Console.WriteLine(PortAudio.VersionInfo.versionText);
  51 + PortAudio.Initialize();
  52 +
  53 + Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
  54 + for (int i = 0; i != PortAudio.DeviceCount; ++i)
  55 + {
  56 + Console.WriteLine($" Device {i}");
  57 + DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
  58 + Console.WriteLine($" Name: {deviceInfo.name}");
  59 + Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
  60 + Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
  61 + }
  62 + int deviceIndex = PortAudio.DefaultInputDevice;
  63 + if (deviceIndex == PortAudio.NoDevice)
  64 + {
  65 + Console.WriteLine("No default input device found");
  66 + Environment.Exit(1);
  67 + }
  68 +
  69 + DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
  70 +
  71 + Console.WriteLine();
  72 + Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
  73 +
  74 + StreamParameters param = new StreamParameters();
  75 + param.device = deviceIndex;
  76 + param.channelCount = 1;
  77 + param.sampleFormat = SampleFormat.Float32;
  78 + param.suggestedLatency = info.defaultLowInputLatency;
  79 + param.hostApiSpecificStreamInfo = IntPtr.Zero;
  80 +
  81 + PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
  82 + UInt32 frameCount,
  83 + ref StreamCallbackTimeInfo timeInfo,
  84 + StreamCallbackFlags statusFlags,
  85 + IntPtr userData
  86 + ) =>
  87 + {
  88 + float[] samples = new float[frameCount];
  89 + Marshal.Copy(input, samples, 0, (Int32)frameCount);
  90 +
  91 + s.AcceptWaveform(config.FeatConfig.SampleRate, samples);
  92 +
  93 + return StreamCallbackResult.Continue;
  94 + };
  95 +
  96 + PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate,
  97 + framesPerBuffer: 0,
  98 + streamFlags: StreamFlags.ClipOff,
  99 + callback: callback,
  100 + userData: IntPtr.Zero
  101 + );
  102 +
  103 + Console.WriteLine(param);
  104 + Console.WriteLine("Started! Please speak");
  105 +
  106 + stream.Start();
  107 +
  108 + while (true)
  109 + {
  110 + while (kws.IsReady(s))
  111 + {
  112 + kws.Decode(s);
  113 + }
  114 +
  115 + var result = kws.GetResult(s);
  116 + if (result.Keyword != "")
  117 + {
  118 + Console.WriteLine("Detected: {0}", result.Keyword);
  119 + }
  120 +
  121 + Thread.Sleep(200); // ms
  122 + }
  123 +
  124 + PortAudio.Terminate();
  125 + }
  126 +}
  127 +
  1 +<Project Sdk="Microsoft.NET.Sdk">
  <!-- Project file for the microphone keyword-spotting console demo. -->
  2 +
  3 + <PropertyGroup>
  <!-- Builds an executable targeting .NET 6. -->
  4 + <OutputType>Exe</OutputType>
  5 + <TargetFramework>net6.0</TargetFramework>
  6 + <RootNamespace>keyword_spotting_from_microphone</RootNamespace>
  <!-- NOTE(review): the demo source calls Thread.Sleep without an explicit
       "using System.Threading;", so it presumably relies on implicit usings - confirm before disabling. -->
  7 + <ImplicitUsings>enable</ImplicitUsings>
  8 + <Nullable>enable</Nullable>
  9 + </PropertyGroup>
  10 +
  11 + <ItemGroup>
  <!-- Version="*" is a floating version: the package version is resolved at restore time
       rather than pinned here. NOTE(review): pin a concrete version if reproducible builds matter. -->
  12 + <PackageReference Include="PortAudioSharp2" Version="*" />
  13 + </ItemGroup>
  14 +
  15 + <ItemGroup>
  <!-- Sibling project in the same solution. NOTE(review): presumably provides the SherpaOnnx types
       used by the demo - confirm. -->
  16 + <ProjectReference Include="..\Common\Common.csproj" />
  17 + </ItemGroup>
  18 +
  19 +</Project>
  1 +#!/usr/bin/env bash
  # Downloads and unpacks the keyword-spotting model when it is not already
  # present in the current directory, then builds and runs the demo.
  2 +
  # -e: stop at the first failing command; -x: print each command before running it.
  3 +set -ex
  4 +
  # tokens.txt serves as the marker that the model archive was already extracted.
  5 +if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  # -L follows the release-download redirect; -O keeps the remote file name.
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  7 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  # The archive is no longer needed once it has been extracted.
  8 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  9 +fi
  10 +
  # Run the project in the current directory using the Release configuration.
  11 +dotnet run -c Release
@@ -27,6 +27,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csp @@ -27,6 +27,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csp
27 EndProject 27 EndProject
28 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}" 28 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}"
29 EndProject 29 EndProject
  30 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
  31 +EndProject
30 Global 32 Global
31 GlobalSection(SolutionConfigurationPlatforms) = preSolution 33 GlobalSection(SolutionConfigurationPlatforms) = preSolution
32 Debug|Any CPU = Debug|Any CPU 34 Debug|Any CPU = Debug|Any CPU
@@ -81,6 +83,10 @@ Global @@ -81,6 +83,10 @@ Global
81 {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU 83 {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU
82 {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU 84 {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU
83 {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU 85 {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU
  86 + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  87 + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
  88 + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
  89 + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
84 EndGlobalSection 90 EndGlobalSection
85 GlobalSection(SolutionProperties) = preSolution 91 GlobalSection(SolutionProperties) = preSolution
86 HideSolutionNode = FALSE 92 HideSolutionNode = FALSE