Fangjun Kuang

Add microphone example for .Net keyword spotting (#1120)

// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do keyword spotting with sherpa-onnx using a microphone.
//
// 1. Download a model from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
//
// 2. Now run it
//
// dotnet run
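//
// Alternatively, run ./run.sh, which downloads the model (if it is not already
// present) and then builds and runs this example.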
using PortAudioSharp;
using SherpaOnnx;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;
class KeywordSpotterDemo
{
    static void Main(string[] args)
    {
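        // Configure the spotter: 16 kHz audio with 80-dim fbank features, plus the
        // transducer (encoder/decoder/joiner) files extracted in step 1 above.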
        var config = new KeywordSpotterConfig();
        config.FeatConfig.SampleRate = 16000;
        config.FeatConfig.FeatureDim = 80;
        config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";
        config.ModelConfig.Provider = "cpu";
        config.ModelConfig.NumThreads = 1;
        config.ModelConfig.Debug = 1;
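        // Keywords are read from this file; each line defines one keyword to detect.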
        config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt";

        var kws = new KeywordSpotter(config);
        // The stream detects the keywords listed in config.KeywordsFile.
        OnlineStream s = kws.CreateStream();
        Console.WriteLine(PortAudio.VersionInfo.versionText);
        PortAudio.Initialize();

        Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}");
        for (int i = 0; i != PortAudio.DeviceCount; ++i)
        {
            Console.WriteLine($" Device {i}");
            DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
            Console.WriteLine($" Name: {deviceInfo.name}");
            Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
            Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
        }
        int deviceIndex = PortAudio.DefaultInputDevice;
        if (deviceIndex == PortAudio.NoDevice)
        {
            Console.WriteLine("No default input device found");
            Environment.Exit(1);
        }

        DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
        Console.WriteLine();
        Console.WriteLine($"Use default device {deviceIndex} ({info.name})");
        StreamParameters param = new StreamParameters();
        param.device = deviceIndex;
        param.channelCount = 1;
        param.sampleFormat = SampleFormat.Float32;
        param.suggestedLatency = info.defaultLowInputLatency;
        param.hostApiSpecificStreamInfo = IntPtr.Zero;
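        // The PortAudio callback runs on the audio thread: copy each buffer of float
        // samples out of unmanaged memory and feed it to the spotter's stream.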
        PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
            UInt32 frameCount,
            ref StreamCallbackTimeInfo timeInfo,
            StreamCallbackFlags statusFlags,
            IntPtr userData
            ) =>
        {
            float[] samples = new float[frameCount];
            Marshal.Copy(input, samples, 0, (Int32)frameCount);

            s.AcceptWaveform(config.FeatConfig.SampleRate, samples);

            return StreamCallbackResult.Continue;
        };
        PortAudioSharp.Stream stream = new PortAudioSharp.Stream(
            inParams: param,
            outParams: null,
            sampleRate: config.FeatConfig.SampleRate,
            framesPerBuffer: 0,
            streamFlags: StreamFlags.ClipOff,
            callback: callback,
            userData: IntPtr.Zero
        );

        Console.WriteLine(param);
        Console.WriteLine("Started! Please speak");

        stream.Start();
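        // Poll the stream on the main thread: decode whatever audio has been
        // accumulated so far and print any keyword that has been detected.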
        while (true)
        {
            while (kws.IsReady(s))
            {
                kws.Decode(s);
            }

            var result = kws.GetResult(s);
            if (result.Keyword != "")
            {
                Console.WriteLine("Detected: {0}", result.Keyword);
            }

            Thread.Sleep(200); // ms
        }
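        // Not reached: the loop above runs until the process is interrupted (e.g. Ctrl+C).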
        PortAudio.Terminate();
    }
}
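Note: the stream above takes its keywords from config.KeywordsFile. If you want to supply keywords at runtime instead, the sketch below is one possible approach. It assumes the C# binding also exposes a CreateStream(string keywords) overload (an assumption; it is not used above), and the keyword string is a hypothetical placeholder whose tokens must exist in the model's tokens.txt.

// Sketch only: assumes KeywordSpotter.CreateStream(string) exists in the binding.
// The token sequence below is a hypothetical placeholder; spell your keyword with
// tokens taken from the model's tokens.txt.
var customStream = kws.CreateStream("x iǎo ài t óng x ué");
// Feed customStream exactly like `s` above: AcceptWaveform in the PortAudio
// callback, then kws.IsReady / kws.Decode / kws.GetResult in the polling loop.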
... ...
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <RootNamespace>keyword_spotting_from_microphone</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="PortAudioSharp2" Version="*" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Common\Common.csproj" />
  </ItemGroup>

</Project>
... ...
#!/usr/bin/env bash
set -ex
if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
fi
dotnet run -c Release
... ...
... ... @@ -27,6 +27,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csp
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
... ... @@ -81,6 +83,10 @@ Global
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
... ...