Committed by
GitHub
Add microphone example for .Net keyword spotting (#1120)
正在显示
4 个修改的文件
包含
163 行增加
和
0 行删除
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +// | ||
| 3 | +// This file shows how to do keyword spotting from a microphone with sherpa-onnx. | ||
| 4 | +// | ||
| 5 | +// 1. Download a model from | ||
| 6 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models | ||
| 7 | +// | ||
| 8 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 9 | +// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 10 | +// | ||
| 11 | +// 2. Now run it | ||
| 12 | +// | ||
| 13 | +// dotnet run | ||
| 14 | + | ||
| 15 | +using SherpaOnnx; | ||
| 16 | +using System.Collections.Generic; | ||
| 17 | +using System.Runtime.InteropServices; | ||
| 18 | +using System; | ||
| 19 | + | ||
| 20 | +using PortAudioSharp; | ||
| 21 | + | ||
| 22 | +// Microphone keyword-spotting demo: feeds the default PortAudio input device into a sherpa-onnx KeywordSpotter. | ||
| 23 | +class KeywordSpotterDemo | ||
| 24 | +{ | ||
| 25 | +    // Prints each detected keyword until Ctrl+C; sets exit code 1 when no default input device exists. | ||
| 26 | +    static void Main(string[] args) | ||
| 27 | +    { | ||
| 28 | +        var config = new KeywordSpotterConfig(); | ||
| 29 | +        config.FeatConfig.SampleRate = 16000; | ||
| 30 | +        config.FeatConfig.FeatureDim = 80; | ||
| 31 | + | ||
| 32 | +        config.ModelConfig.Transducer.Encoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"; | ||
| 33 | +        config.ModelConfig.Transducer.Decoder = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"; | ||
| 34 | +        config.ModelConfig.Transducer.Joiner = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"; | ||
| 35 | + | ||
| 36 | +        config.ModelConfig.Tokens = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"; | ||
| 37 | +        config.ModelConfig.Provider = "cpu"; | ||
| 38 | +        config.ModelConfig.NumThreads = 1; | ||
| 39 | +        config.ModelConfig.Debug = 1; | ||
| 40 | +        config.KeywordsFile = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"; | ||
| 41 | + | ||
| 42 | +        var kws = new KeywordSpotter(config); | ||
| 43 | + | ||
| 44 | +        Console.WriteLine("----------Use pre-defined keywords----------"); | ||
| 45 | +        OnlineStream s = kws.CreateStream(); | ||
| 46 | + | ||
| 47 | +        // Set by the Ctrl+C handler so the decode loop can end and the audio resources get released. | ||
| 48 | +        bool stopRequested = false; | ||
| 49 | +        Console.CancelKeyPress += (sender, e) => | ||
| 50 | +        { | ||
| 51 | +            e.Cancel = true; // keep the process alive long enough to run the cleanup below | ||
| 52 | +            Volatile.Write(ref stopRequested, true); | ||
| 53 | +        }; | ||
| 54 | + | ||
| 55 | +        Console.WriteLine(PortAudio.VersionInfo.versionText); | ||
| 56 | +        PortAudio.Initialize(); | ||
| 57 | +        try | ||
| 58 | +        { | ||
| 59 | +            Console.WriteLine($"Number of devices: {PortAudio.DeviceCount}"); | ||
| 60 | +            for (int i = 0; i != PortAudio.DeviceCount; ++i) | ||
| 61 | +            { | ||
| 62 | +                Console.WriteLine($" Device {i}"); | ||
| 63 | +                DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i); | ||
| 64 | +                Console.WriteLine($" Name: {deviceInfo.name}"); | ||
| 65 | +                Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}"); | ||
| 66 | +                Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}"); | ||
| 67 | +            } | ||
| 68 | +            int deviceIndex = PortAudio.DefaultInputDevice; | ||
| 69 | +            if (deviceIndex == PortAudio.NoDevice) | ||
| 70 | +            { | ||
| 71 | +                Console.WriteLine("No default input device found"); | ||
| 72 | +                Environment.ExitCode = 1; // return instead of Environment.Exit so the finally block still runs | ||
| 73 | +                return; | ||
| 74 | +            } | ||
| 75 | + | ||
| 76 | +            DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); | ||
| 77 | +            Console.WriteLine(); | ||
| 78 | +            Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); | ||
| 79 | + | ||
| 80 | +            StreamParameters param = new StreamParameters(); | ||
| 81 | +            param.device = deviceIndex; | ||
| 82 | +            param.channelCount = 1; | ||
| 83 | +            param.sampleFormat = SampleFormat.Float32; | ||
| 84 | +            param.suggestedLatency = info.defaultLowInputLatency; | ||
| 85 | +            param.hostApiSpecificStreamInfo = IntPtr.Zero; | ||
| 86 | + | ||
| 87 | +            // Audio callback: copies the frameCount captured float samples into the keyword-spotter stream. | ||
| 88 | +            PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, UInt32 frameCount, | ||
| 89 | +                ref StreamCallbackTimeInfo timeInfo, StreamCallbackFlags statusFlags, IntPtr userData) => | ||
| 90 | +            { | ||
| 91 | +                float[] samples = new float[frameCount]; | ||
| 92 | +                Marshal.Copy(input, samples, 0, (Int32)frameCount); | ||
| 93 | +                s.AcceptWaveform(config.FeatConfig.SampleRate, samples); | ||
| 94 | +                return StreamCallbackResult.Continue; | ||
| 95 | +            }; | ||
| 96 | + | ||
| 97 | +            using var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, | ||
| 98 | +                sampleRate: config.FeatConfig.SampleRate, framesPerBuffer: 0, streamFlags: StreamFlags.ClipOff, | ||
| 99 | +                callback: callback, userData: IntPtr.Zero); | ||
| 100 | + | ||
| 101 | +            Console.WriteLine(param); | ||
| 102 | +            Console.WriteLine("Started! Please speak"); | ||
| 103 | +            stream.Start(); | ||
| 104 | + | ||
| 105 | +            while (!Volatile.Read(ref stopRequested)) | ||
| 106 | +            { | ||
| 107 | +                while (kws.IsReady(s)) | ||
| 108 | +                { | ||
| 109 | +                    kws.Decode(s); | ||
| 110 | +                } | ||
| 111 | + | ||
| 112 | +                var result = kws.GetResult(s); | ||
| 113 | +                if (!string.IsNullOrEmpty(result.Keyword)) | ||
| 114 | +                { | ||
| 115 | +                    Console.WriteLine("Detected: {0}", result.Keyword); | ||
| 116 | +                } | ||
| 117 | +                Thread.Sleep(200); // ms | ||
| 118 | +            } | ||
| 119 | +            stream.Stop(); | ||
| 120 | +        } | ||
| 121 | +        finally | ||
| 122 | +        { | ||
| 123 | +            PortAudio.Terminate(); | ||
| 124 | +        } | ||
| 125 | +    } | ||
| 126 | +} | ||
| 127 | + |
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | +<!-- Console project for the microphone keyword-spotting demo. ImplicitUsings must stay enabled: the program calls Thread.Sleep without an explicit using for System.Threading. --> | ||
| 3 | + <PropertyGroup> | ||
| 4 | + <OutputType>Exe</OutputType> | ||
| 5 | + <TargetFramework>net6.0</TargetFramework> | ||
| 6 | + <RootNamespace>keyword_spotting_from_microphone</RootNamespace> | ||
| 7 | + <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | + <Nullable>enable</Nullable> | ||
| 9 | + </PropertyGroup> | ||
| 10 | +<!-- PortAudioSharp2 supplies the PortAudioSharp namespace used for microphone capture. Version="*" is a floating version, so the resolved package can change between restores. --> | ||
| 11 | + <ItemGroup> | ||
| 12 | + <PackageReference Include="PortAudioSharp2" Version="*" /> | ||
| 13 | + </ItemGroup> | ||
| 14 | +<!-- Shared project of the examples. NOTE(review): presumably this is what brings in the SherpaOnnx bindings; confirm against Common.csproj. --> | ||
| 15 | + <ItemGroup> | ||
| 16 | + <ProjectReference Include="..\Common\Common.csproj" /> | ||
| 17 | + </ItemGroup> | ||
| 18 | + | ||
| 19 | +</Project> |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Fetches the keyword-spotting model if it is missing, then runs the microphone demo. | ||
| 3 | +set -ex | ||
| 4 | +# Download and unpack only when tokens.txt is not already present in the model directory. | ||
| 5 | +if [ ! -f ./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ]; then | ||
| 6 | + curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 8 | + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +dotnet run -c Release |
| @@ -27,6 +27,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csp | @@ -27,6 +27,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csp | ||
| 27 | EndProject | 27 | EndProject |
| 28 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}" | 28 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}" |
| 29 | EndProject | 29 | EndProject |
| 30 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}" | ||
| 31 | +EndProject | ||
| 30 | Global | 32 | Global |
| 31 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 33 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| 32 | Debug|Any CPU = Debug|Any CPU | 34 | Debug|Any CPU = Debug|Any CPU |
| @@ -81,6 +83,10 @@ Global | @@ -81,6 +83,10 @@ Global | ||
| 81 | {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU | 83 | {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 82 | {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU | 84 | {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| 83 | {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU | 85 | {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU |
| 86 | + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 87 | + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 88 | + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 89 | + {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 84 | EndGlobalSection | 90 | EndGlobalSection |
| 85 | GlobalSection(SolutionProperties) = preSolution | 91 | GlobalSection(SolutionProperties) = preSolution |
| 86 | HideSolutionNode = FALSE | 92 | HideSolutionNode = FALSE |
-
请 注册 或 登录 后发表评论