Committed by
GitHub
Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)
Showing 29 changed files with 230 additions and 280 deletions.
| 1 | <Project Sdk="Microsoft.NET.Sdk"> | 1 | <Project Sdk="Microsoft.NET.Sdk"> |
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | - <TargetFramework>net6.0</TargetFramework> | 4 | + <TargetFramework>net8.0</TargetFramework> |
| 5 | <AllowUnsafeBlocks>true</AllowUnsafeBlocks> | 5 | <AllowUnsafeBlocks>true</AllowUnsafeBlocks> |
| 6 | </PropertyGroup> | 6 | </PropertyGroup> |
| 7 | <ItemGroup> | 7 | <ItemGroup> |
| @@ -4,25 +4,24 @@ using System.IO; | @@ -4,25 +4,24 @@ using System.IO; | ||
| 4 | 4 | ||
| 5 | using System.Runtime.InteropServices; | 5 | using System.Runtime.InteropServices; |
| 6 | 6 | ||
| 7 | -namespace SherpaOnnx | ||
| 8 | -{ | 7 | +namespace SherpaOnnx; |
| 9 | 8 | ||
| 10 | - [StructLayout(LayoutKind.Sequential)] | ||
| 11 | - public struct WaveHeader | ||
| 12 | - { | ||
| 13 | - public Int32 ChunkID; | ||
| 14 | - public Int32 ChunkSize; | ||
| 15 | - public Int32 Format; | ||
| 16 | - public Int32 SubChunk1ID; | ||
| 17 | - public Int32 SubChunk1Size; | ||
| 18 | - public Int16 AudioFormat; | ||
| 19 | - public Int16 NumChannels; | ||
| 20 | - public Int32 SampleRate; | ||
| 21 | - public Int32 ByteRate; | ||
| 22 | - public Int16 BlockAlign; | ||
| 23 | - public Int16 BitsPerSample; | ||
| 24 | - public Int32 SubChunk2ID; | ||
| 25 | - public Int32 SubChunk2Size; | 9 | +[StructLayout(LayoutKind.Sequential)] |
| 10 | +public struct WaveHeader | ||
| 11 | +{ | ||
| 12 | + public int ChunkID; | ||
| 13 | + public int ChunkSize; | ||
| 14 | + public int Format; | ||
| 15 | + public int SubChunk1ID; | ||
| 16 | + public int SubChunk1Size; | ||
| 17 | + public short AudioFormat; | ||
| 18 | + public short NumChannels; | ||
| 19 | + public int SampleRate; | ||
| 20 | + public int ByteRate; | ||
| 21 | + public short BlockAlign; | ||
| 22 | + public short BitsPerSample; | ||
| 23 | + public int SubChunk2ID; | ||
| 24 | + public int SubChunk2Size; | ||
| 26 | 25 | ||
| 27 | public bool Validate() | 26 | public bool Validate() |
| 28 | { | 27 | { |
| @@ -84,23 +83,22 @@ namespace SherpaOnnx | @@ -84,23 +83,22 @@ namespace SherpaOnnx | ||
| 84 | 83 | ||
| 85 | return true; | 84 | return true; |
| 86 | } | 85 | } |
| 87 | - } | 86 | +} |
| 88 | 87 | ||
| 89 | - // It supports only 16-bit, single channel WAVE format. | ||
| 90 | - // The sample rate can be any value. | ||
| 91 | - public class WaveReader | ||
| 92 | - { | ||
| 93 | - public WaveReader(String fileName) | 88 | +// It supports only 16-bit, single channel WAVE format. |
| 89 | +// The sample rate can be any value. | ||
| 90 | +public class WaveReader | ||
| 91 | +{ | ||
| 92 | + public WaveReader(string fileName) | ||
| 94 | { | 93 | { |
| 95 | if (!File.Exists(fileName)) | 94 | if (!File.Exists(fileName)) |
| 96 | { | 95 | { |
| 97 | throw new ApplicationException($"{fileName} does not exist!"); | 96 | throw new ApplicationException($"{fileName} does not exist!"); |
| 98 | } | 97 | } |
| 99 | 98 | ||
| 100 | - using (var stream = File.Open(fileName, FileMode.Open)) | ||
| 101 | - { | ||
| 102 | - using (var reader = new BinaryReader(stream)) | ||
| 103 | - { | 99 | + using var stream = File.Open(fileName, FileMode.Open); |
| 100 | + using var reader = new BinaryReader(stream); | ||
| 101 | + | ||
| 104 | _header = ReadHeader(reader); | 102 | _header = ReadHeader(reader); |
| 105 | 103 | ||
| 106 | if (!_header.Validate()) | 104 | if (!_header.Validate()) |
| @@ -113,8 +111,8 @@ namespace SherpaOnnx | @@ -113,8 +111,8 @@ namespace SherpaOnnx | ||
| 113 | // now read samples | 111 | // now read samples |
| 114 | // _header.SubChunk2Size contains number of bytes in total. | 112 | // _header.SubChunk2Size contains number of bytes in total. |
| 115 | // we assume each sample is of type int16 | 113 | // we assume each sample is of type int16 |
| 116 | - byte[] buffer = reader.ReadBytes(_header.SubChunk2Size); | ||
| 117 | - short[] samples_int16 = new short[_header.SubChunk2Size / 2]; | 114 | + var buffer = reader.ReadBytes(_header.SubChunk2Size); |
| 115 | + var samples_int16 = new short[_header.SubChunk2Size / 2]; | ||
| 118 | Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length); | 116 | Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length); |
| 119 | 117 | ||
| 120 | _samples = new float[samples_int16.Length]; | 118 | _samples = new float[samples_int16.Length]; |
| @@ -124,12 +122,10 @@ namespace SherpaOnnx | @@ -124,12 +122,10 @@ namespace SherpaOnnx | ||
| 124 | _samples[i] = samples_int16[i] / 32768.0F; | 122 | _samples[i] = samples_int16[i] / 32768.0F; |
| 125 | } | 123 | } |
| 126 | } | 124 | } |
| 127 | - } | ||
| 128 | - } | ||
| 129 | 125 | ||
| 130 | private static WaveHeader ReadHeader(BinaryReader reader) | 126 | private static WaveHeader ReadHeader(BinaryReader reader) |
| 131 | { | 127 | { |
| 132 | - byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader))); | 128 | + var bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader))); |
| 133 | 129 | ||
| 134 | GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned); | 130 | GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned); |
| 135 | WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!; | 131 | WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!; |
| @@ -142,8 +138,8 @@ namespace SherpaOnnx | @@ -142,8 +138,8 @@ namespace SherpaOnnx | ||
| 142 | { | 138 | { |
| 143 | var bs = reader.BaseStream; | 139 | var bs = reader.BaseStream; |
| 144 | 140 | ||
| 145 | - Int32 subChunk2ID = _header.SubChunk2ID; | ||
| 146 | - Int32 subChunk2Size = _header.SubChunk2Size; | 141 | + var subChunk2ID = _header.SubChunk2ID; |
| 142 | + var subChunk2Size = _header.SubChunk2Size; | ||
| 147 | 143 | ||
| 148 | while (bs.Position != bs.Length && subChunk2ID != 0x61746164) | 144 | while (bs.Position != bs.Length && subChunk2ID != 0x61746164) |
| 149 | { | 145 | { |
| @@ -161,14 +157,13 @@ namespace SherpaOnnx | @@ -161,14 +157,13 @@ namespace SherpaOnnx | ||
| 161 | private float[] _samples; | 157 | private float[] _samples; |
| 162 | 158 | ||
| 163 | public int SampleRate => _header.SampleRate; | 159 | public int SampleRate => _header.SampleRate; |
| 160 | + | ||
| 164 | public float[] Samples => _samples; | 161 | public float[] Samples => _samples; |
| 165 | 162 | ||
| 166 | - public static void Test(String fileName) | 163 | + public static void Test(string fileName) |
| 167 | { | 164 | { |
| 168 | WaveReader reader = new WaveReader(fileName); | 165 | WaveReader reader = new WaveReader(fileName); |
| 169 | Console.WriteLine($"samples length: {reader.Samples.Length}"); | 166 | Console.WriteLine($"samples length: {reader.Samples.Length}"); |
| 170 | Console.WriteLine($"samples rate: {reader.SampleRate}"); | 167 | Console.WriteLine($"samples rate: {reader.SampleRate}"); |
| 171 | } | 168 | } |
| 172 | - } | ||
| 173 | - | ||
| 174 | } | 169 | } |
| @@ -13,8 +13,6 @@ | @@ -13,8 +13,6 @@ | ||
| 13 | // dotnet run | 13 | // dotnet run |
| 14 | 14 | ||
| 15 | using SherpaOnnx; | 15 | using SherpaOnnx; |
| 16 | -using System.Collections.Generic; | ||
| 17 | -using System; | ||
| 18 | 16 | ||
| 19 | class KeywordSpotterDemo | 17 | class KeywordSpotterDemo |
| 20 | { | 18 | { |
| @@ -38,11 +36,11 @@ class KeywordSpotterDemo | @@ -38,11 +36,11 @@ class KeywordSpotterDemo | ||
| 38 | 36 | ||
| 39 | var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; | 37 | var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; |
| 40 | 38 | ||
| 41 | - WaveReader waveReader = new WaveReader(filename); | 39 | + var waveReader = new WaveReader(filename); |
| 42 | 40 | ||
| 43 | Console.WriteLine("----------Use pre-defined keywords----------"); | 41 | Console.WriteLine("----------Use pre-defined keywords----------"); |
| 44 | 42 | ||
| 45 | - OnlineStream s = kws.CreateStream(); | 43 | + var s = kws.CreateStream(); |
| 46 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | 44 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); |
| 47 | 45 | ||
| 48 | float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; | 46 | float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; |
| @@ -53,7 +51,7 @@ class KeywordSpotterDemo | @@ -53,7 +51,7 @@ class KeywordSpotterDemo | ||
| 53 | { | 51 | { |
| 54 | kws.Decode(s); | 52 | kws.Decode(s); |
| 55 | var result = kws.GetResult(s); | 53 | var result = kws.GetResult(s); |
| 56 | - if (result.Keyword != "") | 54 | + if (result.Keyword != string.Empty) |
| 57 | { | 55 | { |
| 58 | Console.WriteLine("Detected: {0}", result.Keyword); | 56 | Console.WriteLine("Detected: {0}", result.Keyword); |
| 59 | } | 57 | } |
| @@ -70,7 +68,7 @@ class KeywordSpotterDemo | @@ -70,7 +68,7 @@ class KeywordSpotterDemo | ||
| 70 | { | 68 | { |
| 71 | kws.Decode(s); | 69 | kws.Decode(s); |
| 72 | var result = kws.GetResult(s); | 70 | var result = kws.GetResult(s); |
| 73 | - if (result.Keyword != "") | 71 | + if (result.Keyword != string.Empty) |
| 74 | { | 72 | { |
| 75 | Console.WriteLine("Detected: {0}", result.Keyword); | 73 | Console.WriteLine("Detected: {0}", result.Keyword); |
| 76 | } | 74 | } |
| @@ -89,7 +87,7 @@ class KeywordSpotterDemo | @@ -89,7 +87,7 @@ class KeywordSpotterDemo | ||
| 89 | { | 87 | { |
| 90 | kws.Decode(s); | 88 | kws.Decode(s); |
| 91 | var result = kws.GetResult(s); | 89 | var result = kws.GetResult(s); |
| 92 | - if (result.Keyword != "") | 90 | + if (result.Keyword != string.Empty) |
| 93 | { | 91 | { |
| 94 | Console.WriteLine("Detected: {0}", result.Keyword); | 92 | Console.WriteLine("Detected: {0}", result.Keyword); |
| 95 | } | 93 | } |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>keyword_spotting_from_files</RootNamespace> | 6 | <RootNamespace>keyword_spotting_from_files</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -12,12 +12,9 @@ | @@ -12,12 +12,9 @@ | ||
| 12 | // | 12 | // |
| 13 | // dotnet run | 13 | // dotnet run |
| 14 | 14 | ||
| 15 | +using PortAudioSharp; | ||
| 15 | using SherpaOnnx; | 16 | using SherpaOnnx; |
| 16 | -using System.Collections.Generic; | ||
| 17 | using System.Runtime.InteropServices; | 17 | using System.Runtime.InteropServices; |
| 18 | -using System; | ||
| 19 | - | ||
| 20 | -using PortAudioSharp; | ||
| 21 | 18 | ||
| 22 | class KeywordSpotterDemo | 19 | class KeywordSpotterDemo |
| 23 | { | 20 | { |
| @@ -41,11 +38,11 @@ class KeywordSpotterDemo | @@ -41,11 +38,11 @@ class KeywordSpotterDemo | ||
| 41 | 38 | ||
| 42 | var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; | 39 | var filename = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"; |
| 43 | 40 | ||
| 44 | - WaveReader waveReader = new WaveReader(filename); | 41 | + var waveReader = new WaveReader(filename); |
| 45 | 42 | ||
| 46 | Console.WriteLine("----------Use pre-defined keywords----------"); | 43 | Console.WriteLine("----------Use pre-defined keywords----------"); |
| 47 | 44 | ||
| 48 | - OnlineStream s = kws.CreateStream(); | 45 | + var s = kws.CreateStream(); |
| 49 | 46 | ||
| 50 | Console.WriteLine(PortAudio.VersionInfo.versionText); | 47 | Console.WriteLine(PortAudio.VersionInfo.versionText); |
| 51 | PortAudio.Initialize(); | 48 | PortAudio.Initialize(); |
| @@ -54,7 +51,7 @@ class KeywordSpotterDemo | @@ -54,7 +51,7 @@ class KeywordSpotterDemo | ||
| 54 | for (int i = 0; i != PortAudio.DeviceCount; ++i) | 51 | for (int i = 0; i != PortAudio.DeviceCount; ++i) |
| 55 | { | 52 | { |
| 56 | Console.WriteLine($" Device {i}"); | 53 | Console.WriteLine($" Device {i}"); |
| 57 | - DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i); | 54 | + var deviceInfo = PortAudio.GetDeviceInfo(i); |
| 58 | Console.WriteLine($" Name: {deviceInfo.name}"); | 55 | Console.WriteLine($" Name: {deviceInfo.name}"); |
| 59 | Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}"); | 56 | Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}"); |
| 60 | Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}"); | 57 | Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}"); |
| @@ -66,12 +63,12 @@ class KeywordSpotterDemo | @@ -66,12 +63,12 @@ class KeywordSpotterDemo | ||
| 66 | Environment.Exit(1); | 63 | Environment.Exit(1); |
| 67 | } | 64 | } |
| 68 | 65 | ||
| 69 | - DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); | 66 | + var info = PortAudio.GetDeviceInfo(deviceIndex); |
| 70 | 67 | ||
| 71 | Console.WriteLine(); | 68 | Console.WriteLine(); |
| 72 | Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); | 69 | Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); |
| 73 | 70 | ||
| 74 | - StreamParameters param = new StreamParameters(); | 71 | + var param = new StreamParameters(); |
| 75 | param.device = deviceIndex; | 72 | param.device = deviceIndex; |
| 76 | param.channelCount = 1; | 73 | param.channelCount = 1; |
| 77 | param.sampleFormat = SampleFormat.Float32; | 74 | param.sampleFormat = SampleFormat.Float32; |
| @@ -79,21 +76,21 @@ class KeywordSpotterDemo | @@ -79,21 +76,21 @@ class KeywordSpotterDemo | ||
| 79 | param.hostApiSpecificStreamInfo = IntPtr.Zero; | 76 | param.hostApiSpecificStreamInfo = IntPtr.Zero; |
| 80 | 77 | ||
| 81 | PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, | 78 | PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, |
| 82 | - UInt32 frameCount, | 79 | + uint frameCount, |
| 83 | ref StreamCallbackTimeInfo timeInfo, | 80 | ref StreamCallbackTimeInfo timeInfo, |
| 84 | StreamCallbackFlags statusFlags, | 81 | StreamCallbackFlags statusFlags, |
| 85 | IntPtr userData | 82 | IntPtr userData |
| 86 | ) => | 83 | ) => |
| 87 | { | 84 | { |
| 88 | - float[] samples = new float[frameCount]; | ||
| 89 | - Marshal.Copy(input, samples, 0, (Int32)frameCount); | 85 | + var samples = new float[frameCount]; |
| 86 | + Marshal.Copy(input, samples, 0, (int)frameCount); | ||
| 90 | 87 | ||
| 91 | s.AcceptWaveform(config.FeatConfig.SampleRate, samples); | 88 | s.AcceptWaveform(config.FeatConfig.SampleRate, samples); |
| 92 | 89 | ||
| 93 | return StreamCallbackResult.Continue; | 90 | return StreamCallbackResult.Continue; |
| 94 | }; | 91 | }; |
| 95 | 92 | ||
| 96 | - PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate, | 93 | + var stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: config.FeatConfig.SampleRate, |
| 97 | framesPerBuffer: 0, | 94 | framesPerBuffer: 0, |
| 98 | streamFlags: StreamFlags.ClipOff, | 95 | streamFlags: StreamFlags.ClipOff, |
| 99 | callback: callback, | 96 | callback: callback, |
| @@ -113,15 +110,13 @@ class KeywordSpotterDemo | @@ -113,15 +110,13 @@ class KeywordSpotterDemo | ||
| 113 | } | 110 | } |
| 114 | 111 | ||
| 115 | var result = kws.GetResult(s); | 112 | var result = kws.GetResult(s); |
| 116 | - if (result.Keyword != "") | 113 | + if (result.Keyword != string.Empty) |
| 117 | { | 114 | { |
| 118 | Console.WriteLine("Detected: {0}", result.Keyword); | 115 | Console.WriteLine("Detected: {0}", result.Keyword); |
| 119 | } | 116 | } |
| 120 | 117 | ||
| 121 | Thread.Sleep(200); // ms | 118 | Thread.Sleep(200); // ms |
| 122 | } | 119 | } |
| 123 | - | ||
| 124 | - PortAudio.Terminate(); | ||
| 125 | } | 120 | } |
| 126 | } | 121 | } |
| 127 | 122 |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>keyword_spotting_from_microphone</RootNamespace> | 6 | <RootNamespace>keyword_spotting_from_microphone</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -5,17 +5,14 @@ | @@ -5,17 +5,14 @@ | ||
| 5 | // Please refer to | 5 | // Please refer to |
| 6 | // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | 6 | // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html |
| 7 | // to download non-streaming models | 7 | // to download non-streaming models |
| 8 | -using CommandLine.Text; | ||
| 9 | using CommandLine; | 8 | using CommandLine; |
| 9 | +using CommandLine.Text; | ||
| 10 | using SherpaOnnx; | 10 | using SherpaOnnx; |
| 11 | -using System.Collections.Generic; | ||
| 12 | -using System; | ||
| 13 | 11 | ||
| 14 | class OfflineDecodeFiles | 12 | class OfflineDecodeFiles |
| 15 | { | 13 | { |
| 16 | class Options | 14 | class Options |
| 17 | { | 15 | { |
| 18 | - | ||
| 19 | [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] | 16 | [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] |
| 20 | public int SampleRate { get; set; } = 16000; | 17 | public int SampleRate { get; set; } = 16000; |
| 21 | 18 | ||
| @@ -23,58 +20,58 @@ class OfflineDecodeFiles | @@ -23,58 +20,58 @@ class OfflineDecodeFiles | ||
| 23 | public int FeatureDim { get; set; } = 80; | 20 | public int FeatureDim { get; set; } = 80; |
| 24 | 21 | ||
| 25 | [Option(Required = false, HelpText = "Path to tokens.txt")] | 22 | [Option(Required = false, HelpText = "Path to tokens.txt")] |
| 26 | - public string Tokens { get; set; } = ""; | 23 | + public string Tokens { get; set; } = string.Empty; |
| 27 | 24 | ||
| 28 | [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")] | 25 | [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")] |
| 29 | - public string Encoder { get; set; } = ""; | 26 | + public string Encoder { get; set; } = string.Empty; |
| 30 | 27 | ||
| 31 | [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")] | 28 | [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")] |
| 32 | - public string Decoder { get; set; } = ""; | 29 | + public string Decoder { get; set; } = string.Empty; |
| 33 | 30 | ||
| 34 | [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")] | 31 | [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")] |
| 35 | - public string Joiner { get; set; } = ""; | 32 | + public string Joiner { get; set; } = string.Empty; |
| 36 | 33 | ||
| 37 | [Option("model-type", Required = false, Default = "", HelpText = "model type")] | 34 | [Option("model-type", Required = false, Default = "", HelpText = "model type")] |
| 38 | - public string ModelType { get; set; } = ""; | 35 | + public string ModelType { get; set; } = string.Empty; |
| 39 | 36 | ||
| 40 | [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")] | 37 | [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")] |
| 41 | - public string WhisperEncoder { get; set; } = ""; | 38 | + public string WhisperEncoder { get; set; } = string.Empty; |
| 42 | 39 | ||
| 43 | [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")] | 40 | [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")] |
| 44 | - public string WhisperDecoder { get; set; } = ""; | 41 | + public string WhisperDecoder { get; set; } = string.Empty; |
| 45 | 42 | ||
| 46 | [Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")] | 43 | [Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")] |
| 47 | - public string WhisperLanguage { get; set; } = ""; | 44 | + public string WhisperLanguage { get; set; } = string.Empty; |
| 48 | 45 | ||
| 49 | [Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")] | 46 | [Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")] |
| 50 | public string WhisperTask { get; set; } = "transcribe"; | 47 | public string WhisperTask { get; set; } = "transcribe"; |
| 51 | 48 | ||
| 52 | [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")] | 49 | [Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")] |
| 53 | - public string MoonshinePreprocessor { get; set; } = ""; | 50 | + public string MoonshinePreprocessor { get; set; } = string.Empty; |
| 54 | 51 | ||
| 55 | [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")] | 52 | [Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")] |
| 56 | - public string MoonshineEncoder { get; set; } = ""; | 53 | + public string MoonshineEncoder { get; set; } = string.Empty; |
| 57 | 54 | ||
| 58 | [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")] | 55 | [Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")] |
| 59 | - public string MoonshineUncachedDecoder { get; set; } = ""; | 56 | + public string MoonshineUncachedDecoder { get; set; } = string.Empty; |
| 60 | 57 | ||
| 61 | [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")] | 58 | [Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")] |
| 62 | - public string MoonshineCachedDecoder { get; set; } = ""; | 59 | + public string MoonshineCachedDecoder { get; set; } = string.Empty; |
| 63 | 60 | ||
| 64 | [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")] | 61 | [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")] |
| 65 | - public string TdnnModel { get; set; } = ""; | 62 | + public string TdnnModel { get; set; } = string.Empty; |
| 66 | 63 | ||
| 67 | [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] | 64 | [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] |
| 68 | - public string Paraformer { get; set; } = ""; | 65 | + public string Paraformer { get; set; } = string.Empty; |
| 69 | 66 | ||
| 70 | [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")] | 67 | [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")] |
| 71 | - public string NeMoCtc { get; set; } = ""; | 68 | + public string NeMoCtc { get; set; } = string.Empty; |
| 72 | 69 | ||
| 73 | [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] | 70 | [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] |
| 74 | - public string TeleSpeechCtc { get; set; } = ""; | 71 | + public string TeleSpeechCtc { get; set; } = string.Empty; |
| 75 | 72 | ||
| 76 | [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")] | 73 | [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")] |
| 77 | - public string SenseVoiceModel { get; set; } = ""; | 74 | + public string SenseVoiceModel { get; set; } = string.Empty; |
| 78 | 75 | ||
| 79 | [Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")] | 76 | [Option("sense-voice-use-itn", Required = false, HelpText = "1 to use inverse text normalization for sense voice.")] |
| 80 | public int SenseVoiceUseItn { get; set; } = 1; | 77 | public int SenseVoiceUseItn { get; set; } = 1; |
| @@ -88,7 +85,7 @@ class OfflineDecodeFiles | @@ -88,7 +85,7 @@ class OfflineDecodeFiles | ||
| 88 | 85 | ||
| 89 | [Option("rule-fsts", Required = false, Default = "", | 86 | [Option("rule-fsts", Required = false, Default = "", |
| 90 | HelpText = "If not empty, path to rule fst for inverse text normalization")] | 87 | HelpText = "If not empty, path to rule fst for inverse text normalization")] |
| 91 | - public string RuleFsts { get; set; } = ""; | 88 | + public string RuleFsts { get; set; } = string.Empty; |
| 92 | 89 | ||
| 93 | [Option("max-active-paths", Required = false, Default = 4, | 90 | [Option("max-active-paths", Required = false, Default = 4, |
| 94 | HelpText = @"Used only when --decoding--method is modified_beam_search. | 91 | HelpText = @"Used only when --decoding--method is modified_beam_search. |
| @@ -96,7 +93,7 @@ It specifies number of active paths to keep during the search")] | @@ -96,7 +93,7 @@ It specifies number of active paths to keep during the search")] | ||
| 96 | public int MaxActivePaths { get; set; } = 4; | 93 | public int MaxActivePaths { get; set; } = 4; |
| 97 | 94 | ||
| 98 | [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] | 95 | [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] |
| 99 | - public string HotwordsFile { get; set; } = ""; | 96 | + public string HotwordsFile { get; set; } = string.Empty; |
| 100 | 97 | ||
| 101 | [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] | 98 | [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] |
| 102 | public float HotwordsScore { get; set; } = 1.5F; | 99 | public float HotwordsScore { get; set; } = 1.5F; |
| @@ -117,7 +114,7 @@ It specifies number of active paths to keep during the search")] | @@ -117,7 +114,7 @@ It specifies number of active paths to keep during the search")] | ||
| 117 | 114 | ||
| 118 | private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) | 115 | private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) |
| 119 | { | 116 | { |
| 120 | - string usage = @" | 117 | + var usage = @" |
| 121 | # Zipformer | 118 | # Zipformer |
| 122 | 119 | ||
| 123 | dotnet run \ | 120 | dotnet run \ |
| @@ -213,42 +210,42 @@ to download pre-trained Tdnn models. | @@ -213,42 +210,42 @@ to download pre-trained Tdnn models. | ||
| 213 | 210 | ||
| 214 | config.ModelConfig.Tokens = options.Tokens; | 211 | config.ModelConfig.Tokens = options.Tokens; |
| 215 | 212 | ||
| 216 | - if (!String.IsNullOrEmpty(options.Encoder)) | 213 | + if (!string.IsNullOrEmpty(options.Encoder)) |
| 217 | { | 214 | { |
| 218 | // this is a transducer model | 215 | // this is a transducer model |
| 219 | config.ModelConfig.Transducer.Encoder = options.Encoder; | 216 | config.ModelConfig.Transducer.Encoder = options.Encoder; |
| 220 | config.ModelConfig.Transducer.Decoder = options.Decoder; | 217 | config.ModelConfig.Transducer.Decoder = options.Decoder; |
| 221 | config.ModelConfig.Transducer.Joiner = options.Joiner; | 218 | config.ModelConfig.Transducer.Joiner = options.Joiner; |
| 222 | } | 219 | } |
| 223 | - else if (!String.IsNullOrEmpty(options.Paraformer)) | 220 | + else if (!string.IsNullOrEmpty(options.Paraformer)) |
| 224 | { | 221 | { |
| 225 | config.ModelConfig.Paraformer.Model = options.Paraformer; | 222 | config.ModelConfig.Paraformer.Model = options.Paraformer; |
| 226 | } | 223 | } |
| 227 | - else if (!String.IsNullOrEmpty(options.NeMoCtc)) | 224 | + else if (!string.IsNullOrEmpty(options.NeMoCtc)) |
| 228 | { | 225 | { |
| 229 | config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; | 226 | config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; |
| 230 | } | 227 | } |
| 231 | - else if (!String.IsNullOrEmpty(options.TeleSpeechCtc)) | 228 | + else if (!string.IsNullOrEmpty(options.TeleSpeechCtc)) |
| 232 | { | 229 | { |
| 233 | config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc; | 230 | config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc; |
| 234 | } | 231 | } |
| 235 | - else if (!String.IsNullOrEmpty(options.WhisperEncoder)) | 232 | + else if (!string.IsNullOrEmpty(options.WhisperEncoder)) |
| 236 | { | 233 | { |
| 237 | config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; | 234 | config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; |
| 238 | config.ModelConfig.Whisper.Decoder = options.WhisperDecoder; | 235 | config.ModelConfig.Whisper.Decoder = options.WhisperDecoder; |
| 239 | config.ModelConfig.Whisper.Language = options.WhisperLanguage; | 236 | config.ModelConfig.Whisper.Language = options.WhisperLanguage; |
| 240 | config.ModelConfig.Whisper.Task = options.WhisperTask; | 237 | config.ModelConfig.Whisper.Task = options.WhisperTask; |
| 241 | } | 238 | } |
| 242 | - else if (!String.IsNullOrEmpty(options.TdnnModel)) | 239 | + else if (!string.IsNullOrEmpty(options.TdnnModel)) |
| 243 | { | 240 | { |
| 244 | config.ModelConfig.Tdnn.Model = options.TdnnModel; | 241 | config.ModelConfig.Tdnn.Model = options.TdnnModel; |
| 245 | } | 242 | } |
| 246 | - else if (!String.IsNullOrEmpty(options.SenseVoiceModel)) | 243 | + else if (!string.IsNullOrEmpty(options.SenseVoiceModel)) |
| 247 | { | 244 | { |
| 248 | config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel; | 245 | config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel; |
| 249 | config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn; | 246 | config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn; |
| 250 | } | 247 | } |
| 251 | - else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor)) | 248 | + else if (!string.IsNullOrEmpty(options.MoonshinePreprocessor)) |
| 252 | { | 249 | { |
| 253 | config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor; | 250 | config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor; |
| 254 | config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder; | 251 | config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder; |
| @@ -270,17 +267,17 @@ to download pre-trained Tdnn models. | @@ -270,17 +267,17 @@ to download pre-trained Tdnn models. | ||
| 270 | 267 | ||
| 271 | config.ModelConfig.Debug = 0; | 268 | config.ModelConfig.Debug = 0; |
| 272 | 269 | ||
| 273 | - OfflineRecognizer recognizer = new OfflineRecognizer(config); | 270 | + var recognizer = new OfflineRecognizer(config); |
| 274 | 271 | ||
| 275 | - string[] files = options.Files.ToArray(); | 272 | + var files = options.Files.ToArray(); |
| 276 | 273 | ||
| 277 | // We create a separate stream for each file | 274 | // We create a separate stream for each file |
| 278 | - List<OfflineStream> streams = new List<OfflineStream>(); | 275 | + var streams = new List<OfflineStream>(); |
| 279 | streams.EnsureCapacity(files.Length); | 276 | streams.EnsureCapacity(files.Length); |
| 280 | 277 | ||
| 281 | for (int i = 0; i != files.Length; ++i) | 278 | for (int i = 0; i != files.Length; ++i) |
| 282 | { | 279 | { |
| 283 | - OfflineStream s = recognizer.CreateStream(); | 280 | + var s = recognizer.CreateStream(); |
| 284 | 281 | ||
| 285 | WaveReader waveReader = new WaveReader(files[i]); | 282 | WaveReader waveReader = new WaveReader(files[i]); |
| 286 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | 283 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); |
| @@ -299,7 +296,7 @@ to download pre-trained Tdnn models. | @@ -299,7 +296,7 @@ to download pre-trained Tdnn models. | ||
| 299 | Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens)); | 296 | Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens)); |
| 300 | if (r.Timestamps != null && r.Timestamps.Length > 0) { | 297 | if (r.Timestamps != null && r.Timestamps.Length > 0) { |
| 301 | Console.Write("Timestamps: ["); | 298 | Console.Write("Timestamps: ["); |
| 302 | - var sep = ""; | 299 | + var sep = string.Empty; |
| 303 | for (int k = 0; k != r.Timestamps.Length; ++k) | 300 | for (int k = 0; k != r.Timestamps.Length; ++k) |
| 304 | { | 301 | { |
| 305 | Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00")); | 302 | Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00")); |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>offline_decode_files</RootNamespace> | 6 | <RootNamespace>offline_decode_files</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -12,8 +12,6 @@ | @@ -12,8 +12,6 @@ | ||
| 12 | // dotnet run | 12 | // dotnet run |
| 13 | 13 | ||
| 14 | using SherpaOnnx; | 14 | using SherpaOnnx; |
| 15 | -using System.Collections.Generic; | ||
| 16 | -using System; | ||
| 17 | 15 | ||
| 18 | class OfflinePunctuationDemo | 16 | class OfflinePunctuationDemo |
| 19 | { | 17 | { |
| @@ -25,14 +23,14 @@ class OfflinePunctuationDemo | @@ -25,14 +23,14 @@ class OfflinePunctuationDemo | ||
| 25 | config.Model.NumThreads = 1; | 23 | config.Model.NumThreads = 1; |
| 26 | var punct = new OfflinePunctuation(config); | 24 | var punct = new OfflinePunctuation(config); |
| 27 | 25 | ||
| 28 | - string[] textList = new string[] { | 26 | + var textList = new string[] { |
| 29 | "这是一个测试你好吗How are you我很好thank you are you ok谢谢你", | 27 | "这是一个测试你好吗How are you我很好thank you are you ok谢谢你", |
| 30 | "我们都是木头人不会说话不会动", | 28 | "我们都是木头人不会说话不会动", |
| 31 | "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry", | 29 | "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry", |
| 32 | }; | 30 | }; |
| 33 | 31 | ||
| 34 | Console.WriteLine("---------"); | 32 | Console.WriteLine("---------"); |
| 35 | - foreach (string text in textList) | 33 | + foreach (var text in textList) |
| 36 | { | 34 | { |
| 37 | string textWithPunct = punct.AddPunct(text); | 35 | string textWithPunct = punct.AddPunct(text); |
| 38 | Console.WriteLine("Input text: {0}", text); | 36 | Console.WriteLine("Input text: {0}", text); |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>offline_punctuation</RootNamespace> | 6 | <RootNamespace>offline_punctuation</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -34,7 +34,6 @@ Step 4. Run it | @@ -34,7 +34,6 @@ Step 4. Run it | ||
| 34 | */ | 34 | */ |
| 35 | 35 | ||
| 36 | using SherpaOnnx; | 36 | using SherpaOnnx; |
| 37 | -using System; | ||
| 38 | 37 | ||
| 39 | class OfflineSpeakerDiarizationDemo | 38 | class OfflineSpeakerDiarizationDemo |
| 40 | { | 39 | { |
| @@ -54,7 +53,7 @@ class OfflineSpeakerDiarizationDemo | @@ -54,7 +53,7 @@ class OfflineSpeakerDiarizationDemo | ||
| 54 | var sd = new OfflineSpeakerDiarization(config); | 53 | var sd = new OfflineSpeakerDiarization(config); |
| 55 | 54 | ||
| 56 | var testWaveFile = "./0-four-speakers-zh.wav"; | 55 | var testWaveFile = "./0-four-speakers-zh.wav"; |
| 57 | - WaveReader waveReader = new WaveReader(testWaveFile); | 56 | + var waveReader = new WaveReader(testWaveFile); |
| 58 | if (sd.SampleRate != waveReader.SampleRate) | 57 | if (sd.SampleRate != waveReader.SampleRate) |
| 59 | { | 58 | { |
| 60 | Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}"); | 59 | Console.WriteLine($"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}"); |
| @@ -65,19 +64,19 @@ class OfflineSpeakerDiarizationDemo | @@ -65,19 +64,19 @@ class OfflineSpeakerDiarizationDemo | ||
| 65 | 64 | ||
| 66 | // var segments = sd.Process(waveReader.Samples); // this one is also ok | 65 | // var segments = sd.Process(waveReader.Samples); // this one is also ok |
| 67 | 66 | ||
| 68 | - var MyProgressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) => | 67 | + var progressCallback = (int numProcessedChunks, int numTotalChunks, IntPtr arg) => |
| 69 | { | 68 | { |
| 70 | - float progress = 100.0F * numProcessedChunks / numTotalChunks; | ||
| 71 | - Console.WriteLine("Progress {0}%", String.Format("{0:0.00}", progress)); | 69 | + var progress = 100.0F * numProcessedChunks / numTotalChunks; |
| 70 | + Console.WriteLine("Progress {0}%", string.Format("{0:0.00}", progress)); | ||
| 72 | return 0; | 71 | return 0; |
| 73 | }; | 72 | }; |
| 74 | 73 | ||
| 75 | - var callback = new OfflineSpeakerDiarizationProgressCallback(MyProgressCallback); | 74 | + var callback = new OfflineSpeakerDiarizationProgressCallback(progressCallback); |
| 76 | var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero); | 75 | var segments = sd.ProcessWithCallback(waveReader.Samples, callback, IntPtr.Zero); |
| 77 | 76 | ||
| 78 | foreach (var s in segments) | 77 | foreach (var s in segments) |
| 79 | { | 78 | { |
| 80 | - Console.WriteLine("{0} -- {1} speaker_{2}", String.Format("{0:0.00}", s.Start), String.Format("{0:0.00}", s.End), s.Speaker); | 79 | + Console.WriteLine("{0} -- {1} speaker_{2}", string.Format("{0:0.00}", s.Start), string.Format("{0:0.00}", s.End), s.Speaker); |
| 81 | } | 80 | } |
| 82 | } | 81 | } |
| 83 | } | 82 | } |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>offline_speaker_diarization</RootNamespace> | 6 | <RootNamespace>offline_speaker_diarization</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -10,15 +10,12 @@ | @@ -10,15 +10,12 @@ | ||
| 10 | // Note that you need a speaker to run this file since it will play | 10 | // Note that you need a speaker to run this file since it will play |
| 11 | // the generated audio as it is generating. | 11 | // the generated audio as it is generating. |
| 12 | 12 | ||
| 13 | -using CommandLine.Text; | ||
| 14 | using CommandLine; | 13 | using CommandLine; |
| 14 | +using CommandLine.Text; | ||
| 15 | using PortAudioSharp; | 15 | using PortAudioSharp; |
| 16 | using SherpaOnnx; | 16 | using SherpaOnnx; |
| 17 | using System.Collections.Concurrent; | 17 | using System.Collections.Concurrent; |
| 18 | -using System.Collections.Generic; | ||
| 19 | using System.Runtime.InteropServices; | 18 | using System.Runtime.InteropServices; |
| 20 | -using System.Threading; | ||
| 21 | -using System; | ||
| 22 | 19 | ||
| 23 | class OfflineTtsPlayDemo | 20 | class OfflineTtsPlayDemo |
| 24 | { | 21 | { |
| @@ -26,13 +23,13 @@ class OfflineTtsPlayDemo | @@ -26,13 +23,13 @@ class OfflineTtsPlayDemo | ||
| 26 | { | 23 | { |
| 27 | 24 | ||
| 28 | [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] | 25 | [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] |
| 29 | - public string RuleFsts { get; set; } | 26 | + public string? RuleFsts { get; set; } |
| 30 | 27 | ||
| 31 | [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] | 28 | [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] |
| 32 | - public string DictDir { get; set; } | 29 | + public string? DictDir { get; set; } |
| 33 | 30 | ||
| 34 | [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] | 31 | [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] |
| 35 | - public string DataDir { get; set; } | 32 | + public string? DataDir { get; set; } |
| 36 | 33 | ||
| 37 | [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] | 34 | [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] |
| 38 | public float LengthScale { get; set; } | 35 | public float LengthScale { get; set; } |
| @@ -44,10 +41,10 @@ class OfflineTtsPlayDemo | @@ -44,10 +41,10 @@ class OfflineTtsPlayDemo | ||
| 44 | public float NoiseScaleW { get; set; } | 41 | public float NoiseScaleW { get; set; } |
| 45 | 42 | ||
| 46 | [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] | 43 | [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] |
| 47 | - public string Lexicon { get; set; } | 44 | + public string? Lexicon { get; set; } |
| 48 | 45 | ||
| 49 | [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] | 46 | [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] |
| 50 | - public string Tokens { get; set; } | 47 | + public string? Tokens { get; set; } |
| 51 | 48 | ||
| 52 | [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] | 49 | [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] |
| 53 | public int MaxNumSentences { get; set; } | 50 | public int MaxNumSentences { get; set; } |
| @@ -56,16 +53,16 @@ class OfflineTtsPlayDemo | @@ -56,16 +53,16 @@ class OfflineTtsPlayDemo | ||
| 56 | public int Debug { get; set; } | 53 | public int Debug { get; set; } |
| 57 | 54 | ||
| 58 | [Option("vits-model", Required = true, HelpText = "Path to VITS model")] | 55 | [Option("vits-model", Required = true, HelpText = "Path to VITS model")] |
| 59 | - public string Model { get; set; } | 56 | + public string? Model { get; set; } |
| 60 | 57 | ||
| 61 | [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] | 58 | [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] |
| 62 | public int SpeakerId { get; set; } | 59 | public int SpeakerId { get; set; } |
| 63 | 60 | ||
| 64 | [Option("text", Required = true, HelpText = "Text to synthesize")] | 61 | [Option("text", Required = true, HelpText = "Text to synthesize")] |
| 65 | - public string Text { get; set; } | 62 | + public string? Text { get; set; } |
| 66 | 63 | ||
| 67 | [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] | 64 | [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] |
| 68 | - public string OutputFilename { get; set; } | 65 | + public string? OutputFilename { get; set; } |
| 69 | } | 66 | } |
| 70 | 67 | ||
| 71 | static void Main(string[] args) | 68 | static void Main(string[] args) |
| @@ -124,10 +121,9 @@ to download more models. | @@ -124,10 +121,9 @@ to download more models. | ||
| 124 | Console.WriteLine(helpText); | 121 | Console.WriteLine(helpText); |
| 125 | } | 122 | } |
| 126 | 123 | ||
| 127 | - | ||
| 128 | private static void Run(Options options) | 124 | private static void Run(Options options) |
| 129 | { | 125 | { |
| 130 | - OfflineTtsConfig config = new OfflineTtsConfig(); | 126 | + var config = new OfflineTtsConfig(); |
| 131 | config.Model.Vits.Model = options.Model; | 127 | config.Model.Vits.Model = options.Model; |
| 132 | config.Model.Vits.Lexicon = options.Lexicon; | 128 | config.Model.Vits.Lexicon = options.Lexicon; |
| 133 | config.Model.Vits.Tokens = options.Tokens; | 129 | config.Model.Vits.Tokens = options.Tokens; |
| @@ -142,10 +138,9 @@ to download more models. | @@ -142,10 +138,9 @@ to download more models. | ||
| 142 | config.RuleFsts = options.RuleFsts; | 138 | config.RuleFsts = options.RuleFsts; |
| 143 | config.MaxNumSentences = options.MaxNumSentences; | 139 | config.MaxNumSentences = options.MaxNumSentences; |
| 144 | 140 | ||
| 145 | - OfflineTts tts = new OfflineTts(config); | ||
| 146 | - float speed = 1.0f / options.LengthScale; | ||
| 147 | - int sid = options.SpeakerId; | ||
| 148 | - | 141 | + var tts = new OfflineTts(config); |
| 142 | + var speed = 1.0f / options.LengthScale; | ||
| 143 | + var sid = options.SpeakerId; | ||
| 149 | 144 | ||
| 150 | Console.WriteLine(PortAudio.VersionInfo.versionText); | 145 | Console.WriteLine(PortAudio.VersionInfo.versionText); |
| 151 | PortAudio.Initialize(); | 146 | PortAudio.Initialize(); |
| @@ -166,11 +161,11 @@ to download more models. | @@ -166,11 +161,11 @@ to download more models. | ||
| 166 | Environment.Exit(1); | 161 | Environment.Exit(1); |
| 167 | } | 162 | } |
| 168 | 163 | ||
| 169 | - DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); | 164 | + var info = PortAudio.GetDeviceInfo(deviceIndex); |
| 170 | Console.WriteLine(); | 165 | Console.WriteLine(); |
| 171 | Console.WriteLine($"Use output default device {deviceIndex} ({info.name})"); | 166 | Console.WriteLine($"Use output default device {deviceIndex} ({info.name})"); |
| 172 | 167 | ||
| 173 | - StreamParameters param = new StreamParameters(); | 168 | + var param = new StreamParameters(); |
| 174 | param.device = deviceIndex; | 169 | param.device = deviceIndex; |
| 175 | param.channelCount = 1; | 170 | param.channelCount = 1; |
| 176 | param.sampleFormat = SampleFormat.Float32; | 171 | param.sampleFormat = SampleFormat.Float32; |
| @@ -178,7 +173,7 @@ to download more models. | @@ -178,7 +173,7 @@ to download more models. | ||
| 178 | param.hostApiSpecificStreamInfo = IntPtr.Zero; | 173 | param.hostApiSpecificStreamInfo = IntPtr.Zero; |
| 179 | 174 | ||
| 180 | // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview | 175 | // https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview |
| 181 | - BlockingCollection<float[]> dataItems = new BlockingCollection<float[]>(); | 176 | + var dataItems = new BlockingCollection<float[]>(); |
| 182 | 177 | ||
| 183 | var MyCallback = (IntPtr samples, int n) => | 178 | var MyCallback = (IntPtr samples, int n) => |
| 184 | { | 179 | { |
| @@ -193,9 +188,9 @@ to download more models. | @@ -193,9 +188,9 @@ to download more models. | ||
| 193 | return 1; | 188 | return 1; |
| 194 | }; | 189 | }; |
| 195 | 190 | ||
| 196 | - bool playFinished = false; | 191 | + var playFinished = false; |
| 197 | 192 | ||
| 198 | - float[] lastSampleArray = null; | 193 | + float[]? lastSampleArray = null; |
| 199 | int lastIndex = 0; // not played | 194 | int lastIndex = 0; // not played |
| 200 | 195 | ||
| 201 | PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output, | 196 | PortAudioSharp.Stream.Callback playCallback = (IntPtr input, IntPtr output, |
| @@ -270,10 +265,10 @@ to download more models. | @@ -270,10 +265,10 @@ to download more models. | ||
| 270 | 265 | ||
| 271 | stream.Start(); | 266 | stream.Start(); |
| 272 | 267 | ||
| 273 | - OfflineTtsCallback callback = new OfflineTtsCallback(MyCallback); | 268 | + var callback = new OfflineTtsCallback(MyCallback); |
| 274 | 269 | ||
| 275 | - OfflineTtsGeneratedAudio audio = tts.GenerateWithCallback(options.Text, speed, sid, callback); | ||
| 276 | - bool ok = audio.SaveToWaveFile(options.OutputFilename); | 270 | + var audio = tts.GenerateWithCallback(options.Text, speed, sid, callback); |
| 271 | + var ok = audio.SaveToWaveFile(options.OutputFilename); | ||
| 277 | 272 | ||
| 278 | if (ok) | 273 | if (ok) |
| 279 | { | 274 | { |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>offline_tts_play</RootNamespace> | 6 | <RootNamespace>offline_tts_play</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -6,28 +6,25 @@ | @@ -6,28 +6,25 @@ | ||
| 6 | // and | 6 | // and |
| 7 | // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | 7 | // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models |
| 8 | // to download pre-trained models | 8 | // to download pre-trained models |
| 9 | -using CommandLine.Text; | ||
| 10 | using CommandLine; | 9 | using CommandLine; |
| 10 | +using CommandLine.Text; | ||
| 11 | using SherpaOnnx; | 11 | using SherpaOnnx; |
| 12 | -using System.Collections.Generic; | ||
| 13 | -using System; | ||
| 14 | 12 | ||
| 15 | class OfflineTtsDemo | 13 | class OfflineTtsDemo |
| 16 | { | 14 | { |
| 17 | class Options | 15 | class Options |
| 18 | { | 16 | { |
| 19 | - | ||
| 20 | [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] | 17 | [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] |
| 21 | - public string RuleFsts { get; set; } = ""; | 18 | + public string RuleFsts { get; set; } = string.Empty; |
| 22 | 19 | ||
| 23 | [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")] | 20 | [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")] |
| 24 | - public string RuleFars { get; set; } = ""; | 21 | + public string RuleFars { get; set; } = string.Empty; |
| 25 | 22 | ||
| 26 | [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] | 23 | [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] |
| 27 | - public string DictDir { get; set; } = ""; | 24 | + public string DictDir { get; set; } = string.Empty; |
| 28 | 25 | ||
| 29 | [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] | 26 | [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] |
| 30 | - public string DataDir { get; set; } = ""; | 27 | + public string DataDir { get; set; } = string.Empty; |
| 31 | 28 | ||
| 32 | [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] | 29 | [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] |
| 33 | public float LengthScale { get; set; } = 1; | 30 | public float LengthScale { get; set; } = 1; |
| @@ -39,10 +36,10 @@ class OfflineTtsDemo | @@ -39,10 +36,10 @@ class OfflineTtsDemo | ||
| 39 | public float NoiseScaleW { get; set; } = 0.8F; | 36 | public float NoiseScaleW { get; set; } = 0.8F; |
| 40 | 37 | ||
| 41 | [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] | 38 | [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] |
| 42 | - public string Lexicon { get; set; } = ""; | 39 | + public string Lexicon { get; set; } = string.Empty; |
| 43 | 40 | ||
| 44 | [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] | 41 | [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] |
| 45 | - public string Tokens { get; set; } = ""; | 42 | + public string Tokens { get; set; } = string.Empty; |
| 46 | 43 | ||
| 47 | [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] | 44 | [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] |
| 48 | public int MaxNumSentences { get; set; } = 1; | 45 | public int MaxNumSentences { get; set; } = 1; |
| @@ -51,13 +48,13 @@ class OfflineTtsDemo | @@ -51,13 +48,13 @@ class OfflineTtsDemo | ||
| 51 | public int Debug { get; set; } = 0; | 48 | public int Debug { get; set; } = 0; |
| 52 | 49 | ||
| 53 | [Option("vits-model", Required = true, HelpText = "Path to VITS model")] | 50 | [Option("vits-model", Required = true, HelpText = "Path to VITS model")] |
| 54 | - public string Model { get; set; } = ""; | 51 | + public string Model { get; set; } = string.Empty; |
| 55 | 52 | ||
| 56 | [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] | 53 | [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] |
| 57 | public int SpeakerId { get; set; } = 0; | 54 | public int SpeakerId { get; set; } = 0; |
| 58 | 55 | ||
| 59 | [Option("text", Required = true, HelpText = "Text to synthesize")] | 56 | [Option("text", Required = true, HelpText = "Text to synthesize")] |
| 60 | - public string Text { get; set; } = ""; | 57 | + public string Text { get; set; } = string.Empty; |
| 61 | 58 | ||
| 62 | [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] | 59 | [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] |
| 63 | public string OutputFilename { get; set; } = "./generated.wav"; | 60 | public string OutputFilename { get; set; } = "./generated.wav"; |
| @@ -65,7 +62,7 @@ class OfflineTtsDemo | @@ -65,7 +62,7 @@ class OfflineTtsDemo | ||
| 65 | 62 | ||
| 66 | static void Main(string[] args) | 63 | static void Main(string[] args) |
| 67 | { | 64 | { |
| 68 | - var parser = new CommandLine.Parser(with => with.HelpWriter = null); | 65 | + var parser = new Parser(with => with.HelpWriter = null); |
| 69 | var parserResult = parser.ParseArguments<Options>(args); | 66 | var parserResult = parser.ParseArguments<Options>(args); |
| 70 | 67 | ||
| 71 | parserResult | 68 | parserResult |
| @@ -75,7 +72,7 @@ class OfflineTtsDemo | @@ -75,7 +72,7 @@ class OfflineTtsDemo | ||
| 75 | 72 | ||
| 76 | private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) | 73 | private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) |
| 77 | { | 74 | { |
| 78 | - string usage = @" | 75 | + var usage = @" |
| 79 | # vits-aishell3 | 76 | # vits-aishell3 |
| 80 | 77 | ||
| 81 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 | 78 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 |
| @@ -122,7 +119,7 @@ to download more models. | @@ -122,7 +119,7 @@ to download more models. | ||
| 122 | 119 | ||
| 123 | private static void Run(Options options) | 120 | private static void Run(Options options) |
| 124 | { | 121 | { |
| 125 | - OfflineTtsConfig config = new OfflineTtsConfig(); | 122 | + var config = new OfflineTtsConfig(); |
| 126 | config.Model.Vits.Model = options.Model; | 123 | config.Model.Vits.Model = options.Model; |
| 127 | config.Model.Vits.Lexicon = options.Lexicon; | 124 | config.Model.Vits.Lexicon = options.Lexicon; |
| 128 | config.Model.Vits.Tokens = options.Tokens; | 125 | config.Model.Vits.Tokens = options.Tokens; |
| @@ -138,11 +135,11 @@ to download more models. | @@ -138,11 +135,11 @@ to download more models. | ||
| 138 | config.RuleFars = options.RuleFars; | 135 | config.RuleFars = options.RuleFars; |
| 139 | config.MaxNumSentences = options.MaxNumSentences; | 136 | config.MaxNumSentences = options.MaxNumSentences; |
| 140 | 137 | ||
| 141 | - OfflineTts tts = new OfflineTts(config); | ||
| 142 | - float speed = 1.0f / options.LengthScale; | ||
| 143 | - int sid = options.SpeakerId; | ||
| 144 | - OfflineTtsGeneratedAudio audio = tts.Generate(options.Text, speed, sid); | ||
| 145 | - bool ok = audio.SaveToWaveFile(options.OutputFilename); | 138 | + var tts = new OfflineTts(config); |
| 139 | + var speed = 1.0f / options.LengthScale; | ||
| 140 | + var sid = options.SpeakerId; | ||
| 141 | + var audio = tts.Generate(options.Text, speed, sid); | ||
| 142 | + var ok = audio.SaveToWaveFile(options.OutputFilename); | ||
| 146 | 143 | ||
| 147 | if (ok) | 144 | if (ok) |
| 148 | { | 145 | { |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>offline_tts</RootNamespace> | 6 | <RootNamespace>offline_tts</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -6,40 +6,37 @@ | @@ -6,40 +6,37 @@ | ||
| 6 | // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html | 6 | // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html |
| 7 | // to download streaming models | 7 | // to download streaming models |
| 8 | 8 | ||
| 9 | -using CommandLine.Text; | ||
| 10 | using CommandLine; | 9 | using CommandLine; |
| 10 | +using CommandLine.Text; | ||
| 11 | using SherpaOnnx; | 11 | using SherpaOnnx; |
| 12 | -using System.Collections.Generic; | ||
| 13 | -using System.Linq; | ||
| 14 | -using System; | ||
| 15 | 12 | ||
| 16 | class OnlineDecodeFiles | 13 | class OnlineDecodeFiles |
| 17 | { | 14 | { |
| 18 | class Options | 15 | class Options |
| 19 | { | 16 | { |
| 20 | [Option(Required = true, HelpText = "Path to tokens.txt")] | 17 | [Option(Required = true, HelpText = "Path to tokens.txt")] |
| 21 | - public string Tokens { get; set; } = ""; | 18 | + public string Tokens { get; set; } = string.Empty; |
| 22 | 19 | ||
| 23 | [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] | 20 | [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] |
| 24 | - public string Provider { get; set; } = ""; | 21 | + public string Provider { get; set; } = string.Empty; |
| 25 | 22 | ||
| 26 | [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] | 23 | [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] |
| 27 | - public string Encoder { get; set; } = ""; | 24 | + public string Encoder { get; set; } = string.Empty; |
| 28 | 25 | ||
| 29 | [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] | 26 | [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] |
| 30 | - public string Decoder { get; set; } = ""; | 27 | + public string Decoder { get; set; } = string.Empty; |
| 31 | 28 | ||
| 32 | [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] | 29 | [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] |
| 33 | - public string Joiner { get; set; } = ""; | 30 | + public string Joiner { get; set; } = string.Empty; |
| 34 | 31 | ||
| 35 | [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] | 32 | [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] |
| 36 | - public string ParaformerEncoder { get; set; } = ""; | 33 | + public string ParaformerEncoder { get; set; } = string.Empty; |
| 37 | 34 | ||
| 38 | [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] | 35 | [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] |
| 39 | - public string ParaformerDecoder { get; set; } = ""; | 36 | + public string ParaformerDecoder { get; set; } = string.Empty; |
| 40 | 37 | ||
| 41 | [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] | 38 | [Option("zipformer2-ctc", Required = false, HelpText = "Path to zipformer2 CTC onnx model")] |
| 42 | - public string Zipformer2Ctc { get; set; } = ""; | 39 | + public string Zipformer2Ctc { get; set; } = string.Empty; |
| 43 | 40 | ||
| 44 | [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] | 41 | [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] |
| 45 | public int NumThreads { get; set; } = 1; | 42 | public int NumThreads { get; set; } = 1; |
| @@ -80,15 +77,14 @@ larger than this value. Used only when --enable-endpoint is true.")] | @@ -80,15 +77,14 @@ larger than this value. Used only when --enable-endpoint is true.")] | ||
| 80 | public float Rule3MinUtteranceLength { get; set; } = 20.0F; | 77 | public float Rule3MinUtteranceLength { get; set; } = 20.0F; |
| 81 | 78 | ||
| 82 | [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] | 79 | [Option("hotwords-file", Required = false, Default = "", HelpText = "Path to hotwords.txt")] |
| 83 | - public string HotwordsFile { get; set; } = ""; | 80 | + public string HotwordsFile { get; set; } = string.Empty; |
| 84 | 81 | ||
| 85 | [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] | 82 | [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] |
| 86 | public float HotwordsScore { get; set; } = 1.5F; | 83 | public float HotwordsScore { get; set; } = 1.5F; |
| 87 | 84 | ||
| 88 | [Option("rule-fsts", Required = false, Default = "", | 85 | [Option("rule-fsts", Required = false, Default = "", |
| 89 | HelpText = "If not empty, path to rule fst for inverse text normalization")] | 86 | HelpText = "If not empty, path to rule fst for inverse text normalization")] |
| 90 | - public string RuleFsts { get; set; } = ""; | ||
| 91 | - | 87 | + public string RuleFsts { get; set; } = string.Empty; |
| 92 | 88 | ||
| 93 | [Option("files", Required = true, HelpText = "Audio files for decoding")] | 89 | [Option("files", Required = true, HelpText = "Audio files for decoding")] |
| 94 | public IEnumerable<string> Files { get; set; } = new string[] {}; | 90 | public IEnumerable<string> Files { get; set; } = new string[] {}; |
| @@ -162,7 +158,7 @@ to download pre-trained streaming models. | @@ -162,7 +158,7 @@ to download pre-trained streaming models. | ||
| 162 | 158 | ||
| 163 | private static void Run(Options options) | 159 | private static void Run(Options options) |
| 164 | { | 160 | { |
| 165 | - OnlineRecognizerConfig config = new OnlineRecognizerConfig(); | 161 | + var config = new OnlineRecognizerConfig(); |
| 166 | config.FeatConfig.SampleRate = options.SampleRate; | 162 | config.FeatConfig.SampleRate = options.SampleRate; |
| 167 | 163 | ||
| 168 | // All models from icefall using feature dim 80. | 164 | // All models from icefall using feature dim 80. |
| @@ -194,22 +190,22 @@ to download pre-trained streaming models. | @@ -194,22 +190,22 @@ to download pre-trained streaming models. | ||
| 194 | config.HotwordsScore = options.HotwordsScore; | 190 | config.HotwordsScore = options.HotwordsScore; |
| 195 | config.RuleFsts = options.RuleFsts; | 191 | config.RuleFsts = options.RuleFsts; |
| 196 | 192 | ||
| 197 | - OnlineRecognizer recognizer = new OnlineRecognizer(config); | 193 | + var recognizer = new OnlineRecognizer(config); |
| 198 | 194 | ||
| 199 | - string[] files = options.Files.ToArray(); | 195 | + var files = options.Files.ToArray(); |
| 200 | 196 | ||
| 201 | // We create a separate stream for each file | 197 | // We create a separate stream for each file |
| 202 | - List<OnlineStream> streams = new List<OnlineStream>(); | 198 | + var streams = new List<OnlineStream>(); |
| 203 | streams.EnsureCapacity(files.Length); | 199 | streams.EnsureCapacity(files.Length); |
| 204 | 200 | ||
| 205 | for (int i = 0; i != files.Length; ++i) | 201 | for (int i = 0; i != files.Length; ++i) |
| 206 | { | 202 | { |
| 207 | - OnlineStream s = recognizer.CreateStream(); | 203 | + var s = recognizer.CreateStream(); |
| 208 | 204 | ||
| 209 | - WaveReader waveReader = new WaveReader(files[i]); | 205 | + var waveReader = new WaveReader(files[i]); |
| 210 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | 206 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); |
| 211 | 207 | ||
| 212 | - float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; | 208 | + var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; |
| 213 | s.AcceptWaveform(waveReader.SampleRate, tailPadding); | 209 | s.AcceptWaveform(waveReader.SampleRate, tailPadding); |
| 214 | s.InputFinished(); | 210 | s.InputFinished(); |
| 215 | 211 | ||
| @@ -230,7 +226,7 @@ to download pre-trained streaming models. | @@ -230,7 +226,7 @@ to download pre-trained streaming models. | ||
| 230 | // display results | 226 | // display results |
| 231 | for (int i = 0; i != files.Length; ++i) | 227 | for (int i = 0; i != files.Length; ++i) |
| 232 | { | 228 | { |
| 233 | - OnlineRecognizerResult r = recognizer.GetResult(streams[i]); | 229 | + var r = recognizer.GetResult(streams[i]); |
| 234 | var text = r.Text; | 230 | var text = r.Text; |
| 235 | var tokens = r.Tokens; | 231 | var tokens = r.Tokens; |
| 236 | Console.WriteLine("--------------------"); | 232 | Console.WriteLine("--------------------"); |
| @@ -238,7 +234,7 @@ to download pre-trained streaming models. | @@ -238,7 +234,7 @@ to download pre-trained streaming models. | ||
| 238 | Console.WriteLine("text: {0}", text); | 234 | Console.WriteLine("text: {0}", text); |
| 239 | Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); | 235 | Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); |
| 240 | Console.Write("timestamps: ["); | 236 | Console.Write("timestamps: ["); |
| 241 | - r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", ")); | 237 | + r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", ")); |
| 242 | Console.WriteLine("]"); | 238 | Console.WriteLine("]"); |
| 243 | } | 239 | } |
| 244 | Console.WriteLine("--------------------"); | 240 | Console.WriteLine("--------------------"); |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>online_decode_files</RootNamespace> | 6 | <RootNamespace>online_decode_files</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -29,9 +29,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files | @@ -29,9 +29,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files | ||
| 29 | EndProject | 29 | EndProject |
| 30 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}" | 30 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}" |
| 31 | EndProject | 31 | EndProject |
| 32 | -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}" | ||
| 33 | -EndProject | ||
| 34 | -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}" | 32 | +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}" |
| 35 | EndProject | 33 | EndProject |
| 36 | Global | 34 | Global |
| 37 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 35 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| @@ -91,10 +89,6 @@ Global | @@ -91,10 +89,6 @@ Global | ||
| 91 | {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU | 89 | {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 92 | {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU | 90 | {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| 93 | {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU | 91 | {AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU |
| 94 | - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 95 | - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 96 | - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 97 | - {DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 98 | {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | 92 | {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU |
| 99 | {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU | 93 | {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 100 | {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU | 94 | {D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| @@ -16,20 +16,18 @@ | @@ -16,20 +16,18 @@ | ||
| 16 | // dotnet run | 16 | // dotnet run |
| 17 | 17 | ||
| 18 | using SherpaOnnx; | 18 | using SherpaOnnx; |
| 19 | -using System.Collections.Generic; | ||
| 20 | -using System; | ||
| 21 | 19 | ||
| 22 | class SpeakerIdentificationDemo | 20 | class SpeakerIdentificationDemo |
| 23 | { | 21 | { |
| 24 | - public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename) | 22 | + public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, string filename) |
| 25 | { | 23 | { |
| 26 | - WaveReader reader = new WaveReader(filename); | 24 | + var reader = new WaveReader(filename); |
| 27 | 25 | ||
| 28 | - OnlineStream stream = extractor.CreateStream(); | 26 | + var stream = extractor.CreateStream(); |
| 29 | stream.AcceptWaveform(reader.SampleRate, reader.Samples); | 27 | stream.AcceptWaveform(reader.SampleRate, reader.Samples); |
| 30 | stream.InputFinished(); | 28 | stream.InputFinished(); |
| 31 | 29 | ||
| 32 | - float[] embedding = extractor.Compute(stream); | 30 | + var embedding = extractor.Compute(stream); |
| 33 | 31 | ||
| 34 | return embedding; | 32 | return embedding; |
| 35 | } | 33 | } |
| @@ -43,25 +41,25 @@ class SpeakerIdentificationDemo | @@ -43,25 +41,25 @@ class SpeakerIdentificationDemo | ||
| 43 | 41 | ||
| 44 | var manager = new SpeakerEmbeddingManager(extractor.Dim); | 42 | var manager = new SpeakerEmbeddingManager(extractor.Dim); |
| 45 | 43 | ||
| 46 | - string[] spk1Files = | 44 | + var spk1Files = |
| 47 | new string[] { | 45 | new string[] { |
| 48 | "./sr-data/enroll/fangjun-sr-1.wav", | 46 | "./sr-data/enroll/fangjun-sr-1.wav", |
| 49 | "./sr-data/enroll/fangjun-sr-2.wav", | 47 | "./sr-data/enroll/fangjun-sr-2.wav", |
| 50 | "./sr-data/enroll/fangjun-sr-3.wav", | 48 | "./sr-data/enroll/fangjun-sr-3.wav", |
| 51 | }; | 49 | }; |
| 52 | - float[][] spk1Vec = new float[spk1Files.Length][]; | 50 | + var spk1Vec = new float[spk1Files.Length][]; |
| 53 | 51 | ||
| 54 | for (int i = 0; i < spk1Files.Length; ++i) | 52 | for (int i = 0; i < spk1Files.Length; ++i) |
| 55 | { | 53 | { |
| 56 | spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]); | 54 | spk1Vec[i] = ComputeEmbedding(extractor, spk1Files[i]); |
| 57 | } | 55 | } |
| 58 | 56 | ||
| 59 | - string[] spk2Files = | 57 | + var spk2Files = |
| 60 | new string[] { | 58 | new string[] { |
| 61 | "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav", | 59 | "./sr-data/enroll/leijun-sr-1.wav", "./sr-data/enroll/leijun-sr-2.wav", |
| 62 | }; | 60 | }; |
| 63 | 61 | ||
| 64 | - float[][] spk2Vec = new float[spk2Files.Length][]; | 62 | + var spk2Vec = new float[spk2Files.Length][]; |
| 65 | 63 | ||
| 66 | for (int i = 0; i < spk2Files.Length; ++i) | 64 | for (int i = 0; i < spk2Files.Length; ++i) |
| 67 | { | 65 | { |
| @@ -100,14 +98,14 @@ class SpeakerIdentificationDemo | @@ -100,14 +98,14 @@ class SpeakerIdentificationDemo | ||
| 100 | 98 | ||
| 101 | Console.WriteLine("---All speakers---"); | 99 | Console.WriteLine("---All speakers---"); |
| 102 | 100 | ||
| 103 | - string[] allSpeakers = manager.GetAllSpeakers(); | 101 | + var allSpeakers = manager.GetAllSpeakers(); |
| 104 | foreach (var s in allSpeakers) | 102 | foreach (var s in allSpeakers) |
| 105 | { | 103 | { |
| 106 | Console.WriteLine(s); | 104 | Console.WriteLine(s); |
| 107 | } | 105 | } |
| 108 | Console.WriteLine("------------"); | 106 | Console.WriteLine("------------"); |
| 109 | 107 | ||
| 110 | - string[] testFiles = | 108 | + var testFiles = |
| 111 | new string[] { | 109 | new string[] { |
| 112 | "./sr-data/test/fangjun-test-sr-1.wav", | 110 | "./sr-data/test/fangjun-test-sr-1.wav", |
| 113 | "./sr-data/test/leijun-test-sr-1.wav", | 111 | "./sr-data/test/leijun-test-sr-1.wav", |
| @@ -117,9 +115,9 @@ class SpeakerIdentificationDemo | @@ -117,9 +115,9 @@ class SpeakerIdentificationDemo | ||
| 117 | float threshold = 0.6f; | 115 | float threshold = 0.6f; |
| 118 | foreach (var file in testFiles) | 116 | foreach (var file in testFiles) |
| 119 | { | 117 | { |
| 120 | - float[] embedding = ComputeEmbedding(extractor, file); | 118 | + var embedding = ComputeEmbedding(extractor, file); |
| 121 | 119 | ||
| 122 | - String name = manager.Search(embedding, threshold); | 120 | + var name = manager.Search(embedding, threshold); |
| 123 | if (name == "") | 121 | if (name == "") |
| 124 | { | 122 | { |
| 125 | name = "<Unknown>"; | 123 | name = "<Unknown>"; |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>speaker_identification</RootNamespace> | 6 | <RootNamespace>speaker_identification</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -6,47 +6,43 @@ | @@ -6,47 +6,43 @@ | ||
| 6 | // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html | 6 | // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html |
| 7 | // to download streaming models | 7 | // to download streaming models |
| 8 | 8 | ||
| 9 | -using CommandLine.Text; | ||
| 10 | using CommandLine; | 9 | using CommandLine; |
| 10 | +using CommandLine.Text; | ||
| 11 | using PortAudioSharp; | 11 | using PortAudioSharp; |
| 12 | -using System.Threading; | ||
| 13 | using SherpaOnnx; | 12 | using SherpaOnnx; |
| 14 | -using System.Collections.Generic; | ||
| 15 | using System.Runtime.InteropServices; | 13 | using System.Runtime.InteropServices; |
| 16 | -using System; | ||
| 17 | - | ||
| 18 | 14 | ||
| 19 | class SpeechRecognitionFromMicrophone | 15 | class SpeechRecognitionFromMicrophone |
| 20 | { | 16 | { |
| 21 | class Options | 17 | class Options |
| 22 | { | 18 | { |
| 23 | [Option(Required = true, HelpText = "Path to tokens.txt")] | 19 | [Option(Required = true, HelpText = "Path to tokens.txt")] |
| 24 | - public string Tokens { get; set; } | 20 | + public string? Tokens { get; set; } |
| 25 | 21 | ||
| 26 | [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] | 22 | [Option(Required = false, Default = "cpu", HelpText = "Provider, e.g., cpu, coreml")] |
| 27 | - public string Provider { get; set; } | 23 | + public string? Provider { get; set; } |
| 28 | 24 | ||
| 29 | [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] | 25 | [Option(Required = false, HelpText = "Path to transducer encoder.onnx")] |
| 30 | - public string Encoder { get; set; } | 26 | + public string? Encoder { get; set; } |
| 31 | 27 | ||
| 32 | [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] | 28 | [Option(Required = false, HelpText = "Path to transducer decoder.onnx")] |
| 33 | - public string Decoder { get; set; } | 29 | + public string? Decoder { get; set; } |
| 34 | 30 | ||
| 35 | [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] | 31 | [Option(Required = false, HelpText = "Path to transducer joiner.onnx")] |
| 36 | - public string Joiner { get; set; } | 32 | + public string? Joiner { get; set; } |
| 37 | 33 | ||
| 38 | [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] | 34 | [Option("paraformer-encoder", Required = false, HelpText = "Path to paraformer encoder.onnx")] |
| 39 | - public string ParaformerEncoder { get; set; } | 35 | + public string? ParaformerEncoder { get; set; } |
| 40 | 36 | ||
| 41 | [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] | 37 | [Option("paraformer-decoder", Required = false, HelpText = "Path to paraformer decoder.onnx")] |
| 42 | - public string ParaformerDecoder { get; set; } | 38 | + public string? ParaformerDecoder { get; set; } |
| 43 | 39 | ||
| 44 | [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] | 40 | [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] |
| 45 | public int NumThreads { get; set; } | 41 | public int NumThreads { get; set; } |
| 46 | 42 | ||
| 47 | [Option("decoding-method", Required = false, Default = "greedy_search", | 43 | [Option("decoding-method", Required = false, Default = "greedy_search", |
| 48 | HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] | 44 | HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] |
| 49 | - public string DecodingMethod { get; set; } | 45 | + public string? DecodingMethod { get; set; } |
| 50 | 46 | ||
| 51 | [Option(Required = false, Default = false, HelpText = "True to show model info during loading")] | 47 | [Option(Required = false, Default = false, HelpText = "True to show model info during loading")] |
| 52 | public bool Debug { get; set; } | 48 | public bool Debug { get; set; } |
| @@ -126,7 +122,7 @@ to download pre-trained streaming models. | @@ -126,7 +122,7 @@ to download pre-trained streaming models. | ||
| 126 | 122 | ||
| 127 | private static void Run(Options options) | 123 | private static void Run(Options options) |
| 128 | { | 124 | { |
| 129 | - OnlineRecognizerConfig config = new OnlineRecognizerConfig(); | 125 | + var config = new OnlineRecognizerConfig(); |
| 130 | config.FeatConfig.SampleRate = options.SampleRate; | 126 | config.FeatConfig.SampleRate = options.SampleRate; |
| 131 | 127 | ||
| 132 | // All models from icefall using feature dim 80. | 128 | // All models from icefall using feature dim 80. |
| @@ -153,9 +149,9 @@ to download pre-trained streaming models. | @@ -153,9 +149,9 @@ to download pre-trained streaming models. | ||
| 153 | config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence; | 149 | config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence; |
| 154 | config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength; | 150 | config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength; |
| 155 | 151 | ||
| 156 | - OnlineRecognizer recognizer = new OnlineRecognizer(config); | 152 | + var recognizer = new OnlineRecognizer(config); |
| 157 | 153 | ||
| 158 | - OnlineStream s = recognizer.CreateStream(); | 154 | + var s = recognizer.CreateStream(); |
| 159 | 155 | ||
| 160 | Console.WriteLine(PortAudio.VersionInfo.versionText); | 156 | Console.WriteLine(PortAudio.VersionInfo.versionText); |
| 161 | PortAudio.Initialize(); | 157 | PortAudio.Initialize(); |
| @@ -176,12 +172,12 @@ to download pre-trained streaming models. | @@ -176,12 +172,12 @@ to download pre-trained streaming models. | ||
| 176 | Environment.Exit(1); | 172 | Environment.Exit(1); |
| 177 | } | 173 | } |
| 178 | 174 | ||
| 179 | - DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex); | 175 | + var info = PortAudio.GetDeviceInfo(deviceIndex); |
| 180 | 176 | ||
| 181 | Console.WriteLine(); | 177 | Console.WriteLine(); |
| 182 | Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); | 178 | Console.WriteLine($"Use default device {deviceIndex} ({info.name})"); |
| 183 | 179 | ||
| 184 | - StreamParameters param = new StreamParameters(); | 180 | + var param = new StreamParameters(); |
| 185 | param.device = deviceIndex; | 181 | param.device = deviceIndex; |
| 186 | param.channelCount = 1; | 182 | param.channelCount = 1; |
| 187 | param.sampleFormat = SampleFormat.Float32; | 183 | param.sampleFormat = SampleFormat.Float32; |
| @@ -189,14 +185,14 @@ to download pre-trained streaming models. | @@ -189,14 +185,14 @@ to download pre-trained streaming models. | ||
| 189 | param.hostApiSpecificStreamInfo = IntPtr.Zero; | 185 | param.hostApiSpecificStreamInfo = IntPtr.Zero; |
| 190 | 186 | ||
| 191 | PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, | 187 | PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output, |
| 192 | - UInt32 frameCount, | 188 | + uint frameCount, |
| 193 | ref StreamCallbackTimeInfo timeInfo, | 189 | ref StreamCallbackTimeInfo timeInfo, |
| 194 | StreamCallbackFlags statusFlags, | 190 | StreamCallbackFlags statusFlags, |
| 195 | IntPtr userData | 191 | IntPtr userData |
| 196 | ) => | 192 | ) => |
| 197 | { | 193 | { |
| 198 | - float[] samples = new float[frameCount]; | ||
| 199 | - Marshal.Copy(input, samples, 0, (Int32)frameCount); | 194 | + var samples = new float[frameCount]; |
| 195 | + Marshal.Copy(input, samples, 0, (int)frameCount); | ||
| 200 | 196 | ||
| 201 | s.AcceptWaveform(options.SampleRate, samples); | 197 | s.AcceptWaveform(options.SampleRate, samples); |
| 202 | 198 | ||
| @@ -215,7 +211,7 @@ to download pre-trained streaming models. | @@ -215,7 +211,7 @@ to download pre-trained streaming models. | ||
| 215 | 211 | ||
| 216 | stream.Start(); | 212 | stream.Start(); |
| 217 | 213 | ||
| 218 | - String lastText = ""; | 214 | + var lastText = string.Empty; |
| 219 | int segmentIndex = 0; | 215 | int segmentIndex = 0; |
| 220 | 216 | ||
| 221 | while (true) | 217 | while (true) |
| @@ -245,9 +241,5 @@ to download pre-trained streaming models. | @@ -245,9 +241,5 @@ to download pre-trained streaming models. | ||
| 245 | 241 | ||
| 246 | Thread.Sleep(200); // ms | 242 | Thread.Sleep(200); // ms |
| 247 | } | 243 | } |
| 248 | - | ||
| 249 | - PortAudio.Terminate(); | ||
| 250 | - | ||
| 251 | - | ||
| 252 | } | 244 | } |
| 253 | } | 245 | } |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>speech_recognition_from_microphone</RootNamespace> | 6 | <RootNamespace>speech_recognition_from_microphone</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -15,12 +15,9 @@ | @@ -15,12 +15,9 @@ | ||
| 15 | // dotnet run | 15 | // dotnet run |
| 16 | 16 | ||
| 17 | using SherpaOnnx; | 17 | using SherpaOnnx; |
| 18 | -using System.Collections.Generic; | ||
| 19 | -using System; | ||
| 20 | 18 | ||
| 21 | class SpokenLanguageIdentificationDemo | 19 | class SpokenLanguageIdentificationDemo |
| 22 | { | 20 | { |
| 23 | - | ||
| 24 | static void Main(string[] args) | 21 | static void Main(string[] args) |
| 25 | { | 22 | { |
| 26 | var config = new SpokenLanguageIdentificationConfig(); | 23 | var config = new SpokenLanguageIdentificationConfig(); |
| @@ -30,7 +27,7 @@ class SpokenLanguageIdentificationDemo | @@ -30,7 +27,7 @@ class SpokenLanguageIdentificationDemo | ||
| 30 | var slid = new SpokenLanguageIdentification(config); | 27 | var slid = new SpokenLanguageIdentification(config); |
| 31 | var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; | 28 | var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; |
| 32 | 29 | ||
| 33 | - WaveReader waveReader = new WaveReader(filename); | 30 | + var waveReader = new WaveReader(filename); |
| 34 | 31 | ||
| 35 | var s = slid.CreateStream(); | 32 | var s = slid.CreateStream(); |
| 36 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | 33 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>spoken_language_identification</RootNamespace> | 6 | <RootNamespace>spoken_language_identification</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -13,12 +13,9 @@ | @@ -13,12 +13,9 @@ | ||
| 13 | // dotnet run | 13 | // dotnet run |
| 14 | 14 | ||
| 15 | using SherpaOnnx; | 15 | using SherpaOnnx; |
| 16 | -using System.Collections.Generic; | ||
| 17 | -using System; | ||
| 18 | 16 | ||
| 19 | class StreamingHlgDecodingDemo | 17 | class StreamingHlgDecodingDemo |
| 20 | { | 18 | { |
| 21 | - | ||
| 22 | static void Main(string[] args) | 19 | static void Main(string[] args) |
| 23 | { | 20 | { |
| 24 | var config = new OnlineRecognizerConfig(); | 21 | var config = new OnlineRecognizerConfig(); |
| @@ -32,15 +29,15 @@ class StreamingHlgDecodingDemo | @@ -32,15 +29,15 @@ class StreamingHlgDecodingDemo | ||
| 32 | config.ModelConfig.Debug = 0; | 29 | config.ModelConfig.Debug = 0; |
| 33 | config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"; | 30 | config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"; |
| 34 | 31 | ||
| 35 | - OnlineRecognizer recognizer = new OnlineRecognizer(config); | 32 | + var recognizer = new OnlineRecognizer(config); |
| 36 | 33 | ||
| 37 | var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"; | 34 | var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"; |
| 38 | 35 | ||
| 39 | - WaveReader waveReader = new WaveReader(filename); | ||
| 40 | - OnlineStream s = recognizer.CreateStream(); | 36 | + var waveReader = new WaveReader(filename); |
| 37 | + var s = recognizer.CreateStream(); | ||
| 41 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | 38 | s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); |
| 42 | 39 | ||
| 43 | - float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; | 40 | + var tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; |
| 44 | s.AcceptWaveform(waveReader.SampleRate, tailPadding); | 41 | s.AcceptWaveform(waveReader.SampleRate, tailPadding); |
| 45 | s.InputFinished(); | 42 | s.InputFinished(); |
| 46 | 43 | ||
| @@ -49,7 +46,7 @@ class StreamingHlgDecodingDemo | @@ -49,7 +46,7 @@ class StreamingHlgDecodingDemo | ||
| 49 | recognizer.Decode(s); | 46 | recognizer.Decode(s); |
| 50 | } | 47 | } |
| 51 | 48 | ||
| 52 | - OnlineRecognizerResult r = recognizer.GetResult(s); | 49 | + var r = recognizer.GetResult(s); |
| 53 | var text = r.Text; | 50 | var text = r.Text; |
| 54 | var tokens = r.Tokens; | 51 | var tokens = r.Tokens; |
| 55 | Console.WriteLine("--------------------"); | 52 | Console.WriteLine("--------------------"); |
| @@ -57,10 +54,8 @@ class StreamingHlgDecodingDemo | @@ -57,10 +54,8 @@ class StreamingHlgDecodingDemo | ||
| 57 | Console.WriteLine("text: {0}", text); | 54 | Console.WriteLine("text: {0}", text); |
| 58 | Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); | 55 | Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); |
| 59 | Console.Write("timestamps: ["); | 56 | Console.Write("timestamps: ["); |
| 60 | - r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", ")); | 57 | + r.Timestamps.ToList().ForEach(i => Console.Write(string.Format("{0:0.00}", i) + ", ")); |
| 61 | Console.WriteLine("]"); | 58 | Console.WriteLine("]"); |
| 62 | Console.WriteLine("--------------------"); | 59 | Console.WriteLine("--------------------"); |
| 63 | } | 60 | } |
| 64 | } | 61 | } |
| 65 | - | ||
| 66 | - |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>streaming_hlg_decoding</RootNamespace> | 6 | <RootNamespace>streaming_hlg_decoding</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| @@ -3,8 +3,6 @@ | @@ -3,8 +3,6 @@ | ||
| 3 | // This file shows how to use a silero_vad model with a non-streaming Paraformer | 3 | // This file shows how to use a silero_vad model with a non-streaming Paraformer |
| 4 | // for speech recognition. | 4 | // for speech recognition. |
| 5 | using SherpaOnnx; | 5 | using SherpaOnnx; |
| 6 | -using System.Collections.Generic; | ||
| 7 | -using System; | ||
| 8 | 6 | ||
| 9 | class VadNonStreamingAsrParaformer | 7 | class VadNonStreamingAsrParaformer |
| 10 | { | 8 | { |
| @@ -12,45 +10,49 @@ class VadNonStreamingAsrParaformer | @@ -12,45 +10,49 @@ class VadNonStreamingAsrParaformer | ||
| 12 | { | 10 | { |
| 13 | // please download model files from | 11 | // please download model files from |
| 14 | // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | 12 | // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models |
| 15 | - OfflineRecognizerConfig config = new OfflineRecognizerConfig(); | 13 | + var config = new OfflineRecognizerConfig(); |
| 16 | config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"; | 14 | config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"; |
| 17 | config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"; | 15 | config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"; |
| 18 | config.ModelConfig.Debug = 0; | 16 | config.ModelConfig.Debug = 0; |
| 19 | - OfflineRecognizer recognizer = new OfflineRecognizer(config); | 17 | + var recognizer = new OfflineRecognizer(config); |
| 20 | 18 | ||
| 21 | - VadModelConfig vadModelConfig = new VadModelConfig(); | 19 | + var vadModelConfig = new VadModelConfig(); |
| 22 | vadModelConfig.SileroVad.Model = "./silero_vad.onnx"; | 20 | vadModelConfig.SileroVad.Model = "./silero_vad.onnx"; |
| 23 | vadModelConfig.Debug = 0; | 21 | vadModelConfig.Debug = 0; |
| 24 | 22 | ||
| 25 | - VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60); | 23 | + var vad = new VoiceActivityDetector(vadModelConfig, 60); |
| 26 | 24 | ||
| 27 | - string testWaveFilename = "./lei-jun-test.wav"; | ||
| 28 | - WaveReader reader = new WaveReader(testWaveFilename); | 25 | + var testWaveFilename = "./lei-jun-test.wav"; |
| 26 | + var reader = new WaveReader(testWaveFilename); | ||
| 29 | 27 | ||
| 30 | int numSamples = reader.Samples.Length; | 28 | int numSamples = reader.Samples.Length; |
| 31 | int windowSize = vadModelConfig.SileroVad.WindowSize; | 29 | int windowSize = vadModelConfig.SileroVad.WindowSize; |
| 32 | int sampleRate = vadModelConfig.SampleRate; | 30 | int sampleRate = vadModelConfig.SampleRate; |
| 33 | int numIter = numSamples / windowSize; | 31 | int numIter = numSamples / windowSize; |
| 34 | 32 | ||
| 35 | - for (int i = 0; i != numIter; ++i) { | 33 | + for (int i = 0; i != numIter; ++i) |
| 34 | + { | ||
| 36 | int start = i * windowSize; | 35 | int start = i * windowSize; |
| 37 | - float[] samples = new float[windowSize]; | 36 | + var samples = new float[windowSize]; |
| 38 | Array.Copy(reader.Samples, start, samples, 0, windowSize); | 37 | Array.Copy(reader.Samples, start, samples, 0, windowSize); |
| 39 | vad.AcceptWaveform(samples); | 38 | vad.AcceptWaveform(samples); |
| 40 | - if (vad.IsSpeechDetected()) { | ||
| 41 | - while (!vad.IsEmpty()) { | 39 | + if (vad.IsSpeechDetected()) |
| 40 | + { | ||
| 41 | + while (!vad.IsEmpty()) | ||
| 42 | + { | ||
| 42 | SpeechSegment segment = vad.Front(); | 43 | SpeechSegment segment = vad.Front(); |
| 43 | - float startTime = segment.Start / (float)sampleRate; | ||
| 44 | - float duration = segment.Samples.Length / (float)sampleRate; | 44 | + var startTime = segment.Start / (float)sampleRate; |
| 45 | + var duration = segment.Samples.Length / (float)sampleRate; | ||
| 45 | 46 | ||
| 46 | OfflineStream stream = recognizer.CreateStream(); | 47 | OfflineStream stream = recognizer.CreateStream(); |
| 47 | stream.AcceptWaveform(sampleRate, segment.Samples); | 48 | stream.AcceptWaveform(sampleRate, segment.Samples); |
| 48 | recognizer.Decode(stream); | 49 | recognizer.Decode(stream); |
| 49 | - String text = stream.Result.Text; | 50 | + var text = stream.Result.Text; |
| 50 | 51 | ||
| 51 | - if (!String.IsNullOrEmpty(text)) { | ||
| 52 | - Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime), | ||
| 53 | - String.Format("{0:0.00}", startTime+duration), text); | 52 | + if (!string.IsNullOrEmpty(text)) |
| 53 | + { | ||
| 54 | + Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime), | ||
| 55 | + string.Format("{0:0.00}", startTime + duration), text); | ||
| 54 | } | 56 | } |
| 55 | 57 | ||
| 56 | vad.Pop(); | 58 | vad.Pop(); |
| @@ -60,19 +62,21 @@ class VadNonStreamingAsrParaformer | @@ -60,19 +62,21 @@ class VadNonStreamingAsrParaformer | ||
| 60 | 62 | ||
| 61 | vad.Flush(); | 63 | vad.Flush(); |
| 62 | 64 | ||
| 63 | - while (!vad.IsEmpty()) { | ||
| 64 | - SpeechSegment segment = vad.Front(); | 65 | + while (!vad.IsEmpty()) |
| 66 | + { | ||
| 67 | + var segment = vad.Front(); | ||
| 65 | float startTime = segment.Start / (float)sampleRate; | 68 | float startTime = segment.Start / (float)sampleRate; |
| 66 | float duration = segment.Samples.Length / (float)sampleRate; | 69 | float duration = segment.Samples.Length / (float)sampleRate; |
| 67 | 70 | ||
| 68 | - OfflineStream stream = recognizer.CreateStream(); | 71 | + var stream = recognizer.CreateStream(); |
| 69 | stream.AcceptWaveform(sampleRate, segment.Samples); | 72 | stream.AcceptWaveform(sampleRate, segment.Samples); |
| 70 | recognizer.Decode(stream); | 73 | recognizer.Decode(stream); |
| 71 | - String text = stream.Result.Text; | 74 | + var text = stream.Result.Text; |
| 72 | 75 | ||
| 73 | - if (!String.IsNullOrEmpty(text)) { | ||
| 74 | - Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime), | ||
| 75 | - String.Format("{0:0.00}", startTime+duration), text); | 76 | + if (!string.IsNullOrEmpty(text)) |
| 77 | + { | ||
| 78 | + Console.WriteLine("{0}--{1}: {2}", string.Format("{0:0.00}", startTime), | ||
| 79 | + string.Format("{0:0.00}", startTime + duration), text); | ||
| 76 | } | 80 | } |
| 77 | 81 | ||
| 78 | vad.Pop(); | 82 | vad.Pop(); |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | <OutputType>Exe</OutputType> | 4 | <OutputType>Exe</OutputType> |
| 5 | - <TargetFramework>net6.0</TargetFramework> | 5 | + <TargetFramework>net8.0</TargetFramework> |
| 6 | <RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace> | 6 | <RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace> |
| 7 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 8 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
-
请 注册 或 登录 后发表评论