Committed by
GitHub
Add a C# api for offline-recognizer of sherpa-onnx (#129)
正在显示
10 个修改的文件
包含
1883 行增加
和
0 行删除
csharp-api-examples/OfflineDecodeFiles.cs
0 → 100644
| 1 | +// See https://aka.ms/new-console-template for more information | ||
| 2 | +// Copyright (c) 2023 by manyeyes | ||
| 3 | +using SherpaOnnx; | ||
| 4 | +/// Please refer to | ||
| 5 | +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 6 | +/// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 7 | +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 8 | +/// from there. | ||
| 9 | + | ||
| 10 | +/// download model eg: | ||
| 11 | +/// (The directory where the application runs) | ||
| 12 | +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory | ||
| 13 | +/// cd /path/to | ||
| 14 | +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01 | ||
| 15 | +/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example | ||
| 16 | +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512 | ||
| 17 | + | ||
| 18 | +/// NuGet for sherpa-onnx | ||
| 19 | +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx | ||
| 20 | +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx | ||
| 21 | + | ||
| 22 | +// transducer Usage: | ||
| 23 | +/* | ||
| 24 | + .\SherpaOnnx.Examples.exe ` | ||
| 25 | + --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt ` | ||
| 26 | + --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx ` | ||
| 27 | + --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx ` | ||
| 28 | + --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx ` | ||
| 29 | + --num-threads=2 ` | ||
| 30 | + --decoding-method=greedy_search ` | ||
| 31 | + --debug=false ` | ||
| 32 | + ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav | ||
| 33 | + */ | ||
| 34 | + | ||
| 35 | +// paraformer Usage: | ||
| 36 | +/* | ||
| 37 | + .\SherpaOnnx.Examples.exe ` | ||
| 38 | + --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` | ||
| 39 | + --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` | ||
| 40 | + --num-threads=2 ` | ||
| 41 | + --decoding-method=greedy_search ` | ||
| 42 | + --debug=false ` | ||
| 43 | + ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav | ||
| 44 | + */ | ||
| 45 | + | ||
| 46 | +// nemo Usage: | ||
| 47 | +/* | ||
| 48 | + .\SherpaOnnx.Examples.exe ` | ||
| 49 | + --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ` | ||
| 50 | + --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx ` | ||
| 51 | + --num-threads=2 ` | ||
| 52 | + --decoding-method=greedy_search ` | ||
| 53 | + --debug=false ` | ||
| 54 | + ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav | ||
| 55 | + */ | ||
| 56 | + | ||
| 57 | + | ||
/// <summary>
/// Example console program: decodes a batch of wave files with an offline
/// (non-streaming) recognizer, then prints every transcript and the real time factor.
/// Exactly one model family is used, chosen in this order: transducer
/// (encoder + decoder + joiner), paraformer, NeMo CTC.
/// </summary>
internal class OfflineDecodeFiles
{
    /// <summary>Help text printed when arguments are missing or incomplete.</summary>
    private const string Usage = @"
-----------------------------
transducer Usage:
 --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `
 --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `
 --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `
 --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `
 --num-threads=2 `
 --decoding-method=greedy_search `
 --debug=false `
 ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav

paraformer Usage:
 --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
 --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
 --num-threads=2 `
 --decoding-method=greedy_search `
 --debug=false `
 ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav

nemo Usage:
 --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt `
 --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx `
 --num-threads=2 `
 --decoding-method=greedy_search `
 --debug=false `
 ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav
-----------------------------
";

    /// <summary>
    /// Program entry point. Parses the options, loads all wave files, decodes them
    /// in one batch and prints the results.
    /// </summary>
    /// <param name="args">
    /// Command line arguments (<c>--key=value</c> options plus wave file paths).
    /// When empty, the arguments are read interactively from standard input.
    /// </param>
    static void Main(string[] args)
    {
        if (args.Length == 0)
        {
            Console.WriteLine("Please enter the correct parameters:");
            Console.WriteLine(Usage);
            args = ReadArgsFromConsole();
        }
        Console.WriteLine("Started!\n");

        string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
        List<string> wavFiles = new List<string>();
        Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

        // Model and token paths are resolved against the application directory.
        string encoder = GetPathOption(argsDict, "encoder", applicationBase);
        string decoder = GetPathOption(argsDict, "decoder", applicationBase);
        string joiner = GetPathOption(argsDict, "joiner", applicationBase);
        string paraformer = GetPathOption(argsDict, "paraformer", applicationBase);
        string nemo_ctc = GetPathOption(argsDict, "nemo_ctc", applicationBase);
        string tokens = GetPathOption(argsDict, "tokens", applicationBase);
        string decodingMethod = GetOption(argsDict, "decoding_method");

        // TryParse leaves 0 / false behind when the option is missing or malformed.
        int.TryParse(GetOption(argsDict, "num_threads"), out int numThreads);
        bool.TryParse(GetOption(argsDict, "debug"), out bool isDebug);

        bool hasTransducer = !string.IsNullOrEmpty(encoder)
            && !string.IsNullOrEmpty(decoder)
            && !string.IsNullOrEmpty(joiner);
        bool hasParaformer = !string.IsNullOrEmpty(paraformer);

        if (!hasTransducer && !hasParaformer && string.IsNullOrEmpty(nemo_ctc))
        {
            Console.WriteLine("Please specify at least one model");
            Console.WriteLine(Usage);
            return; // nothing to decode with; do not fall through to a 0/0 RTF
        }
        if (string.IsNullOrEmpty(tokens))
        {
            Console.WriteLine("Please specify --tokens");
            Console.WriteLine(Usage);
            return;
        }
        if (wavFiles.Count == 0)
        {
            Console.WriteLine("Please specify at least one wave file");
            Console.WriteLine(Usage);
            return;
        }

        // Load every file up front; this is identical for all model families.
        TimeSpan total_duration = TimeSpan.Zero;
        List<float[]> samplesList = new List<float[]>(wavFiles.Count);
        foreach (string wavFile in wavFiles)
        {
            TimeSpan duration = TimeSpan.Zero;
            samplesList.Add(AudioHelper.GetFileSamples(wavFile, ref duration));
            total_duration += duration;
        }

        // batch decode; only the decode + result retrieval is timed
        System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
        List<OfflineRecognizerResultEntity> results;
        if (hasTransducer)
        {
            OfflineTransducer offlineTransducer = new OfflineTransducer();
            offlineTransducer.EncoderFilename = encoder;
            offlineTransducer.DecoderFilename = decoder;
            offlineTransducer.JoinerFilename = joiner;

            OfflineRecognizer<OfflineTransducer> offlineRecognizer = new OfflineRecognizer<OfflineTransducer>(
                offlineTransducer,
                tokens,
                num_threads: numThreads,
                debug: isDebug,
                decoding_method: decodingMethod);
            OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
            stopwatch.Start();
            offlineRecognizer.DecodeMultipleOfflineStreams(streams);
            results = offlineRecognizer.GetResults(streams);
            stopwatch.Stop();
        }
        else if (hasParaformer)
        {
            OfflineParaformer offlineParaformer = new OfflineParaformer();
            offlineParaformer.Model = paraformer;

            OfflineRecognizer<OfflineParaformer> offlineRecognizer = new OfflineRecognizer<OfflineParaformer>(
                offlineParaformer,
                tokens,
                num_threads: numThreads,
                debug: isDebug,
                decoding_method: decodingMethod);
            OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
            stopwatch.Start();
            offlineRecognizer.DecodeMultipleOfflineStreams(streams);
            results = offlineRecognizer.GetResults(streams);
            stopwatch.Stop();
        }
        else
        {
            // The guard above guarantees that nemo_ctc is set on this path.
            OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();
            offlineNemoEncDecCtc.Model = nemo_ctc;

            OfflineRecognizer<OfflineNemoEncDecCtc> offlineRecognizer = new OfflineRecognizer<OfflineNemoEncDecCtc>(
                offlineNemoEncDecCtc,
                tokens,
                num_threads: numThreads,
                debug: isDebug,
                decoding_method: decodingMethod);
            OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
            stopwatch.Start();
            offlineRecognizer.DecodeMultipleOfflineStreams(streams);
            results = offlineRecognizer.GetResults(streams);
            stopwatch.Stop();
        }

        foreach (var item in results.Zip<OfflineRecognizerResultEntity, string>(wavFiles))
        {
            Console.WriteLine("wavFile:{0}", item.Second);
            Console.WriteLine("text:{0}", item.First.text.ToLower());
            Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
        }

        double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
        double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
        Console.WriteLine("num_threads:{0}", numThreads);
        Console.WriteLine("decoding_method:{0}", decodingMethod);
        Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
        Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
        Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

        Console.WriteLine("End!");
    }

    /// <summary>
    /// Reads arguments from standard input, one argument per line, until an empty
    /// line or end of input is reached.
    /// </summary>
    /// <returns>The non-empty lines that were entered, in order.</returns>
    private static string[] ReadArgsFromConsole()
    {
        Console.WriteLine("(one argument per line, finish with an empty line)");
        List<string> lines = new List<string>();
        string? line;
        while (!string.IsNullOrWhiteSpace(line = Console.ReadLine()))
        {
            lines.Add(line);
        }
        return lines.ToArray();
    }

    /// <summary>Returns the value of <paramref name="key"/>, or "" when it was not given.</summary>
    private static string GetOption(Dictionary<string, string> argsDict, string key)
    {
        return argsDict.TryGetValue(key, out var value) ? value : "";
    }

    /// <summary>
    /// Returns the value of <paramref name="key"/> combined with
    /// <paramref name="applicationBase"/>, or "" when the option was not given.
    /// </summary>
    private static string GetPathOption(Dictionary<string, string> argsDict, string key, string applicationBase)
    {
        return argsDict.TryGetValue(key, out var value) ? Path.Combine(applicationBase, value) : "";
    }

    /// <summary>
    /// Splits raw arguments into <c>key=value</c> options and wave file paths.
    /// </summary>
    /// <param name="args">Raw arguments; each may carry leading dashes, spaces or a trailing backtick.</param>
    /// <param name="applicationBase">Directory that wave file paths are combined with.</param>
    /// <param name="wavFiles">List that receives every argument that is not an option.</param>
    /// <returns>
    /// Options keyed by name, with dashes inside the name replaced by underscores
    /// (<c>--num-threads</c> becomes <c>num_threads</c>). A repeated option keeps its last value.
    /// </returns>
    static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
    {
        char[] trimChars = { '-', '`', ' ' };
        Dictionary<string, string> argsDict = new Dictionary<string, string>();
        foreach (string input in args)
        {
            if (string.IsNullOrWhiteSpace(input))
            {
                continue;
            }

            // Split on the first '=' only, so that values may themselves contain '='.
            int separator = input.IndexOf('=');
            if (separator < 0)
            {
                string path = input.Trim(trimChars);
                if (path.Length > 0) // skip lines made only of dashes/backticks/spaces
                {
                    wavFiles.Add(Path.Combine(applicationBase, path));
                }
                continue;
            }

            string key = input.Substring(0, separator).Trim(trimChars).Replace("-", "_");
            string value = input.Substring(separator + 1).Trim(trimChars);
            argsDict[key] = value; // indexer: a repeated option must not throw
        }
        return argsDict;
    }
}
csharp-api-examples/OnlineDecodeFile.cs
0 → 100644
| 1 | +// See https://aka.ms/new-console-template for more information | ||
| 2 | +// Copyright (c) 2023 by manyeyes | ||
| 3 | +using SherpaOnnx; | ||
| 4 | +/// Please refer to | ||
| 5 | +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 6 | +/// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 7 | +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 8 | +/// from there. | ||
| 9 | + | ||
| 10 | +/// download model eg: | ||
| 11 | +/// (The directory where the application runs) | ||
| 12 | +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory | ||
| 13 | +/// cd /path/to | ||
| 14 | +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 15 | + | ||
| 16 | +/// NuGet for sherpa-onnx | ||
| 17 | +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx | ||
| 18 | +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx | ||
| 19 | + | ||
| 20 | +// transducer Usage: | ||
| 21 | +/* | ||
| 22 | + .\SherpaOnnx.Examples.exe ` | ||
| 23 | + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` | ||
| 24 | + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` | ||
| 25 | + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` | ||
| 26 | + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` | ||
| 27 | + --num-threads=2 ` | ||
| 28 | + --decoding-method=modified_beam_search ` | ||
| 29 | + --debug=false ` | ||
| 30 | + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav | ||
| 31 | + */ | ||
| 32 | + | ||
/// <summary>
/// Example console program: feeds each wave file chunk by chunk to an online
/// (streaming) transducer recognizer, printing the result after every chunk,
/// and finally prints the real time factor.
/// </summary>
internal class OnlineDecodeFile
{
    /// <summary>Help text printed when arguments are missing or incomplete.</summary>
    private const string Usage = @"
-----------------------------
transducer Usage:
 --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
 --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
 --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
 --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
 --num-threads=2 `
 --decoding-method=modified_beam_search `
 --debug=false `
 ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
-----------------------------
";

    // Second argument of AcceptWaveForm; presumably the sample rate in Hz - TODO confirm.
    private const int SampleRate = 16000;

    /// <summary>
    /// Program entry point. Parses the options, then streams every wave file
    /// through the recognizer and prints the partial results.
    /// </summary>
    /// <param name="args">
    /// Command line arguments (<c>--key=value</c> options plus wave file paths).
    /// When empty, the arguments are read interactively from standard input.
    /// </param>
    static void Main(string[] args)
    {
        if (args.Length == 0)
        {
            Console.WriteLine("Please enter the correct parameters:");
            Console.WriteLine(Usage);
            args = ReadArgsFromConsole();
        }
        Console.WriteLine("Started!\n");

        string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
        List<string> wavFiles = new List<string>();
        Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

        // Model and token paths are resolved against the application directory.
        string encoder = GetPathOption(argsDict, "encoder", applicationBase);
        string decoder = GetPathOption(argsDict, "decoder", applicationBase);
        string joiner = GetPathOption(argsDict, "joiner", applicationBase);
        string tokens = GetPathOption(argsDict, "tokens", applicationBase);
        string decodingMethod = GetOption(argsDict, "decoding_method");

        // TryParse leaves 0 / false behind when the option is missing or malformed.
        int.TryParse(GetOption(argsDict, "num_threads"), out int numThreads);
        bool.TryParse(GetOption(argsDict, "debug"), out bool isDebug);

        // This example only drives the streaming transducer, so all three parts are required.
        if (string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
        {
            Console.WriteLine("Please specify at least one model");
            Console.WriteLine(Usage);
            return;
        }
        if (string.IsNullOrEmpty(tokens))
        {
            Console.WriteLine("Please specify --tokens");
            Console.WriteLine(Usage);
            return;
        }
        if (wavFiles.Count == 0)
        {
            Console.WriteLine("Please specify at least one wave file");
            Console.WriteLine(Usage);
            return;
        }

        OnlineTransducer onlineTransducer = new OnlineTransducer();
        onlineTransducer.EncoderFilename = encoder;
        onlineTransducer.DecoderFilename = decoder;
        onlineTransducer.JoinerFilename = joiner;

        OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
            onlineTransducer,
            tokens,
            num_threads: numThreads,
            debug: isDebug,
            decoding_method: decodingMethod);

        TimeSpan total_duration = TimeSpan.Zero;
        // The stopwatch accumulates across files so that the elapsed time covers
        // the same set of files as total_duration.
        System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
        foreach (string wavFile in wavFiles)
        {
            TimeSpan duration = TimeSpan.Zero;
            List<float[]> samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration);
            OnlineStream stream = onlineRecognizer.CreateStream();

            stopwatch.Start();
            foreach (float[] chunk in samplesList)
            {
                onlineRecognizer.AcceptWaveForm(stream, SampleRate, chunk);
                onlineRecognizer.DecodeStream(stream);
                OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream);
                Console.WriteLine(result_on.text);
            }
            stopwatch.Stop();

            total_duration += duration;
        }

        double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
        double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
        Console.WriteLine("num_threads:{0}", numThreads);
        Console.WriteLine("decoding_method:{0}", decodingMethod);
        Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
        Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
        Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

        Console.WriteLine("End!");
    }

    /// <summary>
    /// Reads arguments from standard input, one argument per line, until an empty
    /// line or end of input is reached.
    /// </summary>
    /// <returns>The non-empty lines that were entered, in order.</returns>
    private static string[] ReadArgsFromConsole()
    {
        Console.WriteLine("(one argument per line, finish with an empty line)");
        List<string> lines = new List<string>();
        string? line;
        while (!string.IsNullOrWhiteSpace(line = Console.ReadLine()))
        {
            lines.Add(line);
        }
        return lines.ToArray();
    }

    /// <summary>Returns the value of <paramref name="key"/>, or "" when it was not given.</summary>
    private static string GetOption(Dictionary<string, string> argsDict, string key)
    {
        return argsDict.TryGetValue(key, out var value) ? value : "";
    }

    /// <summary>
    /// Returns the value of <paramref name="key"/> combined with
    /// <paramref name="applicationBase"/>, or "" when the option was not given.
    /// </summary>
    private static string GetPathOption(Dictionary<string, string> argsDict, string key, string applicationBase)
    {
        return argsDict.TryGetValue(key, out var value) ? Path.Combine(applicationBase, value) : "";
    }

    /// <summary>
    /// Splits raw arguments into <c>key=value</c> options and wave file paths.
    /// </summary>
    /// <param name="args">Raw arguments; each may carry leading dashes, spaces or a trailing backtick.</param>
    /// <param name="applicationBase">Directory that wave file paths are combined with.</param>
    /// <param name="wavFiles">List that receives every argument that is not an option.</param>
    /// <returns>
    /// Options keyed by name, with dashes inside the name replaced by underscores
    /// (<c>--num-threads</c> becomes <c>num_threads</c>). A repeated option keeps its last value.
    /// </returns>
    static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
    {
        char[] trimChars = { '-', '`', ' ' };
        Dictionary<string, string> argsDict = new Dictionary<string, string>();
        foreach (string input in args)
        {
            if (string.IsNullOrWhiteSpace(input))
            {
                continue;
            }

            // Split on the first '=' only, so that values may themselves contain '='.
            int separator = input.IndexOf('=');
            if (separator < 0)
            {
                string path = input.Trim(trimChars);
                if (path.Length > 0) // skip lines made only of dashes/backticks/spaces
                {
                    wavFiles.Add(Path.Combine(applicationBase, path));
                }
                continue;
            }

            string key = input.Substring(0, separator).Trim(trimChars).Replace("-", "_");
            string value = input.Substring(separator + 1).Trim(trimChars);
            argsDict[key] = value; // indexer: a repeated option must not throw
        }
        return argsDict;
    }
}
csharp-api-examples/OnlineDecodeFiles.cs
0 → 100644
| 1 | +// See https://aka.ms/new-console-template for more information | ||
| 2 | +// Copyright (c) 2023 by manyeyes | ||
| 3 | +using SherpaOnnx; | ||
| 4 | +/// Please refer to | ||
| 5 | +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 6 | +/// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 7 | +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 8 | +/// from there. | ||
| 9 | + | ||
| 10 | +/// download model eg: | ||
| 11 | +/// (The directory where the application runs) | ||
| 12 | +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory | ||
| 13 | +/// cd /path/to | ||
| 14 | +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 15 | + | ||
| 16 | +/// NuGet for sherpa-onnx | ||
| 17 | +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx | ||
| 18 | +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx | ||
| 19 | + | ||
| 20 | +// transducer Usage: | ||
| 21 | +/* | ||
| 22 | + .\SherpaOnnx.Examples.exe ` | ||
| 23 | + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` | ||
| 24 | + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` | ||
| 25 | + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` | ||
| 26 | + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` | ||
| 27 | + --num-threads=2 ` | ||
| 28 | + --decoding-method=modified_beam_search ` | ||
| 29 | + --debug=false ` | ||
| 30 | + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ` | ||
| 31 | + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav | ||
| 32 | + */ | ||
| 33 | + | ||
| 34 | +internal class OnlineDecodeFiles | ||
| 35 | +{ | ||
/// <summary>
/// Program entry point. Parses the options, loads every wave file, pushes each
/// one into its own online stream, decodes all streams together and prints the
/// transcripts plus the real time factor.
/// </summary>
/// <param name="args">
/// Command line arguments (<c>--key=value</c> options plus wave file paths).
/// When empty, the arguments are read interactively from standard input.
/// </param>
static void Main(string[] args)
{
    string usage = @"
-----------------------------
transducer Usage:
 --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
 --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
 --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
 --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
 --num-threads=2 `
 --decoding-method=modified_beam_search `
 --debug=false `
 ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `
 ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
-----------------------------
";
    if (args.Length == 0)
    {
        Console.WriteLine("Please enter the correct parameters:");
        Console.WriteLine(usage);
        Console.WriteLine("(one argument per line, finish with an empty line)");

        // Read whole lines until a blank line or end of input. Console.ReadKey is
        // avoided on purpose: it would swallow the first character of the next line.
        List<string> lines = new List<string>();
        string? line;
        while (!string.IsNullOrWhiteSpace(line = Console.ReadLine()))
        {
            lines.Add(line);
        }
        args = lines.ToArray();
    }
    Console.WriteLine("Started!\n");

    string applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
    List<string> wavFiles = new List<string>();
    Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);

    // Local lookups: "" when the option is absent; paths are resolved against the application directory.
    string Option(string key) => argsDict.TryGetValue(key, out var value) ? value : "";
    string PathOption(string key) => argsDict.TryGetValue(key, out var value) ? Path.Combine(applicationBase, value) : "";

    string encoder = PathOption("encoder");
    string decoder = PathOption("decoder");
    string joiner = PathOption("joiner");
    string tokens = PathOption("tokens");
    string decodingMethod = Option("decoding_method");

    // TryParse leaves 0 / false behind when the option is missing or malformed.
    int.TryParse(Option("num_threads"), out int numThreads);
    bool.TryParse(Option("debug"), out bool isDebug);

    // This example only drives the streaming transducer, so all three parts are required.
    if (string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
    {
        Console.WriteLine("Please specify at least one model");
        Console.WriteLine(usage);
        return;
    }
    if (string.IsNullOrEmpty(tokens))
    {
        Console.WriteLine("Please specify --tokens");
        Console.WriteLine(usage);
        return;
    }
    if (wavFiles.Count == 0)
    {
        Console.WriteLine("Please specify at least one wave file");
        Console.WriteLine(usage);
        return;
    }

    OnlineTransducer onlineTransducer = new OnlineTransducer();
    onlineTransducer.EncoderFilename = encoder;
    onlineTransducer.DecoderFilename = decoder;
    onlineTransducer.JoinerFilename = joiner;

    OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
        onlineTransducer,
        tokens,
        num_threads: numThreads,
        debug: isDebug,
        decoding_method: decodingMethod);

    // Load every file before the timer starts.
    TimeSpan total_duration = TimeSpan.Zero;
    List<float[]> samplesList = new List<float[]>(wavFiles.Count);
    foreach (string wavFile in wavFiles)
    {
        TimeSpan duration = TimeSpan.Zero;
        samplesList.Add(AudioHelper.GetFileSamples(wavFile, ref duration));
        total_duration += duration;
    }

    // batch decode; stream setup, decode and result retrieval are timed,
    // console output is not
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    List<OnlineStream> streams = new List<OnlineStream>(samplesList.Count);
    foreach (float[] samples in samplesList)
    {
        OnlineStream stream = onlineRecognizer.CreateStream();
        // 16000 is presumably the sample rate in Hz - TODO confirm against AcceptWaveForm.
        onlineRecognizer.AcceptWaveForm(stream, 16000, samples);
        streams.Add(stream);
        onlineRecognizer.InputFinished(stream);
    }
    onlineRecognizer.DecodeMultipleStreams(streams);
    List<OnlineRecognizerResultEntity> results = onlineRecognizer.GetResults(streams);
    stopwatch.Stop();

    // Each transcript is printed once, paired with the file it came from.
    foreach (var item in results.Zip<OnlineRecognizerResultEntity, string>(wavFiles))
    {
        Console.WriteLine("wavFile:{0}", item.Second);
        Console.WriteLine("text:{0}", item.First.text.ToLower());
        Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
    }

    double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
    double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
    Console.WriteLine("num_threads:{0}", numThreads);
    Console.WriteLine("decoding_method:{0}", decodingMethod);
    Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
    Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
    Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());

    Console.WriteLine("End!");
}
| 168 | + | ||
/// <summary>
/// Decodes a batch of wave files with an online transducer model, letting the
/// recognizer create and feed the streams itself through <c>CreateStreams</c>,
/// and prints the recognized text of every file to the console.
/// </summary>
/// <param name="encoder">Path assigned to the transducer's <c>EncoderFilename</c>.</param>
/// <param name="decoder">Path assigned to the transducer's <c>DecoderFilename</c>.</param>
/// <param name="joiner">Path assigned to the transducer's <c>JoinerFilename</c>.</param>
/// <param name="tokens">Path of the tokens file handed to the recognizer.</param>
/// <param name="numThreads">Forwarded to the recognizer as <c>num_threads</c>.</param>
/// <param name="isDebug">Forwarded to the recognizer as <c>debug</c>.</param>
/// <param name="decodingMethod">Forwarded to the recognizer as <c>decoding_method</c>.</param>
/// <param name="wavFiles">Wave files to decode, in output order.</param>
/// <param name="total_duration">Accumulator: the duration reported for each wave file is added to it.</param>
public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List<string> wavFiles, ref TimeSpan total_duration)
{
    OnlineTransducer onlineTransducer = new OnlineTransducer
    {
        EncoderFilename = encoder,
        DecoderFilename = decoder,
        JoinerFilename = joiner,
    };

    OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
        onlineTransducer,
        tokens,
        num_threads: numThreads,
        debug: isDebug,
        decoding_method: decodingMethod);

    // Load every file up front so that all streams can be decoded as one batch.
    List<float[]> samplesList = new List<float[]>(wavFiles.Count);
    foreach (string wavFile in wavFiles)
    {
        TimeSpan duration = TimeSpan.Zero;
        samplesList.Add(AudioHelper.GetFileSamples(wavFile, ref duration));
        total_duration += duration;
    }

    List<OnlineStream> streams = onlineRecognizer.CreateStreams(samplesList);
    onlineRecognizer.DecodeMultipleStreams(streams);

    foreach (OnlineRecognizerResultEntity result in onlineRecognizer.GetResults(streams))
    {
        Console.WriteLine(result.text);
    }
}
| 200 | + | ||
/// <summary>
/// Splits command-line arguments into <c>key=value</c> options and positional
/// wave file paths.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <param name="applicationBase">Directory that positional wave file paths are combined with.</param>
/// <param name="wavFiles">Receives one combined path per argument that contains no '='.</param>
/// <returns>
/// Option values keyed by option name. Names have leading/trailing '-', '`' and
/// spaces trimmed and every remaining '-' replaced by '_' (so "--num-threads"
/// becomes "num_threads"); values have the same characters trimmed from both ends.
/// </returns>
/// <exception cref="ArgumentException">The same option name appears more than once.</exception>
static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
{
    // Characters that decorate arguments on the command line (dashes, PowerShell
    // line-continuation backticks, stray spaces).
    char[] decoration = { '-', '`', ' ' };
    Dictionary<string, string> argsDict = new Dictionary<string, string>();
    foreach (string input in args)
    {
        int separator = input.IndexOf('=');
        if (separator < 0)
        {
            // No '=': a positional argument, treated as a wave file path.
            if (!string.IsNullOrEmpty(input))
            {
                wavFiles.Add(Path.Combine(applicationBase, input.Trim(decoration)));
            }
            continue;
        }

        // Split on the FIRST '=' only so that values which themselves contain '='
        // (for example a path such as "./a=b/tokens.txt") are kept intact.
        string key = input.Substring(0, separator).Trim(decoration).Replace("-", "_");
        string value = input.Substring(separator + 1).Trim(decoration);
        argsDict.Add(key, value);
    }
    return argsDict;
}
| 221 | +} |
csharp-api-examples/README.md
0 → 100644
| 1 | +# ProjectReference csharp-api | ||
| 2 | +`<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />` | ||
| 3 | +The location of the 'SherpaOnnx' file is ../sherpa-onnx/csharp-api. | ||
| 4 | +This C# API is cross-platform; you can compile it yourself on Windows, macOS, and Linux. | ||
| 5 | + | ||
| 6 | +------------ | ||
| 7 | +Alternatively, install SherpaOnnx through NuGet. | ||
| 8 | +# NuGet for sherpa-onnx | ||
| 9 | +PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx |
csharp-api-examples/Utils/AudioHelper.cs
0 → 100644
| 1 | +using NAudio.Wave; | ||
| 2 | +using System; | ||
| 3 | +using System.Collections.Generic; | ||
| 4 | +using System.Diagnostics; | ||
| 5 | +using System.Linq; | ||
| 6 | +using System.Text; | ||
| 7 | +using System.Threading.Tasks; | ||
| 8 | + | ||
/// <summary>
/// Audio processing helpers that load an audio file through NAudio's
/// <c>AudioFileReader</c> and reinterpret the bytes it produces as float samples.
/// Copyright (c) 2023 by manyeyes
/// </summary>
public class AudioHelper
{
    /// <summary>Number of bytes read from the file for each chunk.</summary>
    private const int ChunkSizeInBytes = 16000;

    /// <summary>
    /// Reads a whole audio file into a single sample array.
    /// </summary>
    /// <param name="wavFilePath">Path of the audio file.</param>
    /// <param name="duration">Set to the reader's <c>TotalTime</c> when the file exists; left untouched otherwise.</param>
    /// <returns>
    /// The samples that were actually read, or a one-element array when the file
    /// does not exist (after raising a trace assertion).
    /// </returns>
    public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration)
    {
        if (!File.Exists(wavFilePath))
        {
            Trace.Assert(false, "file does not exist:" + wavFilePath);
            return new float[1];
        }

        // Dispose the reader so the underlying file handle is released promptly.
        using (AudioFileReader reader = new AudioFileReader(wavFilePath))
        {
            duration = reader.TotalTime;
            byte[] buffer = new byte[reader.Length];
            int bytesRead = ReadFully(reader, buffer, buffer.Length);

            // Size the result from what was really read, so a short read does not
            // leave a tail of zero samples.
            float[] samples = new float[bytesRead / sizeof(float)];
            Buffer.BlockCopy(buffer, 0, samples, 0, samples.Length * sizeof(float));
            return samples;
        }
    }

    /// <summary>
    /// Reads an audio file as a list of consecutive chunks of at most
    /// <see cref="ChunkSizeInBytes"/> bytes each.
    /// </summary>
    /// <param name="wavFilePath">Path of the audio file.</param>
    /// <param name="duration">
    /// The reader's <c>TotalTime</c> is added to this value exactly once per call;
    /// it is left untouched when the file does not exist.
    /// </param>
    /// <returns>
    /// The chunks in file order; the last chunk holds only the remaining samples.
    /// When the file does not exist, a list with a single one-element array
    /// (after raising a trace assertion).
    /// </returns>
    public static List<float[]> GetChunkSamplesList(string wavFilePath, ref TimeSpan duration)
    {
        List<float[]> chunks = new List<float[]>();
        if (!File.Exists(wavFilePath))
        {
            Trace.Assert(false, "file does not exist:" + wavFilePath);
            chunks.Add(new float[1]);
            return chunks;
        }

        using (AudioFileReader reader = new AudioFileReader(wavFilePath))
        {
            // Add the file duration once, not once per chunk.
            duration += reader.TotalTime;

            // One reusable chunk-sized buffer is enough; the whole file never
            // needs to be held in memory at once.
            byte[] buffer = new byte[ChunkSizeInBytes];
            while (true)
            {
                int bytesRead = ReadFully(reader, buffer, buffer.Length);
                if (bytesRead <= 0)
                {
                    break;
                }
                float[] chunk = new float[bytesRead / sizeof(float)];
                Buffer.BlockCopy(buffer, 0, chunk, 0, chunk.Length * sizeof(float));
                chunks.Add(chunk);
            }
        }
        return chunks;
    }

    /// <summary>
    /// Reads from <paramref name="reader"/> into the start of <paramref name="buffer"/>
    /// until <paramref name="count"/> bytes have been read or the reader returns no more data.
    /// </summary>
    /// <returns>The number of bytes actually read.</returns>
    private static int ReadFully(AudioFileReader reader, byte[] buffer, int count)
    {
        int total = 0;
        while (total < count)
        {
            int read = reader.Read(buffer, total, count - total);
            if (read <= 0)
            {
                break;
            }
            total += read;
        }
        return total;
    }
}
csharp-api-examples/sherpa-onnx.csproj
0 → 100644
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | + | ||
| 3 | + <PropertyGroup> | ||
| 4 | + <OutputType>Exe</OutputType> | ||
| 5 | + <TargetFramework>net6.0</TargetFramework> | ||
| 6 | + <RootNamespace>sherpa_onnx</RootNamespace> | ||
| 7 | + <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | + <Nullable>enable</Nullable> | ||
| 9 | + <StartupObject>OnlineDecodeFiles</StartupObject> | ||
| 10 | + </PropertyGroup> | ||
| 11 | + | ||
| 12 | + <ItemGroup> | ||
| 13 | + <PackageReference Include="NAudio" Version="2.1.0" /> | ||
| 14 | + </ItemGroup> | ||
| 15 | + | ||
| 16 | + <ItemGroup> | ||
| 17 | + <ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" /> | ||
| 18 | + </ItemGroup> | ||
| 19 | + | ||
| 20 | +</Project> |
sherpa-onnx/csharp-api/SherpaOnnx.cs
0 → 100644
| 1 | +using System.Runtime.InteropServices; | ||
| 2 | +using System.Diagnostics; | ||
| 3 | + | ||
| 4 | +namespace SherpaOnnx | ||
| 5 | +{ | ||
/// <summary>
/// Base class of the online recognizer wrappers. It stores the native handles
/// (stream, recognizer result, recognizer) and releases whichever of them are
/// set when the object is disposed or finalized.
/// Copyright (c) 2023 by manyeyes
/// </summary>
public class OnlineBase : IDisposable
{
    /// <summary>
    /// Releases the native handles held by this instance and suppresses finalization.
    /// </summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the native handles held by this instance. Safe to call more than once.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>, <c>false</c> when called
    /// from the finalizer. Only unmanaged handles are held, so both paths release them.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_onlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
            _onlineRecognizerResult = IntPtr.Zero;
        }
        if (_onlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
            _onlineStream.impl = IntPtr.Zero;
        }
        if (_onlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
            _onlineRecognizer.impl = IntPtr.Zero;
        }
        _disposed = true;
    }

    /// <summary>Safety net that releases the native handles if Dispose() was never called.</summary>
    ~OnlineBase()
    {
        Dispose(disposing: false);
    }

    // Native stream handle; its impl pointer is zero when unset or released.
    internal SherpaOnnxOnlineStream _onlineStream;
    // Native recognizer result pointer; zero when unset or released.
    internal IntPtr _onlineRecognizerResult;
    // Native recognizer handle; its impl pointer is zero when unset or released.
    internal SherpaOnnxOnlineRecognizer _onlineRecognizer;
    // True once the native handles have been released.
    internal bool _disposed = false;
}
/// <summary>
/// Managed wrapper around one native online stream handle.
/// </summary>
public class OnlineStream : OnlineBase
{
    internal OnlineStream(SherpaOnnxOnlineStream onlineStream)
    {
        this._onlineStream = onlineStream;
    }

    /// <summary>
    /// Destroys the native stream, if it is still set, on both the Dispose() and
    /// the finalizer path. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from Dispose(), <c>false</c> from the finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }
        if (_onlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
            _onlineStream.impl = IntPtr.Zero;
        }
        // Chain to the base implementation rather than the public Dispose(),
        // which would re-enter this override.
        base.Dispose(disposing);
        _disposed = true;
    }
}
/// <summary>
/// Managed wrapper around one native online recognizer result pointer.
/// </summary>
public class OnlineRecognizerResult : OnlineBase
{
    internal OnlineRecognizerResult(IntPtr onlineRecognizerResult)
    {
        this._onlineRecognizerResult = onlineRecognizerResult;
    }

    /// <summary>
    /// Destroys the native result, if it is still set, on both the Dispose() and
    /// the finalizer path. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from Dispose(), <c>false</c> from the finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }
        if (_onlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
            _onlineRecognizerResult = IntPtr.Zero;
        }
        base.Dispose(disposing);
        _disposed = true;
    }
}
/// <summary>
/// Managed wrapper around a native online recognizer: creates streams, feeds
/// them samples, runs decoding and fetches the recognized text.
/// </summary>
/// <typeparam name="T">
/// Model description type. The constructor only reads model paths from an
/// <c>OnlineTransducer</c>; for any other type the transducer paths are left unset.
/// </typeparam>
public class OnlineRecognizer<T> : OnlineBase
    where T : class, new()
{
    /// <summary>
    /// Builds the native recognizer configuration and creates the native recognizer.
    /// </summary>
    /// <param name="t">Model description; see the type parameter remarks.</param>
    /// <param name="tokensFilePath">Path of the tokens file.</param>
    /// <param name="decoding_method">Copied into the recognizer configuration.</param>
    /// <param name="sample_rate">Copied into the feature configuration.</param>
    /// <param name="feature_dim">Copied into the feature configuration.</param>
    /// <param name="num_threads">Copied into the model configuration; asserted to be greater than 0 for transducer models.</param>
    /// <param name="debug">Copied into the model configuration.</param>
    /// <param name="max_active_paths">Copied into the recognizer configuration.</param>
    /// <param name="enable_endpoint">Copied into the recognizer configuration.</param>
    /// <param name="rule1_min_trailing_silence">Copied into the recognizer configuration.</param>
    /// <param name="rule2_min_trailing_silence">Copied into the recognizer configuration.</param>
    /// <param name="rule3_min_utterance_length">Copied into the recognizer configuration.</param>
    public OnlineRecognizer(T t,
        string tokensFilePath, string decoding_method = "greedy_search",
        int sample_rate = 16000, int feature_dim = 80,
        int num_threads = 2, bool debug = false, int max_active_paths = 4,
        int enable_endpoint = 0, int rule1_min_trailing_silence = 0,
        int rule2_min_trailing_silence = 0, int rule3_min_utterance_length = 0)
    {
        SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer();
        if (t is OnlineTransducer onlineTransducer)
        {
            Trace.Assert(File.Exists(onlineTransducer.DecoderFilename)
                && File.Exists(onlineTransducer.EncoderFilename)
                && File.Exists(onlineTransducer.JoinerFilename), "Please provide a model");
            Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
            Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
            transducer.encoder_filename = onlineTransducer.EncoderFilename;
            transducer.decoder_filename = onlineTransducer.DecoderFilename;
            transducer.joiner_filename = onlineTransducer.JoinerFilename;
        }

        SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig();
        model_config.transducer = transducer;
        model_config.num_threads = num_threads;
        model_config.debug = debug;
        model_config.tokens = tokensFilePath;

        SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
        feat_config.sample_rate = sample_rate;
        feat_config.feature_dim = feature_dim;

        SherpaOnnxOnlineRecognizerConfig config = new SherpaOnnxOnlineRecognizerConfig();
        config.decoding_method = decoding_method;
        config.feat_config = feat_config;
        config.model_config = model_config;
        config.max_active_paths = max_active_paths;
        // endpoint detection settings
        config.enable_endpoint = enable_endpoint;
        config.rule1_min_trailing_silence = rule1_min_trailing_silence;
        config.rule2_min_trailing_silence = rule2_min_trailing_silence;
        config.rule3_min_utterance_length = rule3_min_utterance_length;

        _onlineRecognizer = SherpaOnnxSharp.CreateOnlineRecognizer(config);
    }

    /// <summary>Creates a native stream for this recognizer and wraps it.</summary>
    internal OnlineStream CreateOnlineStream()
    {
        SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer);
        return new OnlineStream(stream);
    }

    /// <summary>Signals to the native stream that no more samples will be supplied.</summary>
    public void InputFinished(OnlineStream stream)
    {
        SherpaOnnxSharp.InputFinished(stream._onlineStream);
    }

    /// <summary>
    /// Creates one stream per sample array, feeds it the samples and marks its
    /// input as finished.
    /// </summary>
    /// <param name="samplesList">One sample array per stream to create.</param>
    /// <param name="sample_rate">Sample rate passed along with every sample array.</param>
    /// <returns>The created streams, in the order of <paramref name="samplesList"/>.</returns>
    public List<OnlineStream> CreateStreams(List<float[]> samplesList, int sample_rate = 16000)
    {
        List<OnlineStream> streams = new List<OnlineStream>(samplesList.Count);
        foreach (float[] samples in samplesList)
        {
            OnlineStream stream = CreateOnlineStream();
            AcceptWaveform(stream._onlineStream, sample_rate, samples);
            InputFinished(stream);
            streams.Add(stream);
        }
        return streams;
    }

    /// <summary>Creates a new, empty stream for this recognizer.</summary>
    public OnlineStream CreateStream()
    {
        return CreateOnlineStream();
    }

    /// <summary>Passes samples to a native stream handle.</summary>
    internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples)
    {
        SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length);
    }

    /// <summary>Passes samples to the given stream.</summary>
    public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples)
    {
        AcceptWaveform(stream._onlineStream, sample_rate, samples);
    }

    /// <summary>
    /// Copies the native handles of <paramref name="streams"/> into a newly
    /// allocated unmanaged array.
    /// </summary>
    /// <returns>Pointer to the array; the caller must release it with <c>Marshal.FreeHGlobal</c>.</returns>
    internal IntPtr GetStreamsIntPtr(OnlineStream[] streams)
    {
        int size = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream));
        IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams.Length);
        try
        {
            for (int i = 0; i < streams.Length; i++)
            {
                Marshal.StructureToPtr(streams[i]._onlineStream, IntPtr.Add(streamsIntPtr, i * size), false);
            }
        }
        catch
        {
            // Do not leak the buffer when marshalling fails part-way through.
            Marshal.FreeHGlobal(streamsIntPtr);
            throw;
        }
        return streamsIntPtr;
    }

    /// <summary>Returns whether the native recognizer reports the stream as ready for decoding.</summary>
    internal bool IsReady(OnlineStream stream)
    {
        return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0;
    }

    /// <summary>
    /// Repeatedly decodes, as one batch, all streams that are currently ready,
    /// until none of them is ready any more.
    /// </summary>
    public void DecodeMultipleStreams(List<OnlineStream> streams)
    {
        while (true)
        {
            OnlineStream[] readyStreams = streams.FindAll(IsReady).ToArray();
            if (readyStreams.Length == 0)
            {
                break;
            }

            IntPtr streamsIntPtr = GetStreamsIntPtr(readyStreams);
            try
            {
                SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, streamsIntPtr, readyStreams.Length);
            }
            finally
            {
                // Release the unmanaged array even if the native call throws.
                Marshal.FreeHGlobal(streamsIntPtr);
            }
        }
    }

    /// <summary>Decodes a single stream for as long as it is reported ready.</summary>
    public void DecodeStream(OnlineStream stream)
    {
        while (IsReady(stream))
        {
            SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream);
        }
    }

    /// <summary>
    /// Fetches the native result of a stream, copies it into a managed entity and
    /// destroys the native result before returning.
    /// </summary>
    internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream)
    {
        IntPtr resultPtr = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream);
        try
        {
            SherpaOnnxOnlineRecognizerResult result =
                Marshal.PtrToStructure<SherpaOnnxOnlineRecognizerResult>(resultPtr);

            OnlineRecognizerResultEntity entity = new OnlineRecognizerResultEntity();
            // NOTE(review): the native text is decoded as UTF-8 so non-ASCII
            // transcripts survive on every platform - confirm against the native library.
            entity.text = Marshal.PtrToStringUTF8(result.text) ?? string.Empty;
            entity.text_len = result.text_len;
            return entity;
        }
        finally
        {
            // The text has been copied into managed memory, so the native result
            // can be released right away instead of waiting for a finalizer.
            if (resultPtr != IntPtr.Zero)
            {
                SherpaOnnxSharp.DestroyOnlineRecognizerResult(resultPtr);
            }
        }
    }

    /// <summary>Returns the current recognition result of the given stream.</summary>
    public OnlineRecognizerResultEntity GetResult(OnlineStream stream)
    {
        return GetResult(stream._onlineStream);
    }

    /// <summary>Returns the current recognition result of every stream, in input order.</summary>
    public List<OnlineRecognizerResultEntity> GetResults(List<OnlineStream> streams)
    {
        List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>(streams.Count);
        foreach (OnlineStream stream in streams)
        {
            results.Add(GetResult(stream._onlineStream));
        }
        return results;
    }

    /// <summary>
    /// Destroys the native recognizer, if it is still set, on both the Dispose()
    /// and the finalizer path. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from Dispose(), <c>false</c> from the finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }
        if (_onlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
            _onlineRecognizer.impl = IntPtr.Zero;
        }
        base.Dispose(disposing);
        _disposed = true;
    }
}
/// <summary>
/// Base class of the offline recognizer wrappers. It stores the native handles
/// (stream, recognizer result, recognizer) and releases whichever of them are
/// set when the object is disposed or finalized.
/// </summary>
public class OfflineBase : IDisposable
{
    /// <summary>
    /// Releases the native handles held by this instance and suppresses finalization.
    /// </summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the native handles held by this instance. Safe to call more than once.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>, <c>false</c> when called
    /// from the finalizer. Only unmanaged handles are held, so both paths release them.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            _offlineRecognizerResult = IntPtr.Zero;
        }
        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            _offlineStream.impl = IntPtr.Zero;
        }
        if (_offlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
            _offlineRecognizer.impl = IntPtr.Zero;
        }
        _disposed = true;
    }

    /// <summary>Safety net that releases the native handles if Dispose() was never called.</summary>
    ~OfflineBase()
    {
        Dispose(disposing: false);
    }

    // Native stream handle; its impl pointer is zero when unset or released.
    internal SherpaOnnxOfflineStream _offlineStream;
    // Native recognizer result pointer; zero when unset or released.
    internal IntPtr _offlineRecognizerResult;
    // Native recognizer handle; its impl pointer is zero when unset or released.
    internal SherpaOnnxOfflineRecognizer _offlineRecognizer;
    // True once the native handles have been released.
    internal bool _disposed = false;
}
/// <summary>
/// Managed wrapper around one native offline stream handle.
/// </summary>
public class OfflineStream : OfflineBase
{
    internal OfflineStream(SherpaOnnxOfflineStream offlineStream)
    {
        this._offlineStream = offlineStream;
    }

    /// <summary>
    /// Destroys the native stream, if it is still set, on both the Dispose() and
    /// the finalizer path. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from Dispose(), <c>false</c> from the finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }
        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            _offlineStream.impl = IntPtr.Zero;
        }
        // Chain to the base implementation rather than the public Dispose(),
        // which would re-enter this override.
        base.Dispose(disposing);
        _disposed = true;
    }
}
/// <summary>
/// Managed wrapper around one native offline recognizer result pointer.
/// </summary>
public class OfflineRecognizerResult : OfflineBase
{
    internal OfflineRecognizerResult(IntPtr offlineRecognizerResult)
    {
        this._offlineRecognizerResult = offlineRecognizerResult;
    }

    /// <summary>
    /// Destroys the native result, if it is still set, on both the Dispose() and
    /// the finalizer path. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from Dispose(), <c>false</c> from the finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }
        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            _offlineRecognizerResult = IntPtr.Zero;
        }
        base.Dispose(disposing);
        _disposed = true;
    }
}
/// <summary>
/// Managed wrapper around a native offline recognizer: creates streams from
/// sample arrays, decodes them as a batch and fetches the recognized text.
/// </summary>
/// <typeparam name="T">
/// Model description type. The constructor reads model paths from an
/// <c>OfflineTransducer</c>, <c>OfflineParaformer</c> or <c>OfflineNemoEncDecCtc</c>;
/// for any other type all model paths are left unset.
/// </typeparam>
public class OfflineRecognizer<T> : OfflineBase
    where T : class, new()
{
    /// <summary>
    /// Builds the native recognizer configuration and creates the native recognizer.
    /// </summary>
    /// <param name="t">Model description; see the type parameter remarks.</param>
    /// <param name="tokensFilePath">Path of the tokens file.</param>
    /// <param name="decoding_method">Copied into the recognizer configuration.</param>
    /// <param name="sample_rate">Copied into the feature configuration.</param>
    /// <param name="feature_dim">Copied into the feature configuration.</param>
    /// <param name="num_threads">Copied into the model configuration; asserted to be greater than 0 for the handled model types.</param>
    /// <param name="debug">Copied into the model configuration.</param>
    public OfflineRecognizer(T t,
        string tokensFilePath, string decoding_method = "greedy_search",
        int sample_rate = 16000, int feature_dim = 80,
        int num_threads = 2, bool debug = false)
    {
        SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer();
        SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer();
        SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc();

        if (t is OfflineTransducer offlineTransducer)
        {
            Trace.Assert(File.Exists(offlineTransducer.DecoderFilename)
                && File.Exists(offlineTransducer.EncoderFilename)
                && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model");
            AssertCommonArguments(tokensFilePath, num_threads);
            transducer.encoder_filename = offlineTransducer.EncoderFilename;
            transducer.decoder_filename = offlineTransducer.DecoderFilename;
            transducer.joiner_filename = offlineTransducer.JoinerFilename;
        }
        else if (t is OfflineParaformer offlineParaformer)
        {
            Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model");
            AssertCommonArguments(tokensFilePath, num_threads);
            paraformer.model = offlineParaformer.Model;
        }
        else if (t is OfflineNemoEncDecCtc offlineNemoEncDecCtc)
        {
            Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model");
            AssertCommonArguments(tokensFilePath, num_threads);
            nemo_ctc.model = offlineNemoEncDecCtc.Model;
        }

        SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig();
        model_config.transducer = transducer;
        model_config.paraformer = paraformer;
        model_config.nemo_ctc = nemo_ctc;
        model_config.num_threads = num_threads;
        model_config.debug = debug;
        model_config.tokens = tokensFilePath;

        SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
        feat_config.sample_rate = sample_rate;
        feat_config.feature_dim = feature_dim;

        SherpaOnnxOfflineRecognizerConfig config = new SherpaOnnxOfflineRecognizerConfig();
        config.decoding_method = decoding_method;
        config.feat_config = feat_config;
        config.model_config = model_config;

        _offlineRecognizer = SherpaOnnxSharp.CreateOfflineRecognizer(config);
    }

    /// <summary>Assertions shared by every handled model type.</summary>
    private static void AssertCommonArguments(string tokensFilePath, int num_threads)
    {
        Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
        Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
    }

    /// <summary>Creates a native stream for this recognizer and wraps it.</summary>
    internal OfflineStream CreateOfflineStream()
    {
        SherpaOnnxOfflineStream stream = SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer);
        return new OfflineStream(stream);
    }

    /// <summary>
    /// Creates one stream per sample array and feeds it the samples.
    /// </summary>
    /// <param name="samplesList">One sample array per stream to create.</param>
    /// <param name="sample_rate">Sample rate passed along with every sample array.</param>
    /// <returns>The created streams, in the order of <paramref name="samplesList"/>.</returns>
    public OfflineStream[] CreateOfflineStream(List<float[]> samplesList, int sample_rate = 16000)
    {
        OfflineStream[] streams = new OfflineStream[samplesList.Count];
        for (int i = 0; i < streams.Length; i++)
        {
            OfflineStream stream = CreateOfflineStream();
            AcceptWaveform(stream._offlineStream, sample_rate, samplesList[i]);
            streams[i] = stream;
        }
        return streams;
    }

    /// <summary>Passes samples to a native stream handle.</summary>
    internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples)
    {
        SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length);
    }

    /// <summary>
    /// Copies the native handles of <paramref name="streams"/> into a newly
    /// allocated unmanaged array.
    /// </summary>
    /// <returns>Pointer to the array; the caller must release it with <c>Marshal.FreeHGlobal</c>.</returns>
    internal IntPtr GetStreamsIntPtr(OfflineStream[] streams)
    {
        int size = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream));
        IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams.Length);
        try
        {
            for (int i = 0; i < streams.Length; i++)
            {
                Marshal.StructureToPtr(streams[i]._offlineStream, IntPtr.Add(streamsIntPtr, i * size), false);
            }
        }
        catch
        {
            // Do not leak the buffer when marshalling fails part-way through.
            Marshal.FreeHGlobal(streamsIntPtr);
            throw;
        }
        return streamsIntPtr;
    }

    /// <summary>Decodes all given streams with a single native batch call.</summary>
    public void DecodeMultipleOfflineStreams(OfflineStream[] streams)
    {
        IntPtr streamsIntPtr = GetStreamsIntPtr(streams);
        try
        {
            SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsIntPtr, streams.Length);
        }
        finally
        {
            // Release the unmanaged array even if the native call throws.
            Marshal.FreeHGlobal(streamsIntPtr);
        }
    }

    /// <summary>
    /// Fetches the native result of a stream, copies it into a managed entity and
    /// destroys the native result before returning.
    /// </summary>
    internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream)
    {
        IntPtr resultPtr = SherpaOnnxSharp.GetOfflineStreamResult(stream);
        try
        {
            SherpaOnnxOfflineRecognizerResult result =
                Marshal.PtrToStructure<SherpaOnnxOfflineRecognizerResult>(resultPtr);

            OfflineRecognizerResultEntity entity = new OfflineRecognizerResultEntity();
            // NOTE(review): the native text is decoded as UTF-8 so non-ASCII
            // transcripts survive on every platform - confirm against the native library.
            entity.text = Marshal.PtrToStringUTF8(result.text) ?? string.Empty;
            entity.text_len = result.text_len;
            return entity;
        }
        finally
        {
            // The text has been copied into managed memory, so the native result
            // can be released right away instead of waiting for a finalizer.
            if (resultPtr != IntPtr.Zero)
            {
                SherpaOnnxSharp.DestroyOfflineRecognizerResult(resultPtr);
            }
        }
    }

    /// <summary>Returns the recognition result of every stream, in input order.</summary>
    public List<OfflineRecognizerResultEntity> GetResults(OfflineStream[] streams)
    {
        List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>(streams.Length);
        foreach (OfflineStream stream in streams)
        {
            results.Add(GetResult(stream._offlineStream));
        }
        return results;
    }

    /// <summary>
    /// Destroys the native recognizer, if it is still set, on both the Dispose()
    /// and the finalizer path. Safe to call more than once.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from Dispose(), <c>false</c> from the finalizer.</param>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }
        if (_offlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
            _offlineRecognizer.impl = IntPtr.Zero;
        }
        base.Dispose(disposing);
        _disposed = true;
    }
}
| 493 | + internal static partial class SherpaOnnxSharp | ||
| 494 | + { | ||
| 495 | + private const string dllName = @"SherpaOnnxSharp"; | ||
| 496 | + /// Create an offline recognizer. The offline entry points are declared __stdcall in offline-api.h, so they are imported as StdCall. | ||
| 497 | + [DllImport(dllName, EntryPoint = "CreateOfflineRecognizer", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)] | ||
| 498 | + internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(SherpaOnnxOfflineRecognizerConfig config); // NOTE(review): the native function takes a pointer to the config; confirm the by-value marshalling on 32-bit targets | ||
| 499 | + /// Create a stream for the given recognizer. Free it with DestroyOfflineStream(). | ||
| 500 | + [DllImport(dllName, EntryPoint = "CreateOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)] | ||
| 501 | + internal static extern SherpaOnnxOfflineStream CreateOfflineStream(SherpaOnnxOfflineRecognizer offlineRecognizer); | ||
| 502 | + /// Pass samples_size audio samples, recorded at sample_rate, to the stream. | ||
| 503 | + [DllImport(dllName, EntryPoint = "AcceptWaveform", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)] | ||
| 504 | + internal static extern void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size); | ||
| 505 | + /// Decode a single stream with the given recognizer. | ||
| 506 | + [DllImport(dllName, EntryPoint = "DecodeOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)] | ||
| 507 | + internal static extern void DecodeOfflineStream(SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream); | ||
| 508 | + /// Decode n streams in one call. streams points to an array of n native stream pointers. | ||
| 509 | + [DllImport(dllName, EntryPoint = "DecodeMultipleOfflineStreams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)] | ||
| 510 | + internal static extern void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer recognizer, IntPtr | ||
| 511 | + streams, int n); | ||
| 512 | + /// Get the result of a stream as a pointer to a native SherpaOnnxOfflineRecognizerResult. Free it with DestroyOfflineRecognizerResult(). | ||
| 513 | + [DllImport(dllName, EntryPoint = "GetOfflineStreamResult", CallingConvention = CallingConvention.StdCall)] | ||
| 514 | + internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream); | ||
| 515 | + /// Free a pointer returned by GetOfflineStreamResult(). | ||
| 516 | + [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizerResult", CallingConvention = CallingConvention.StdCall)] | ||
| 517 | + internal static extern void DestroyOfflineRecognizerResult(IntPtr result); | ||
| 518 | + /// Free a stream returned by CreateOfflineStream(). | ||
| 519 | + [DllImport(dllName, EntryPoint = "DestroyOfflineStream", CallingConvention = CallingConvention.StdCall)] | ||
| 520 | + internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream); | ||
| 521 | + /// Free a recognizer returned by CreateOfflineRecognizer(). | ||
| 522 | + [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizer", CallingConvention = CallingConvention.StdCall)] | ||
| 523 | + internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer); | ||
| 524 | + // Online entry points follow. They are not declared in offline-api.h, so their calling convention is left as written. | ||
| 525 | + [DllImport(dllName, EntryPoint = "CreateOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] | ||
| 526 | + internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(SherpaOnnxOnlineRecognizerConfig config); | ||
| 527 | + | ||
| 528 | + /// Free a pointer returned by CreateOnlineRecognizer() | ||
| 529 | + /// | ||
| 530 | + /// @param p A pointer returned by CreateOnlineRecognizer() | ||
| 531 | + [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] | ||
| 532 | + internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer); | ||
| 533 | + | ||
| 534 | + /// Create an online stream for accepting wave samples. | ||
| 535 | + /// | ||
| 536 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 537 | + /// @return Return a pointer to an OnlineStream. The user has to invoke | ||
| 538 | + /// DestroyOnlineStream() to free it to avoid memory leak. | ||
| 539 | + [DllImport(dllName, EntryPoint = "CreateOnlineStream", CallingConvention = CallingConvention.Cdecl)] | ||
| 540 | + internal static extern SherpaOnnxOnlineStream CreateOnlineStream( | ||
| 541 | + SherpaOnnxOnlineRecognizer recognizer); | ||
| 542 | + | ||
| 543 | + /// Destroy an online stream. | ||
| 544 | + /// | ||
| 545 | + /// @param stream A pointer returned by CreateOnlineStream() | ||
| 546 | + [DllImport(dllName, EntryPoint = "DestroyOnlineStream", CallingConvention = CallingConvention.Cdecl)] | ||
| 547 | + internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream); | ||
| 548 | + | ||
| 549 | + /// Accept input audio samples and compute the features. | ||
| 550 | + /// The user has to invoke DecodeOnlineStream() to run the neural network and | ||
| 551 | + /// decoding. | ||
| 552 | + /// | ||
| 553 | + /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 554 | + /// @param sample_rate Sample rate of the input samples. If it is different | ||
| 555 | + /// from config.feat_config.sample_rate, we will do | ||
| 556 | + /// resampling inside sherpa-onnx. | ||
| 557 | + /// @param samples A pointer to a 1-D array containing audio samples. | ||
| 558 | + /// The range of samples has to be normalized to [-1, 1]. | ||
| 559 | + /// @param n Number of elements in the samples array. | ||
| 560 | + [DllImport(dllName, EntryPoint = "AcceptOnlineWaveform", CallingConvention = CallingConvention.Cdecl)] | ||
| 561 | + internal static extern void AcceptOnlineWaveform(SherpaOnnxOnlineStream stream, int sample_rate, | ||
| 562 | + float[] samples, int n); | ||
| 563 | + | ||
| 564 | + /// Return 1 if there are enough number of feature frames for decoding. | ||
| 565 | + /// Return 0 otherwise. | ||
| 566 | + /// | ||
| 567 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer | ||
| 568 | + /// @param stream A pointer returned by CreateOnlineStream | ||
| 569 | + [DllImport(dllName, EntryPoint = "IsOnlineStreamReady", CallingConvention = CallingConvention.Cdecl)] | ||
| 570 | + internal static extern int IsOnlineStreamReady(SherpaOnnxOnlineRecognizer recognizer, | ||
| 571 | + SherpaOnnxOnlineStream stream); | ||
| 572 | + | ||
| 573 | + /// Call this function to run the neural network model and decoding. | ||
| 574 | + /// | ||
| 575 | + /// Precondition for this function: IsOnlineStreamReady() MUST return 1. | ||
| 576 | + /// | ||
| 577 | + /// Usage example: | ||
| 578 | + /// | ||
| 579 | + /// while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 580 | + /// DecodeOnlineStream(recognizer, stream); | ||
| 581 | + /// } | ||
| 582 | + /// | ||
| 583 | + [DllImport(dllName, EntryPoint = "DecodeOnlineStream", CallingConvention = CallingConvention.Cdecl)] | ||
| 584 | + internal static extern void DecodeOnlineStream(SherpaOnnxOnlineRecognizer recognizer, | ||
| 585 | + SherpaOnnxOnlineStream stream); | ||
| 586 | + | ||
| 587 | + /// This function is similar to DecodeOnlineStream(). It decodes multiple | ||
| 588 | + /// OnlineStream in parallel. | ||
| 589 | + /// | ||
| 590 | + /// Caution: The caller has to ensure each OnlineStream is ready, i.e., | ||
| 591 | + /// IsOnlineStreamReady() for that stream should return 1. | ||
| 592 | + /// | ||
| 593 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 594 | + /// @param streams A pointer array containing pointers returned by | ||
| 595 | + /// CreateOnlineStream() | ||
| 596 | + /// @param n Number of elements in the given streams array. | ||
| 597 | + [DllImport(dllName, EntryPoint = "DecodeMultipleOnlineStreams", CallingConvention = CallingConvention.Cdecl)] | ||
| 598 | + internal static extern void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer recognizer, | ||
| 599 | + IntPtr streams, int n); | ||
| 600 | + | ||
| 601 | + /// Get the decoding results so far for an OnlineStream. | ||
| 602 | + /// | ||
| 603 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 604 | + /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 605 | + /// @return A pointer containing the result. The user has to invoke | ||
| 606 | + /// DestroyOnlineRecognizerResult() to free the returned pointer to | ||
| 607 | + /// avoid memory leak. | ||
| 608 | + [DllImport(dllName, EntryPoint = "GetOnlineStreamResult", CallingConvention = CallingConvention.Cdecl)] | ||
| 609 | + internal static extern IntPtr GetOnlineStreamResult( | ||
| 610 | + SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream); | ||
| 611 | + | ||
| 612 | + /// Destroy the pointer returned by GetOnlineStreamResult(). | ||
| 613 | + /// | ||
| 614 | + /// @param r A pointer returned by GetOnlineStreamResult() | ||
| 615 | + [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizerResult", CallingConvention = CallingConvention.Cdecl)] | ||
| 616 | + internal static extern void DestroyOnlineRecognizerResult(IntPtr result); | ||
| 617 | + | ||
| 618 | + /// Reset an OnlineStream , which clears the neural network model state | ||
| 619 | + /// and the state for decoding. | ||
| 620 | + /// | ||
| 621 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 622 | + /// @param stream A pointer returned by CreateOnlineStream | ||
| 623 | + [DllImport(dllName, EntryPoint = "Reset", CallingConvention = CallingConvention.Cdecl)] | ||
| 624 | + internal static extern void Reset(SherpaOnnxOnlineRecognizer recognizer, | ||
| 625 | + SherpaOnnxOnlineStream stream); | ||
| 626 | + | ||
| 627 | + /// Signal that no more audio samples would be available. | ||
| 628 | + /// After this call, you cannot call AcceptOnlineWaveform() any more. | ||
| 629 | + /// | ||
| 630 | + /// @param stream A pointer returned by CreateOnlineStream() | ||
| 631 | + [DllImport(dllName, EntryPoint = "InputFinished", CallingConvention = CallingConvention.Cdecl)] | ||
| 632 | + internal static extern void InputFinished(SherpaOnnxOnlineStream stream); | ||
| 633 | + | ||
| 634 | + /// Return 1 if an endpoint has been detected. | ||
| 635 | + /// | ||
| 636 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 637 | + /// @param stream A pointer returned by CreateOnlineStream() | ||
| 638 | + /// @return Return 1 if an endpoint is detected. Return 0 otherwise. | ||
| 639 | + [DllImport(dllName, EntryPoint = "IsEndpoint", CallingConvention = CallingConvention.Cdecl)] | ||
| 640 | + internal static extern int IsEndpoint(SherpaOnnxOnlineRecognizer recognizer, | ||
| 641 | + SherpaOnnxOnlineStream stream); | ||
| 642 | + } | ||
| 643 | + /// Managed mirror of the native SherpaOnnxOfflineTransducer struct: three model file names. | ||
| 644 | + internal struct SherpaOnnxOfflineTransducer | ||
| 645 | + { | ||
| 646 | + public string encoder_filename; | ||
| 647 | + public string decoder_filename; | ||
| 648 | + public string joiner_filename; | ||
| 649 | + /// Start every file name as an empty string instead of null. | ||
| 650 | + public SherpaOnnxOfflineTransducer() | ||
| 651 | + { | ||
| 652 | + encoder_filename = decoder_filename = joiner_filename = ""; | ||
| 653 | + } | ||
| 654 | + }; | ||
| 655 | + /// Managed mirror of the native SherpaOnnxOfflineParaformer struct. | ||
| 656 | + internal struct SherpaOnnxOfflineParaformer | ||
| 657 | + { | ||
| 658 | + /// Paraformer model name; the native side ignores it when it is empty. | ||
| 659 | + public string model; | ||
| 660 | + /// Start the model name as an empty string instead of null. | ||
| 661 | + public SherpaOnnxOfflineParaformer() => model = ""; | ||
| 662 | + }; | ||
| 663 | + /// Managed mirror of the native SherpaOnnxOfflineNemoEncDecCtc struct. | ||
| 664 | + internal struct SherpaOnnxOfflineNemoEncDecCtc | ||
| 665 | + { | ||
| 666 | + /// NeMo CTC model name; the native side ignores it when it is empty. | ||
| 667 | + public string model; | ||
| 668 | + /// Start the model name as an empty string instead of null. | ||
| 669 | + public SherpaOnnxOfflineNemoEncDecCtc() => model = ""; | ||
| 670 | + }; | ||
| 671 | + internal struct SherpaOnnxOfflineModelConfig // mirrors the native struct of the same name in offline-api.h; field order must match it | ||
| 672 | + { | ||
| 673 | + public SherpaOnnxOfflineTransducer transducer; | ||
| 674 | + public SherpaOnnxOfflineParaformer paraformer; | ||
| 675 | + public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc; | ||
| 676 | + public string tokens; | ||
| 677 | + public int num_threads; | ||
| 678 | + [MarshalAs(UnmanagedType.I1)] public bool debug; // the native field is a 1-byte C++ bool, not the 4-byte BOOL that bool marshals to by default | ||
| 679 | + }; | ||
| 680 | + /// Feature settings handed to the native recognizer. It expects 16 kHz 16-bit single channel wave format. | ||
| 681 | + internal struct SherpaOnnxFeatureConfig | ||
| 682 | + { | ||
| 683 | + /// Sample rate of the input data. MUST match the one expected | ||
| 684 | + /// by the model. For instance, it should be 16000 for the models | ||
| 685 | + /// published by the sherpa-onnx project. | ||
| 686 | + public int sample_rate; | ||
| 687 | + | ||
| 688 | + /// Feature dimension of the model. | ||
| 689 | + /// For instance, it should be 80 for the models published by the sherpa-onnx project. | ||
| 690 | + public int feature_dim; | ||
| 691 | + }; | ||
| 692 | + internal struct SherpaOnnxOfflineRecognizerConfig // argument of CreateOfflineRecognizer(); mirrors the native struct of the same name | ||
| 693 | + { | ||
| 694 | + public SherpaOnnxFeatureConfig feat_config; | ||
| 695 | + public SherpaOnnxOfflineModelConfig model_config; | ||
| 696 | + | ||
| 697 | + /// Decoding method name. Possible values are: greedy_search, modified_beam_search | ||
| 698 | + public string decoding_method; | ||
| 699 | + | ||
| 700 | + }; | ||
| 701 | + internal struct SherpaOnnxOfflineRecognizer // handle returned by CreateOfflineRecognizer() | ||
| 702 | + { | ||
| 703 | + public IntPtr impl; // native pointer value; IntPtr.Zero once the recognizer has been destroyed | ||
| 704 | + }; | ||
| 705 | + [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] | ||
| 706 | + internal struct SherpaOnnxOfflineStream // handle returned by CreateOfflineStream() | ||
| 707 | + { | ||
| 708 | + public IntPtr impl; // native pointer value | ||
| 709 | + }; | ||
| 710 | + internal struct SherpaOnnxOfflineRecognizerResult // layout of the native result that GetOfflineStreamResult() points to | ||
| 711 | + { | ||
| 712 | + public IntPtr text; // pointer to the zero-terminated result text allocated by native code | ||
| 713 | + public int text_len; // length of text, not counting the terminator | ||
| 714 | + } | ||
| 715 | + /// Interop struct holding the three transducer model file names of an online recognizer. | ||
| 716 | + internal struct SherpaOnnxOnlineTransducer | ||
| 717 | + { | ||
| 718 | + public string encoder_filename; | ||
| 719 | + public string decoder_filename; | ||
| 720 | + public string joiner_filename; | ||
| 721 | + /// Start every file name as an empty string instead of null. | ||
| 722 | + public SherpaOnnxOnlineTransducer() | ||
| 723 | + { | ||
| 724 | + encoder_filename = decoder_filename = joiner_filename = string.Empty; | ||
| 725 | + } | ||
| 726 | + }; | ||
| 727 | + internal struct SherpaOnnxOnlineModelConfig // model part of SherpaOnnxOnlineRecognizerConfig | ||
| 728 | + { | ||
| 729 | + public SherpaOnnxOnlineTransducer transducer; | ||
| 730 | + public string tokens; | ||
| 731 | + public int num_threads; | ||
| 732 | + public bool debug; // true to print debug information of the model. NOTE(review): confirm the native field width matches the default bool marshalling | ||
| 733 | + }; | ||
| 734 | + internal struct SherpaOnnxOnlineRecognizerConfig // argument of CreateOnlineRecognizer() | ||
| 735 | + { | ||
| 736 | + public SherpaOnnxFeatureConfig feat_config; | ||
| 737 | + public SherpaOnnxOnlineModelConfig model_config; | ||
| 738 | + | ||
| 739 | + /// Decoding method name. Possible values are: greedy_search, modified_beam_search | ||
| 740 | + public string decoding_method; | ||
| 741 | + | ||
| 742 | + /// Used only when decoding_method is modified_beam_search | ||
| 743 | + /// Example value: 4 | ||
| 744 | + public int max_active_paths; | ||
| 745 | + | ||
| 746 | + /// 0 to disable endpoint detection. | ||
| 747 | + /// A non-zero value to enable endpoint detection. | ||
| 748 | + public int enable_endpoint; | ||
| 749 | + | ||
| 750 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 751 | + /// this value even if nothing has been decoded. | ||
| 752 | + /// Used only when enable_endpoint is not 0. | ||
| 753 | + public float rule1_min_trailing_silence; | ||
| 754 | + | ||
| 755 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 756 | + /// this value after something that is not blank has been decoded. | ||
| 757 | + /// Used only when enable_endpoint is not 0. | ||
| 758 | + public float rule2_min_trailing_silence; | ||
| 759 | + | ||
| 760 | + /// An endpoint is detected if the utterance length in seconds is larger than | ||
| 761 | + /// this value. | ||
| 762 | + /// Used only when enable_endpoint is not 0. | ||
| 763 | + public float rule3_min_utterance_length; | ||
| 764 | + }; | ||
| 765 | + internal struct SherpaOnnxOnlineRecognizerResult // layout of the native result that GetOnlineStreamResult() points to | ||
| 766 | + { | ||
| 767 | + public IntPtr text; // pointer to the result text owned by native code | ||
| 768 | + public int text_len; // length of text as reported by native code | ||
| 769 | + // TODO: Add more fields | ||
| 770 | + } | ||
| 771 | + internal struct SherpaOnnxOnlineRecognizer // handle returned by CreateOnlineRecognizer() | ||
| 772 | + { | ||
| 773 | + public IntPtr impl; // native pointer value | ||
| 774 | + }; | ||
| 775 | + [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] | ||
| 776 | + internal struct SherpaOnnxOnlineStream // handle returned by CreateOnlineStream() | ||
| 777 | + { | ||
| 778 | + public IntPtr impl; // native pointer value | ||
| 779 | + }; | ||
| 780 | + /// <summary>NeMo EncDec CTC model option for offline recognition.</summary> | ||
| 781 | + public class OfflineNemoEncDecCtc | ||
| 782 | + { | ||
| 783 | + public string Model { get; set; } = string.Empty; | ||
| 784 | + } | ||
| 785 | + /// <summary>Paraformer model option for offline recognition.</summary> | ||
| 786 | + public class OfflineParaformer | ||
| 787 | + { | ||
| 788 | + public string Model { get; set; } = string.Empty; | ||
| 789 | + } | ||
| 790 | + public class OfflineRecognizerResultEntity | ||
| 791 | + { | ||
| 792 | + /// <summary> | ||
| 793 | + /// Recognized text. | ||
| 794 | + /// </summary> | ||
| 795 | + public string? text { get; set; } | ||
| 796 | + /// <summary> | ||
| 797 | + /// Length of the recognized text. | ||
| 798 | + /// </summary> | ||
| 799 | + public int text_len { get; set; } | ||
| 800 | + /// <summary> | ||
| 801 | + /// Decoded tokens. | ||
| 802 | + /// </summary> | ||
| 803 | + public List<string>? tokens { get; set; } | ||
| 804 | + /// <summary> | ||
| 805 | + /// Timestamps of the decoded tokens. | ||
| 806 | + /// </summary> | ||
| 807 | + public List<float>? timestamps { get; set; } | ||
| 808 | + } | ||
| 809 | + /// <summary> | ||
| 810 | + /// File names of the transducer models (encoder, decoder, joiner). Each one defaults to an empty string. | ||
| 811 | + /// </summary> | ||
| 812 | + public class OfflineTransducer | ||
| 813 | + { | ||
| 814 | + public string EncoderFilename { get; set; } = string.Empty; | ||
| 815 | + public string DecoderFilename { get; set; } = string.Empty; | ||
| 816 | + public string JoinerFilename { get; set; } = string.Empty; | ||
| 817 | + } | ||
| 818 | + /// <summary> | ||
| 819 | + /// Endpoint-detection settings for an online recognizer. | ||
| 820 | + /// Every value is 0 until it is set. | ||
| 821 | + /// The three rules below apply only when EnableEndpoint is not 0. | ||
| 822 | + /// </summary> | ||
| 823 | + public class OnlineEndpoint | ||
| 824 | + { | ||
| 825 | + /// 0 to disable endpoint detection. | ||
| 826 | + /// A non-zero value to enable endpoint detection. | ||
| 827 | + public int EnableEndpoint { get; set; } | ||
| 828 | + | ||
| 829 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 830 | + /// this value even if nothing has been decoded. | ||
| 831 | + /// Used only when EnableEndpoint is not 0. | ||
| 832 | + public float Rule1MinTrailingSilence { get; set; } | ||
| 833 | + | ||
| 834 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 835 | + /// this value after something that is not blank has been decoded. | ||
| 836 | + /// Used only when EnableEndpoint is not 0. | ||
| 837 | + public float Rule2MinTrailingSilence { get; set; } | ||
| 838 | + | ||
| 839 | + /// An endpoint is detected if the utterance in seconds is larger than | ||
| 840 | + /// this value. | ||
| 841 | + /// Used only when EnableEndpoint is not 0. | ||
| 842 | + public float Rule3MinUtteranceLength { get; set; } | ||
| 843 | + } | ||
| 844 | + public class OnlineRecognizerResultEntity | ||
| 845 | + { | ||
| 846 | + /// <summary> | ||
| 847 | + /// Recognized text. | ||
| 848 | + /// </summary> | ||
| 849 | + public string? text { get; set; } | ||
| 850 | + /// <summary> | ||
| 851 | + /// Length of the recognized text. | ||
| 852 | + /// </summary> | ||
| 853 | + public int text_len { get; set; } | ||
| 854 | + /// <summary> | ||
| 855 | + /// Decoded tokens. | ||
| 856 | + /// </summary> | ||
| 857 | + public List<string>? tokens { get; set; } | ||
| 858 | + /// <summary> | ||
| 859 | + /// Timestamps of the decoded tokens. | ||
| 860 | + /// </summary> | ||
| 861 | + public List<float>? timestamps { get; set; } | ||
| 862 | + } | ||
| 863 | + /// <summary> | ||
| 864 | + /// File names of the transducer models (encoder, decoder, joiner) for online recognition. Each one defaults to an empty string. | ||
| 865 | + /// </summary> | ||
| 866 | + public class OnlineTransducer | ||
| 867 | + { | ||
| 868 | + public string EncoderFilename { get; set; } = string.Empty; | ||
| 869 | + public string DecoderFilename { get; set; } = string.Empty; | ||
| 870 | + public string JoinerFilename { get; set; } = string.Empty; | ||
| 871 | + } | ||
| 872 | +} |
sherpa-onnx/csharp-api/SherpaOnnx.csproj
0 → 100644
sherpa-onnx/csharp-api/offline-api.cpp
0 → 100644
| 1 | +// sherpa-onnx/csharp-api/offline-api.cpp | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2023 Manyeyes Corporation | ||
| 4 | + | ||
| 5 | +#include "offline-api.h" | ||
| 6 | + | ||
| 7 | +#include "sherpa-onnx/csrc/display.h" | ||
| 8 | +#include "sherpa-onnx/csrc/offline-recognizer.h" | ||
| 9 | + | ||
| 10 | +namespace sherpa_onnx | ||
| 11 | +{ | ||
| 12 | + struct SherpaOnnxOfflineRecognizer {  // wrapper handed out by CreateOfflineRecognizer() | ||
| 13 | + sherpa_onnx::OfflineRecognizer* impl;  // raw owning pointer; deleted in DestroyOfflineRecognizer() | ||
| 14 | + }; | ||
| 15 | + | ||
| 16 | + struct SherpaOnnxOfflineStream {  // wrapper handed out by CreateOfflineStream() | ||
| 17 | + std::unique_ptr<sherpa_onnx::OfflineStream> impl;  // released when the wrapper is deleted | ||
| 18 | + explicit SherpaOnnxOfflineStream(std::unique_ptr<sherpa_onnx::OfflineStream> p) | ||
| 19 | + : impl(std::move(p)) {} | ||
| 20 | + }; | ||
| 21 | + | ||
| 22 | + struct SherpaOnnxDisplay {  // NOTE(review): not referenced by any function in this file; confirm it is still needed | ||
| 23 | + std::unique_ptr<sherpa_onnx::Display> impl; | ||
| 24 | + }; | ||
| 25 | + | ||
| 26 | + /// Build a recognizer from `config`. The first non-empty model among transducer, paraformer and nemo_ctc is used. | ||
| 27 | + /// The caller owns the returned pointer and must free it with DestroyOfflineRecognizer(). | ||
| 28 | + SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer( | ||
| 29 | + const SherpaOnnxOfflineRecognizerConfig* config) { | ||
| 30 | + // A string field that was never set may arrive as NULL. strlen(NULL) and | ||
| 31 | + // std::string(NULL) are undefined behaviour, so NULL is treated as "". | ||
| 32 | + auto str = [](const char* s) { return s != nullptr ? s : ""; }; | ||
| 33 | + const auto& model = config->model_config; | ||
| 34 | + | ||
| 35 | + sherpa_onnx::OfflineRecognizerConfig recognizer_config; | ||
| 36 | + recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; | ||
| 37 | + recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; | ||
| 38 | + | ||
| 39 | + if (*str(model.transducer.encoder_filename) != '\0') { | ||
| 40 | + recognizer_config.model_config.transducer.encoder_filename = | ||
| 41 | + str(model.transducer.encoder_filename); | ||
| 42 | + recognizer_config.model_config.transducer.decoder_filename = | ||
| 43 | + str(model.transducer.decoder_filename); | ||
| 44 | + recognizer_config.model_config.transducer.joiner_filename = | ||
| 45 | + str(model.transducer.joiner_filename); | ||
| 46 | + } | ||
| 47 | + else if (*str(model.paraformer.model) != '\0') { | ||
| 48 | + recognizer_config.model_config.paraformer.model = str(model.paraformer.model); | ||
| 49 | + } | ||
| 50 | + else if (*str(model.nemo_ctc.model) != '\0') { | ||
| 51 | + recognizer_config.model_config.nemo_ctc.model = str(model.nemo_ctc.model); | ||
| 52 | + } | ||
| 53 | + | ||
| 54 | + recognizer_config.model_config.tokens = str(model.tokens); | ||
| 55 | + recognizer_config.model_config.num_threads = model.num_threads; | ||
| 56 | + recognizer_config.model_config.debug = model.debug; | ||
| 57 | + recognizer_config.decoding_method = str(config->decoding_method); | ||
| 58 | + | ||
| 59 | + // Build the implementation before the wrapper, so that a throwing | ||
| 60 | + // constructor cannot leave an allocated wrapper behind. | ||
| 61 | + auto* impl = new sherpa_onnx::OfflineRecognizer(recognizer_config); | ||
| 62 | + SherpaOnnxOfflineRecognizer* recognizer = new SherpaOnnxOfflineRecognizer; | ||
| 63 | + recognizer->impl = impl; | ||
| 64 | + return recognizer; | ||
| 65 | + } | ||
| 66 | + | ||
| 67 | + /// Create a stream from `recognizer`. The caller must free it with DestroyOfflineStream(). | ||
| 68 | + SherpaOnnxOfflineStream* __stdcall CreateOfflineStream( | ||
| 69 | + SherpaOnnxOfflineRecognizer* recognizer) { | ||
| 70 | + auto owned = recognizer->impl->CreateStream(); | ||
| 71 | + return new SherpaOnnxOfflineStream(std::move(owned)); | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + /// Feed `samples_size` float samples recorded at `sample_rate` to the stream. | ||
| 75 | + void __stdcall AcceptWaveform( | ||
| 76 | + SherpaOnnxOfflineStream* stream, | ||
| 77 | + int32_t sample_rate, | ||
| 78 | + const float* samples, int32_t samples_size) { | ||
| 79 | + stream->impl->AcceptWaveform(sample_rate, samples, samples_size);  // forwarded as is; no temporary copy of the waveform is needed | ||
| 80 | + } | ||
| 81 | + | ||
| 82 | + /// Decode a single stream with the given recognizer. | ||
| 83 | + void __stdcall DecodeOfflineStream(SherpaOnnxOfflineRecognizer* recognizer, SherpaOnnxOfflineStream* stream) { | ||
| 84 | + sherpa_onnx::OfflineStream* target = stream->impl.get(); | ||
| 85 | + recognizer->impl->DecodeStream(target); | ||
| 86 | + } | ||
| 87 | + | ||
| 88 | + /// Decode `n` streams in one batch. Each wrapper is unwrapped first because DecodeStreams() takes raw stream pointers. | ||
| 89 | + void __stdcall DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer* recognizer, | ||
| 90 | + SherpaOnnxOfflineStream** streams, int32_t n) { | ||
| 91 | + std::vector<sherpa_onnx::OfflineStream*> raw(n); | ||
| 92 | + for (int32_t idx = 0; idx != n; ++idx) { | ||
| 93 | + raw[idx] = streams[idx]->impl.get(); | ||
| 94 | + } | ||
| 95 | + recognizer->impl->DecodeStreams(raw.data(), n); | ||
| 96 | + } | ||
| 97 | + | ||
| 98 | + /// Copy the stream's recognized text into a new result object. Free it with DestroyOfflineRecognizerResult(). | ||
| 99 | + SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult( | ||
| 100 | + SherpaOnnxOfflineStream* stream) { | ||
| 101 | + const auto text = stream->impl->GetResult().text; | ||
| 102 | + auto* out = new SherpaOnnxOfflineRecognizerResult; | ||
| 103 | + char* buffer = new char[text.size() + 1]; | ||
| 104 | + std::copy(text.begin(), text.end(), buffer); | ||
| 105 | + buffer[text.size()] = 0;  // terminator, so the text can be read as a C string | ||
| 106 | + out->text = buffer; | ||
| 107 | + out->text_len = text.size(); | ||
| 108 | + return out; | ||
| 109 | + } | ||
| 110 | + | ||
| 111 | + | ||
| 112 | + /// Free a pointer returned by CreateOfflineRecognizer(). A null pointer is ignored. | ||
| 113 | + /// @param recognizer A pointer returned by CreateOfflineRecognizer() | ||
| 114 | + void __stdcall DestroyOfflineRecognizer( | ||
| 115 | + SherpaOnnxOfflineRecognizer* recognizer) { | ||
| 116 | + if (recognizer == nullptr) return;  // nothing to free; avoids dereferencing a zeroed handle | ||
| 117 | + delete recognizer->impl; | ||
| 118 | + delete recognizer; | ||
| 119 | + } | ||
| 120 | + | ||
| 121 | + /// Destroy an offline stream. | ||
| 122 | + /// | ||
| 123 | + /// @param stream A pointer returned by CreateOfflineStream() | ||
| 124 | + void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) { | ||
| 125 | + delete stream; | ||
| 126 | + } | ||
| 127 | + | ||
| 128 | + /// Destroy the pointer returned by GetOfflineStreamResult(). A null pointer is ignored. | ||
| 129 | + /// | ||
| 130 | + /// @param r A pointer returned by GetOfflineStreamResult() | ||
| 131 | + void __stdcall DestroyOfflineRecognizerResult( | ||
| 132 | + SherpaOnnxOfflineRecognizerResult* r) { | ||
| 133 | + if (r != nullptr) delete[] r->text;  // text is allocated with new char[], so it must be freed with delete[] | ||
| 134 | + delete r; | ||
| 135 | + } | ||
| 136 | +}// namespace sherpa_onnx |
sherpa-onnx/csharp-api/offline-api.h
0 → 100644
| 1 | +// sherpa-onnx/csharp-api/offline-api.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2023 Manyeyes Corporation | ||
| 4 | + | ||
| 5 | +#pragma once | ||
| 6 | + | ||
| 7 | +#include <cstdint> | ||
| 8 | +#include <list> | ||
| 9 | +namespace sherpa_onnx | ||
| 10 | +{ | ||
| 11 | + /// Please refer to | ||
| 12 | + /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 13 | + /// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 14 | + /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 15 | + /// from there. | ||
| 16 | + typedef struct SherpaOnnxOfflineTransducer { | ||
| 17 | + const char* encoder_filename; | ||
| 18 | + const char* decoder_filename; | ||
| 19 | + const char* joiner_filename; | ||
| 20 | + } SherpaOnnxOfflineTransducer; | ||
| 21 | + /// Paraformer model name. CreateOfflineRecognizer() ignores it when it is empty. | ||
| 22 | + typedef struct SherpaOnnxOfflineParaformer { | ||
| 23 | + const char* model; | ||
| 24 | + }SherpaOnnxOfflineParaformer; | ||
| 25 | + /// NeMo CTC model name. CreateOfflineRecognizer() ignores it when it is empty. | ||
| 26 | + typedef struct SherpaOnnxOfflineNemoEncDecCtc { | ||
| 27 | + const char* model; | ||
| 28 | + }SherpaOnnxOfflineNemoEncDecCtc; | ||
| 29 | + | ||
| 30 | + /// Model part of the recognizer configuration. | ||
| 31 | + typedef struct SherpaOnnxOfflineModelConfig { | ||
| 32 | + SherpaOnnxOfflineTransducer transducer; | ||
| 33 | + SherpaOnnxOfflineParaformer paraformer; | ||
| 34 | + SherpaOnnxOfflineNemoEncDecCtc nemo_ctc; | ||
| 35 | + const char* tokens; | ||
| 36 | + const int32_t num_threads; | ||
| 37 | + const bool debug; | ||
| 38 | + } SherpaOnnxOfflineModelConfig; | ||
| 39 | + | ||
| 40 | + /// It expects 16 kHz 16-bit single channel wave format. | ||
| 41 | + typedef struct SherpaOnnxFeatureConfig { | ||
| 42 | + /// Sample rate of the input data. MUST match the one expected | ||
| 43 | + /// by the model. For instance, it should be 16000 for models provided | ||
| 44 | + /// by us. | ||
| 45 | + int32_t sample_rate; | ||
| 46 | + | ||
| 47 | + /// Feature dimension of the model. | ||
| 48 | + /// For instance, it should be 80 for models provided by us. | ||
| 49 | + int32_t feature_dim; | ||
| 50 | + } SherpaOnnxFeatureConfig; | ||
| 51 | + /// Full configuration read by CreateOfflineRecognizer(). | ||
| 52 | + typedef struct SherpaOnnxOfflineRecognizerConfig { | ||
| 53 | + SherpaOnnxFeatureConfig feat_config; | ||
| 54 | + SherpaOnnxOfflineModelConfig model_config; | ||
| 55 | + | ||
| 56 | + /// Possible values are: greedy_search, modified_beam_search | ||
| 57 | + const char* decoding_method; | ||
| 58 | + | ||
| 59 | + } SherpaOnnxOfflineRecognizerConfig; | ||
| 60 | + /// Result returned by GetOfflineStreamResult(). Free it with DestroyOfflineRecognizerResult(). | ||
| 61 | + typedef struct SherpaOnnxOfflineRecognizerResult { | ||
| 62 | + // Recognition results. | ||
| 63 | + // For English, it consists of space separated words. | ||
| 64 | + // For Chinese, it consists of Chinese words without spaces. | ||
| 65 | + char* text; | ||
| 66 | + int text_len; | ||
| 67 | + | ||
| 68 | + // Decoded results at the token level. | ||
| 69 | + // For instance, for BPE-based models it consists of a list of BPE tokens. | ||
| 70 | + // std::vector<std::string> tokens; | ||
| 71 | + | ||
| 72 | + // timestamps.size() == tokens.size() | ||
| 73 | + // timestamps[i] records the time in seconds when tokens[i] is decoded. | ||
| 74 | + // std::vector<float> timestamps; | ||
| 75 | + } SherpaOnnxOfflineRecognizerResult; | ||
| 76 | + | ||
| 77 | + /// Note: OfflineRecognizer here means a non-streaming recognizer. | ||
| 78 | + /// It does not need to access the Internet during recognition. | ||
| 79 | + /// Everything is run locally. | ||
| 80 | + typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer; | ||
| 81 | + /// Opaque stream type; its definition lives in offline-api.cpp. | ||
| 82 | + typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; | ||
| 83 | + /// Create a recognizer from `config`. Free it with DestroyOfflineRecognizer(). | ||
| 84 | + extern "C" __declspec(dllexport) | ||
| 85 | + SherpaOnnxOfflineRecognizer * __stdcall CreateOfflineRecognizer( | ||
| 86 | + const SherpaOnnxOfflineRecognizerConfig * config); | ||
| 87 | + /// Create a stream for the given recognizer. Free it with DestroyOfflineStream(). | ||
| 88 | + extern "C" __declspec(dllexport) | ||
| 89 | + SherpaOnnxOfflineStream * __stdcall CreateOfflineStream( | ||
| 90 | + SherpaOnnxOfflineRecognizer * sherpaOnnxOfflineRecognizer); | ||
| 91 | + /// Pass `samples_size` samples recorded at `sample_rate` to the stream. | ||
| 92 | + extern "C" __declspec(dllexport) | ||
| 93 | + void __stdcall AcceptWaveform( | ||
| 94 | + SherpaOnnxOfflineStream * stream, int32_t sample_rate, | ||
| 95 | + const float* samples, int32_t samples_size); | ||
| 96 | + /// Decode a single stream. | ||
| 97 | + extern "C" __declspec(dllexport) | ||
| 98 | + void __stdcall DecodeOfflineStream( | ||
| 99 | + SherpaOnnxOfflineRecognizer * recognizer, | ||
| 100 | + SherpaOnnxOfflineStream * stream); | ||
| 101 | + /// Decode `n` streams in one call; `streams` is an array of `n` stream pointers. | ||
| 102 | + extern "C" __declspec(dllexport) | ||
| 103 | + void __stdcall DecodeMultipleOfflineStreams( | ||
| 104 | + SherpaOnnxOfflineRecognizer * recognizer, | ||
| 105 | + SherpaOnnxOfflineStream * *streams, int32_t n); | ||
| 106 | + /// Get the result of a stream. Free it with DestroyOfflineRecognizerResult(). | ||
| 107 | + extern "C" __declspec(dllexport) | ||
| 108 | + SherpaOnnxOfflineRecognizerResult * __stdcall GetOfflineStreamResult( | ||
| 109 | + SherpaOnnxOfflineStream * stream); | ||
| 110 | + /// Free a pointer returned by CreateOfflineRecognizer(). | ||
| 111 | + extern "C" __declspec(dllexport) | ||
| 112 | + void __stdcall DestroyOfflineRecognizer( | ||
| 113 | + SherpaOnnxOfflineRecognizer * recognizer); | ||
| 114 | + /// Free a pointer returned by CreateOfflineStream(). | ||
| 115 | + extern "C" __declspec(dllexport) | ||
| 116 | + void __stdcall DestroyOfflineStream( | ||
| 117 | + SherpaOnnxOfflineStream * stream); | ||
| 118 | + /// Free a pointer returned by GetOfflineStreamResult(). | ||
| 119 | + extern "C" __declspec(dllexport) | ||
| 120 | + void __stdcall DestroyOfflineRecognizerResult( | ||
| 121 | + SherpaOnnxOfflineRecognizerResult * r); | ||
| 122 | +}// namespace sherpa_onnx | ||
-
请 注册 或 登录 后发表评论