manyeyes
Committed by GitHub

Add a C# api for offline-recognizer of sherpa-onnx (#129)

  1 +// See https://aka.ms/new-console-template for more information
  2 +// Copyright (c) 2023 by manyeyes
  3 +using SherpaOnnx;
  4 +/// Please refer to
  5 +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  6 +/// to download pre-trained models. That is, you can find encoder-xxx.onnx
  7 +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
  8 +/// from there.
  9 +
  10 +/// download model eg:
  11 +/// (The directory where the application runs)
  12 +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
  13 +/// cd /path/to
  14 +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01
  15 +/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example
  16 +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512
  17 +
  18 +/// NuGet for sherpa-onnx
  19 +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
  20 +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
  21 +
  22 +// transducer Usage:
  23 +/*
  24 + .\SherpaOnnx.Examples.exe `
  25 + --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `
  26 + --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `
  27 + --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `
  28 + --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `
  29 + --num-threads=2 `
  30 + --decoding-method=greedy_search `
  31 + --debug=false `
  32 + ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav
  33 + */
  34 +
  35 +// paraformer Usage:
  36 +/*
  37 + .\SherpaOnnx.Examples.exe `
  38 + --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
  39 + --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
  40 + --num-threads=2 `
  41 + --decoding-method=greedy_search `
  42 + --debug=false `
  43 + ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav
  44 + */
  45 +
  46 +// nemo Usage:
  47 +/*
  48 + .\SherpaOnnx.Examples.exe `
  49 + --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt `
  50 + --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx `
  51 + --num-threads=2 `
  52 + --decoding-method=greedy_search `
  53 + --debug=false `
  54 + ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav
  55 + */
  56 +
  57 +
  58 +internal class OfflineDecodeFiles
  59 +{
  60 + static void Main(string[] args)
  61 + {
  62 + string usage = @"
  63 +-----------------------------
  64 +transducer Usage:
  65 + --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `
  66 + --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `
  67 + --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `
  68 + --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `
  69 + --num-threads=2 `
  70 + --decoding-method=greedy_search `
  71 + --debug=false `
  72 + ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav
  73 +
  74 +paraformer Usage:
  75 + --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `
  76 + --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `
  77 + --num-threads=2 `
  78 + --decoding-method=greedy_search `
  79 + --debug=false `
  80 + ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav
  81 +
  82 +nemo Usage:
  83 + --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt `
  84 + --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx `
  85 + --num-threads=2 `
  86 + --decoding-method=greedy_search `
  87 + --debug=false `
  88 + ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav
  89 +-----------------------------
  90 +";
  91 + if (args.Length == 0)
  92 + {
  93 + System.Console.WriteLine("Please enter the correct parameters:");
  94 + System.Console.WriteLine(usage);
  95 + System.Text.StringBuilder sb = new System.Text.StringBuilder();
  96 + //args = Console.ReadLine().Split(" ");
  97 + while (true)
  98 + {
  99 + string input = Console.ReadLine();
  100 + sb.AppendLine(input);
  101 + if (Console.ReadKey().Key == ConsoleKey.Enter)
  102 + break;
  103 + }
  104 + args = sb.ToString().Split("\r\n");
  105 + }
  106 + Console.WriteLine("Started!\n");
  107 + string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
  108 + List<string> wavFiles = new List<string>();
  109 + Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);
  110 + string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : "";
  111 + string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : "";
  112 + string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : "";
  113 + string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : "";
  114 + string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : "";
  115 + string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : "";
  116 + string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : "";
  117 + string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : "";
  118 + string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : "";
  119 +
  120 + OfflineTransducer offlineTransducer = new OfflineTransducer();
  121 + offlineTransducer.EncoderFilename = encoder;
  122 + offlineTransducer.DecoderFilename = decoder;
  123 + offlineTransducer.JoinerFilename = joiner;
  124 +
  125 + OfflineParaformer offlineParaformer = new OfflineParaformer();
  126 + offlineParaformer.Model = paraformer;
  127 +
  128 + OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();
  129 + offlineNemoEncDecCtc.Model = nemo_ctc;
  130 +
  131 + int numThreads = 0;
  132 + int.TryParse(num_threads, out numThreads);
  133 + bool isDebug = false;
  134 + bool.TryParse(debug, out isDebug);
  135 +
  136 + string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method;
  137 +
  138 + if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
  139 + && string.IsNullOrEmpty(paraformer)
  140 + && string.IsNullOrEmpty(nemo_ctc))
  141 + {
  142 + Console.WriteLine("Please specify at least one model");
  143 + Console.WriteLine(usage);
  144 + }
  145 + // batch decode
  146 + TimeSpan total_duration = TimeSpan.Zero;
  147 + TimeSpan start_time = TimeSpan.Zero;
  148 + TimeSpan end_time = TimeSpan.Zero;
  149 + List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>();
  150 + if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)))
  151 + {
  152 + OfflineRecognizer<OfflineTransducer> offlineRecognizer = new OfflineRecognizer<OfflineTransducer>(
  153 + offlineTransducer,
  154 + tokens,
  155 + num_threads: numThreads,
  156 + debug: isDebug,
  157 + decoding_method: decodingMethod);
  158 + List<float[]> samplesList = new List<float[]>();
  159 + foreach (string wavFile in wavFiles)
  160 + {
  161 + TimeSpan duration = TimeSpan.Zero;
  162 + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);
  163 + samplesList.Add(samples);
  164 + total_duration += duration;
  165 + }
  166 + OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
  167 + start_time = new TimeSpan(DateTime.Now.Ticks);
  168 + offlineRecognizer.DecodeMultipleOfflineStreams(streams);
  169 + results = offlineRecognizer.GetResults(streams);
  170 + end_time = new TimeSpan(DateTime.Now.Ticks);
  171 + }
  172 + else if (!string.IsNullOrEmpty(paraformer))
  173 + {
  174 + OfflineRecognizer<OfflineParaformer> offlineRecognizer = new OfflineRecognizer<OfflineParaformer>(
  175 + offlineParaformer,
  176 + tokens,
  177 + num_threads: numThreads,
  178 + debug: isDebug,
  179 + decoding_method: decodingMethod);
  180 + List<float[]> samplesList = new List<float[]>();
  181 + foreach (string wavFile in wavFiles)
  182 + {
  183 + TimeSpan duration = TimeSpan.Zero;
  184 + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);
  185 + samplesList.Add(samples);
  186 + total_duration += duration;
  187 + }
  188 + OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
  189 + start_time = new TimeSpan(DateTime.Now.Ticks);
  190 + offlineRecognizer.DecodeMultipleOfflineStreams(streams);
  191 + results = offlineRecognizer.GetResults(streams);
  192 + end_time = new TimeSpan(DateTime.Now.Ticks);
  193 + }
  194 + else if (!string.IsNullOrEmpty(nemo_ctc))
  195 + {
  196 + OfflineRecognizer<OfflineNemoEncDecCtc> offlineRecognizer = new OfflineRecognizer<OfflineNemoEncDecCtc>(
  197 + offlineNemoEncDecCtc,
  198 + tokens,
  199 + num_threads: numThreads,
  200 + debug: isDebug,
  201 + decoding_method: decodingMethod);
  202 + List<float[]> samplesList = new List<float[]>();
  203 + foreach (string wavFile in wavFiles)
  204 + {
  205 + TimeSpan duration = TimeSpan.Zero;
  206 + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);
  207 + samplesList.Add(samples);
  208 + total_duration += duration;
  209 + }
  210 + OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);
  211 + start_time = new TimeSpan(DateTime.Now.Ticks);
  212 + offlineRecognizer.DecodeMultipleOfflineStreams(streams);
  213 + results = offlineRecognizer.GetResults(streams);
  214 + end_time = new TimeSpan(DateTime.Now.Ticks);
  215 + }
  216 +
  217 + foreach (var item in results.Zip<OfflineRecognizerResultEntity, string>(wavFiles))
  218 + {
  219 + Console.WriteLine("wavFile:{0}", item.Second);
  220 + Console.WriteLine("text:{0}", item.First.text.ToLower());
  221 + Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
  222 + }
  223 +
  224 + double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
  225 + double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
  226 + Console.WriteLine("num_threads:{0}", num_threads);
  227 + Console.WriteLine("decoding_method:{0}", decodingMethod);
  228 + Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
  229 + Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
  230 + Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());
  231 +
  232 + Console.WriteLine("End!");
  233 + }
  234 +
  235 + static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
  236 + {
  237 + Dictionary<string, string> argsDict = new Dictionary<string, string>();
  238 + foreach (string input in args)
  239 + {
  240 + string[] ss = input.Split("=");
  241 + if (ss.Length == 1)
  242 + {
  243 + if (!string.IsNullOrEmpty(ss[0]))
  244 + {
  245 + wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' })));
  246 + }
  247 + }
  248 + else
  249 + {
  250 + argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' }));
  251 + }
  252 + }
  253 + return argsDict;
  254 + }
  255 +}
  1 +// See https://aka.ms/new-console-template for more information
  2 +// Copyright (c) 2023 by manyeyes
  3 +using SherpaOnnx;
  4 +/// Please refer to
  5 +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  6 +/// to download pre-trained models. That is, you can find encoder-xxx.onnx
  7 +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
  8 +/// from there.
  9 +
  10 +/// download model eg:
  11 +/// (The directory where the application runs)
  12 +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
  13 +/// cd /path/to
  14 +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  15 +
  16 +/// NuGet for sherpa-onnx
  17 +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
  18 +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
  19 +
  20 +// transducer Usage:
  21 +/*
  22 + .\SherpaOnnx.Examples.exe `
  23 + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
  24 + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
  25 + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
  26 + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
  27 + --num-threads=2 `
  28 + --decoding-method=modified_beam_search `
  29 + --debug=false `
  30 + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
  31 + */
  32 +
  33 +internal class OnlineDecodeFile
  34 +{
  35 + static void Main(string[] args)
  36 + {
  37 + string usage = @"
  38 +-----------------------------
  39 +transducer Usage:
  40 + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
  41 + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
  42 + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
  43 + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
  44 + --num-threads=2 `
  45 + --decoding-method=modified_beam_search `
  46 + --debug=false `
  47 + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
  48 +-----------------------------
  49 +";
  50 + if (args.Length == 0)
  51 + {
  52 + System.Console.WriteLine("Please enter the correct parameters:");
  53 + System.Console.WriteLine(usage);
  54 + System.Text.StringBuilder sb = new System.Text.StringBuilder();
  55 + //args = Console.ReadLine().Split(" ");
  56 + while (true)
  57 + {
  58 + string input = Console.ReadLine();
  59 + sb.AppendLine(input);
  60 + if (Console.ReadKey().Key == ConsoleKey.Enter)
  61 + break;
  62 + }
  63 + args = sb.ToString().Split("\r\n");
  64 + }
  65 + Console.WriteLine("Started!\n");
  66 + string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
  67 + List<string> wavFiles = new List<string>();
  68 + Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);
  69 + string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : "";
  70 + string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : "";
  71 + string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : "";
  72 + string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : "";
  73 + string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : "";
  74 + string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : "";
  75 + string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : "";
  76 + string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : "";
  77 + string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : "";
  78 +
  79 + OfflineTransducer offlineTransducer = new OfflineTransducer();
  80 + offlineTransducer.EncoderFilename = encoder;
  81 + offlineTransducer.DecoderFilename = decoder;
  82 + offlineTransducer.JoinerFilename = joiner;
  83 +
  84 + OfflineParaformer offlineParaformer = new OfflineParaformer();
  85 + offlineParaformer.Model = paraformer;
  86 +
  87 + OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();
  88 + offlineNemoEncDecCtc.Model = nemo_ctc;
  89 +
  90 + int numThreads = 0;
  91 + int.TryParse(num_threads, out numThreads);
  92 + bool isDebug = false;
  93 + bool.TryParse(debug, out isDebug);
  94 +
  95 + string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method;
  96 +
  97 + if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
  98 + && string.IsNullOrEmpty(paraformer)
  99 + && string.IsNullOrEmpty(nemo_ctc))
  100 + {
  101 + Console.WriteLine("Please specify at least one model");
  102 + Console.WriteLine(usage);
  103 + }
  104 + // batch decode
  105 + TimeSpan total_duration = TimeSpan.Zero;
  106 + TimeSpan start_time = TimeSpan.Zero;
  107 + TimeSpan end_time = TimeSpan.Zero;
  108 + List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>();
  109 + if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)))
  110 + {
  111 + OnlineTransducer onlineTransducer = new OnlineTransducer();
  112 + onlineTransducer.EncoderFilename = encoder;
  113 + onlineTransducer.DecoderFilename = decoder;
  114 + onlineTransducer.JoinerFilename = joiner;
  115 + //test online
  116 + OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
  117 + onlineTransducer,
  118 + tokens,
  119 + num_threads: numThreads,
  120 + debug: isDebug,
  121 + decoding_method: decodingMethod);
  122 + foreach (string wavFile in wavFiles)
  123 + {
  124 + TimeSpan duration = TimeSpan.Zero;
  125 + List<float[]> samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration);
  126 + OnlineStream stream = onlineRecognizer.CreateStream();
  127 + start_time = new TimeSpan(DateTime.Now.Ticks);
  128 + for (int i = 0; i < samplesList.Count; i++)
  129 + {
  130 + onlineRecognizer.AcceptWaveForm(stream, 16000, samplesList[i]);
  131 + onlineRecognizer.DecodeStream(stream);
  132 + OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream);
  133 + Console.WriteLine(result_on.text);
  134 + }
  135 + total_duration += duration;
  136 + }
  137 + end_time = new TimeSpan(DateTime.Now.Ticks);
  138 + }
  139 + double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
  140 + double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
  141 + Console.WriteLine("num_threads:{0}", num_threads);
  142 + Console.WriteLine("decoding_method:{0}", decodingMethod);
  143 + Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
  144 + Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
  145 + Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());
  146 +
  147 + Console.WriteLine("End!");
  148 + }
  149 +
  150 + static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
  151 + {
  152 + Dictionary<string, string> argsDict = new Dictionary<string, string>();
  153 + foreach (string input in args)
  154 + {
  155 + string[] ss = input.Split("=");
  156 + if (ss.Length == 1)
  157 + {
  158 + if (!string.IsNullOrEmpty(ss[0]))
  159 + {
  160 + wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' })));
  161 + }
  162 + }
  163 + else
  164 + {
  165 + argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' }));
  166 + }
  167 + }
  168 + return argsDict;
  169 + }
  170 +
  171 +}
  1 +// See https://aka.ms/new-console-template for more information
  2 +// Copyright (c) 2023 by manyeyes
  3 +using SherpaOnnx;
  4 +/// Please refer to
  5 +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  6 +/// to download pre-trained models. That is, you can find encoder-xxx.onnx
  7 +/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
  8 +/// from there.
  9 +
  10 +/// download model eg:
  11 +/// (The directory where the application runs)
  12 +/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory
  13 +/// cd /path/to
  14 +/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  15 +
  16 +/// NuGet for sherpa-onnx
  17 +/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx
  18 +/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
  19 +
  20 +// transducer Usage:
  21 +/*
  22 + .\SherpaOnnx.Examples.exe `
  23 + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
  24 + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
  25 + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
  26 + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
  27 + --num-threads=2 `
  28 + --decoding-method=modified_beam_search `
  29 + --debug=false `
  30 + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `
  31 + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
  32 + */
  33 +
  34 +internal class OnlineDecodeFiles
  35 +{
  36 + static void Main(string[] args)
  37 + {
  38 + string usage = @"
  39 +-----------------------------
  40 +transducer Usage:
  41 + --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `
  42 + --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `
  43 + --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `
  44 + --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `
  45 + --num-threads=2 `
  46 + --decoding-method=modified_beam_search `
  47 + --debug=false `
  48 + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `
  49 + ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
  50 +-----------------------------
  51 +";
  52 + if (args.Length == 0)
  53 + {
  54 + System.Console.WriteLine("Please enter the correct parameters:");
  55 + System.Console.WriteLine(usage);
  56 + System.Text.StringBuilder sb = new System.Text.StringBuilder();
  57 + //args = Console.ReadLine().Split(" ");
  58 + while (true)
  59 + {
  60 + string input = Console.ReadLine();
  61 + sb.AppendLine(input);
  62 + if (Console.ReadKey().Key == ConsoleKey.Enter)
  63 + break;
  64 + }
  65 + args = sb.ToString().Split("\r\n");
  66 + }
  67 + Console.WriteLine("Started!\n");
  68 + string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;
  69 + List<string> wavFiles = new List<string>();
  70 + Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);
  71 + string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : "";
  72 + string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : "";
  73 + string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : "";
  74 + string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : "";
  75 + string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : "";
  76 + string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : "";
  77 + string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : "";
  78 + string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : "";
  79 + string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : "";
  80 +
  81 + OfflineTransducer offlineTransducer = new OfflineTransducer();
  82 + offlineTransducer.EncoderFilename = encoder;
  83 + offlineTransducer.DecoderFilename = decoder;
  84 + offlineTransducer.JoinerFilename = joiner;
  85 +
  86 + OfflineParaformer offlineParaformer = new OfflineParaformer();
  87 + offlineParaformer.Model = paraformer;
  88 +
  89 + OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();
  90 + offlineNemoEncDecCtc.Model = nemo_ctc;
  91 +
  92 + int numThreads = 0;
  93 + int.TryParse(num_threads, out numThreads);
  94 + bool isDebug = false;
  95 + bool.TryParse(debug, out isDebug);
  96 +
  97 + string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method;
  98 +
  99 + if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))
  100 + && string.IsNullOrEmpty(paraformer)
  101 + && string.IsNullOrEmpty(nemo_ctc))
  102 + {
  103 + Console.WriteLine("Please specify at least one model");
  104 + Console.WriteLine(usage);
  105 + }
  106 + // batch decode
  107 + TimeSpan total_duration = TimeSpan.Zero;
  108 + TimeSpan start_time = TimeSpan.Zero;
  109 + TimeSpan end_time = TimeSpan.Zero;
  110 + List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>();
  111 + if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)))
  112 + {
  113 + OnlineTransducer onlineTransducer = new OnlineTransducer();
  114 + onlineTransducer.EncoderFilename = encoder;
  115 + onlineTransducer.DecoderFilename = decoder;
  116 + onlineTransducer.JoinerFilename = joiner;
  117 + //test online
  118 + OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
  119 + onlineTransducer,
  120 + tokens,
  121 + num_threads: numThreads,
  122 + debug: isDebug,
  123 + decoding_method: decodingMethod);
  124 + List<float[]> samplesList = new List<float[]>();
  125 + foreach (string wavFile in wavFiles)
  126 + {
  127 + TimeSpan duration = TimeSpan.Zero;
  128 + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);
  129 + samplesList.Add(samples);
  130 + total_duration += duration;
  131 + }
  132 + start_time = new TimeSpan(DateTime.Now.Ticks);
  133 + List<OnlineStream> streams = new List<OnlineStream>();
  134 + foreach (float[] samples in samplesList)
  135 + {
  136 + OnlineStream stream = onlineRecognizer.CreateStream();
  137 + onlineRecognizer.AcceptWaveForm(stream, 16000, samples);
  138 + streams.Add(stream);
  139 + onlineRecognizer.InputFinished(stream);
  140 + }
  141 + onlineRecognizer.DecodeMultipleStreams(streams);
  142 + results = onlineRecognizer.GetResults(streams);
  143 + foreach (OnlineRecognizerResultEntity result in results)
  144 + {
  145 + Console.WriteLine(result.text);
  146 + }
  147 + end_time = new TimeSpan(DateTime.Now.Ticks);
  148 + }
  149 +
  150 +
  151 + foreach (var item in results.Zip<OnlineRecognizerResultEntity, string>(wavFiles))
  152 + {
  153 + Console.WriteLine("wavFile:{0}", item.Second);
  154 + Console.WriteLine("text:{0}", item.First.text.ToLower());
  155 + Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());
  156 + }
  157 +
  158 + double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
  159 + double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
  160 + Console.WriteLine("num_threads:{0}", num_threads);
  161 + Console.WriteLine("decoding_method:{0}", decodingMethod);
  162 + Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
  163 + Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
  164 + Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());
  165 +
  166 + Console.WriteLine("End!");
  167 + }
  168 +
  169 + public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List<string> wavFiles, ref TimeSpan total_duration)
  170 + {
  171 + OnlineTransducer onlineTransducer = new OnlineTransducer();
  172 + onlineTransducer.EncoderFilename = encoder;
  173 + onlineTransducer.DecoderFilename = decoder;
  174 + onlineTransducer.JoinerFilename = joiner;
  175 + //test online
  176 + OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(
  177 + onlineTransducer,
  178 + tokens,
  179 + num_threads: numThreads,
  180 + debug: isDebug,
  181 + decoding_method: decodingMethod);
  182 + List<float[]> samplesList = new List<float[]>();
  183 + foreach (string wavFile in wavFiles)
  184 + {
  185 + TimeSpan duration = TimeSpan.Zero;
  186 + float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);
  187 + samplesList.Add(samples);
  188 + total_duration += duration;
  189 + }
  190 + TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);
  191 + List<OnlineStream> streams = onlineRecognizer.CreateStreams(samplesList);
  192 + onlineRecognizer.DecodeMultipleStreams(streams);
  193 + List<OnlineRecognizerResultEntity> results = onlineRecognizer.GetResults(streams);
  194 + foreach (OnlineRecognizerResultEntity result in results)
  195 + {
  196 + Console.WriteLine(result.text);
  197 + }
  198 + TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);
  199 + }
  200 +
  201 + static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)
  202 + {
  203 + Dictionary<string, string> argsDict = new Dictionary<string, string>();
  204 + foreach (string input in args)
  205 + {
  206 + string[] ss = input.Split("=");
  207 + if (ss.Length == 1)
  208 + {
  209 + if (!string.IsNullOrEmpty(ss[0]))
  210 + {
  211 + wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' })));
  212 + }
  213 + }
  214 + else
  215 + {
  216 + argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' }));
  217 + }
  218 + }
  219 + return argsDict;
  220 + }
  221 +}
  1 +# ProjectReference csharp-api
  2 +`<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />`
  3 +The 'SherpaOnnx' project file is located in ../sherpa-onnx/csharp-api.
  4 +This C# API is cross-platform; you can compile it yourself on Windows, macOS, and Linux.
  5 +
  6 +------------
  7 +Alternatively, install sherpa-onnx through NuGet.
  8 +# NuGet for sherpa-onnx
  9 +PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx
  1 +using NAudio.Wave;
  2 +using System;
  3 +using System.Collections.Generic;
  4 +using System.Diagnostics;
  5 +using System.Linq;
  6 +using System.Text;
  7 +using System.Threading.Tasks;
  8 +
/// <summary>
/// Helpers that load an audio file through NAudio's AudioFileReader and
/// expose its bytes as 32-bit float samples.
/// Copyright (c) 2023 by manyeyes
/// </summary>
public class AudioHelper
{
    /// <summary>
    /// Size in bytes of one chunk produced by <see cref="GetChunkSamplesList"/>;
    /// every chunk array therefore holds ChunkSizeInBytes / sizeof(float) samples.
    /// </summary>
    private const int ChunkSizeInBytes = 16000;

    /// <summary>
    /// Reads the whole file into a single float array.
    /// </summary>
    /// <param name="wavFilePath">Path of the audio file to read.</param>
    /// <param name="duration">Set to the reader's TotalTime when the file exists; untouched otherwise.</param>
    /// <returns>
    /// The samples of the file, or a one-element array when the file does not
    /// exist (after a failed trace assertion).
    /// </returns>
    public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration)
    {
        if (!File.Exists(wavFilePath))
        {
            Trace.Assert(false, "file does not exist:" + wavFilePath);
            return new float[1];
        }

        byte[] datas = ReadAllSampleBytes(wavFilePath, out TimeSpan totalTime);
        duration = totalTime;

        float[] wavdata = new float[datas.Length / sizeof(float)];
        // Copy whole floats only, so a stray trailing byte cannot overflow the target.
        Buffer.BlockCopy(datas, 0, wavdata, 0, wavdata.Length * sizeof(float));
        return wavdata;
    }

    /// <summary>
    /// Reads the whole file and splits it into fixed-size chunks. Every chunk
    /// has the same length; the last one is zero-padded when the file does
    /// not fill it completely.
    /// </summary>
    /// <param name="wavFilePath">Path of the audio file to read.</param>
    /// <param name="duration">
    /// The reader's TotalTime is added to this value exactly once per call
    /// (it used to be added once per chunk, which multiplied the duration by
    /// the number of chunks).
    /// </param>
    /// <returns>
    /// The list of chunks, or a list holding a single one-element array when
    /// the file does not exist (after a failed trace assertion).
    /// </returns>
    public static List<float[]> GetChunkSamplesList(string wavFilePath, ref TimeSpan duration)
    {
        List<float[]> wavdatas = new List<float[]>();
        if (!File.Exists(wavFilePath))
        {
            Trace.Assert(false, "file does not exist:" + wavFilePath);
            wavdatas.Add(new float[1]);
            return wavdatas;
        }

        byte[] datas = ReadAllSampleBytes(wavFilePath, out TimeSpan totalTime);
        duration += totalTime;

        for (int offset = 0; offset < datas.Length; offset += ChunkSizeInBytes)
        {
            int dataCount = Math.Min(ChunkSizeInBytes, datas.Length - offset);
            float[] wavdata = new float[ChunkSizeInBytes / sizeof(float)];
            Buffer.BlockCopy(datas, offset, wavdata, 0, dataCount);
            wavdatas.Add(wavdata);
        }
        return wavdatas;
    }

    /// <summary>
    /// Opens the file, reads reader.Length bytes from it and disposes the reader.
    /// Read is called repeatedly because a single call is not guaranteed to
    /// fill the buffer; bytes that could not be read stay zero.
    /// </summary>
    private static byte[] ReadAllSampleBytes(string wavFilePath, out TimeSpan totalTime)
    {
        using AudioFileReader reader = new AudioFileReader(wavFilePath);
        totalTime = reader.TotalTime;

        byte[] buffer = new byte[reader.Length];
        int filled = 0;
        while (filled < buffer.Length)
        {
            int read = reader.Read(buffer, filled, buffer.Length - filled);
            if (read <= 0)
            {
                break;
            }
            filled += read;
        }
        return buffer;
    }
}
  1 +<Project Sdk="Microsoft.NET.Sdk">
  2 +
  3 + <PropertyGroup>
  4 + <OutputType>Exe</OutputType>
  5 + <TargetFramework>net6.0</TargetFramework>
  6 + <RootNamespace>sherpa_onnx</RootNamespace>
  7 + <ImplicitUsings>enable</ImplicitUsings>
  8 + <Nullable>enable</Nullable>
  9 + <StartupObject>OnlineDecodeFiles</StartupObject>
  10 + </PropertyGroup>
  11 +
  12 + <ItemGroup>
  13 + <PackageReference Include="NAudio" Version="2.1.0" />
  14 + </ItemGroup>
  15 +
  16 + <ItemGroup>
  17 + <ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />
  18 + </ItemGroup>
  19 +
  20 +</Project>
  1 +using System.Runtime.InteropServices;
  2 +using System.Diagnostics;
  3 +
  4 +namespace SherpaOnnx
  5 +{
  6 + /// <summary>
  7 + /// online recognizer package
  8 + /// Copyright (c) 2023 by manyeyes
  9 + /// </summary>
  10 + public class OnlineBase : IDisposable
  11 + {
  12 + public void Dispose()
  13 + {
  14 + Dispose(disposing: true);
  15 + GC.SuppressFinalize(this);
  16 + }
  17 + protected virtual void Dispose(bool disposing)
  18 + {
  19 + if (!disposing)
  20 + {
  21 + if (_onlineRecognizerResult != IntPtr.Zero)
  22 + {
  23 + SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
  24 + _onlineRecognizerResult = IntPtr.Zero;
  25 + }
  26 + if (_onlineStream.impl != IntPtr.Zero)
  27 + {
  28 + SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
  29 + _onlineStream.impl = IntPtr.Zero;
  30 + }
  31 + if (_onlineRecognizer.impl != IntPtr.Zero)
  32 + {
  33 + SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
  34 + _onlineRecognizer.impl = IntPtr.Zero;
  35 + }
  36 + this._disposed = true;
  37 + }
  38 + }
  39 + ~OnlineBase()
  40 + {
  41 + Dispose(this._disposed);
  42 + }
  43 + internal SherpaOnnxOnlineStream _onlineStream;
  44 + internal IntPtr _onlineRecognizerResult;
  45 + internal SherpaOnnxOnlineRecognizer _onlineRecognizer;
  46 + internal bool _disposed = false;
  47 + }
  48 + public class OnlineStream : OnlineBase
  49 + {
  50 + internal OnlineStream(SherpaOnnxOnlineStream onlineStream)
  51 + {
  52 + this._onlineStream = onlineStream;
  53 + }
  54 + protected override void Dispose(bool disposing)
  55 + {
  56 + if (!disposing)
  57 + {
  58 + SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);
  59 + _onlineStream.impl = IntPtr.Zero;
  60 + this._disposed = true;
  61 + base.Dispose();
  62 + }
  63 + }
  64 + }
  65 + public class OnlineRecognizerResult : OnlineBase
  66 + {
  67 + internal OnlineRecognizerResult(IntPtr onlineRecognizerResult)
  68 + {
  69 + this._onlineRecognizerResult = onlineRecognizerResult;
  70 + }
  71 + protected override void Dispose(bool disposing)
  72 + {
  73 + if (!disposing)
  74 + {
  75 + SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);
  76 + _onlineRecognizerResult = IntPtr.Zero;
  77 + this._disposed = true;
  78 + base.Dispose(disposing);
  79 + }
  80 + }
  81 + }
  82 + public class OnlineRecognizer<T> : OnlineBase
  83 + where T : class, new()
  84 + {
  85 +
  86 + public OnlineRecognizer(T t,
  87 + string tokensFilePath, string decoding_method = "greedy_search",
  88 + int sample_rate = 16000, int feature_dim = 80,
  89 + int num_threads = 2, bool debug = false, int max_active_paths = 4,
  90 + int enable_endpoint=0,int rule1_min_trailing_silence=0,
  91 + int rule2_min_trailing_silence=0,int rule3_min_utterance_length=0)
  92 + {
  93 + SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer();
  94 + SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig();
  95 + if (t is not null && t.GetType() == typeof(OnlineTransducer))
  96 + {
  97 + OnlineTransducer? onlineTransducer = t as OnlineTransducer;
  98 +#pragma warning disable CS8602 // 解引用可能出现空引用。
  99 + Trace.Assert(File.Exists(onlineTransducer.DecoderFilename)
  100 + && File.Exists(onlineTransducer.EncoderFilename)
  101 + && File.Exists(onlineTransducer.JoinerFilename), "Please provide a model");
  102 +#pragma warning restore CS8602 // 解引用可能出现空引用。
  103 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
  104 + Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
  105 + transducer.encoder_filename = onlineTransducer.EncoderFilename;
  106 + transducer.decoder_filename = onlineTransducer.DecoderFilename;
  107 + transducer.joiner_filename = onlineTransducer.JoinerFilename;
  108 + }
  109 +
  110 + model_config.transducer = transducer;
  111 + model_config.num_threads = num_threads;
  112 + model_config.debug = debug;
  113 + model_config.tokens = tokensFilePath;
  114 +
  115 + SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
  116 + feat_config.sample_rate = sample_rate;
  117 + feat_config.feature_dim = feature_dim;
  118 +
  119 + SherpaOnnxOnlineRecognizerConfig sherpaOnnxOnlineRecognizerConfig;
  120 + sherpaOnnxOnlineRecognizerConfig.decoding_method = decoding_method;
  121 + sherpaOnnxOnlineRecognizerConfig.feat_config = feat_config;
  122 + sherpaOnnxOnlineRecognizerConfig.model_config = model_config;
  123 + sherpaOnnxOnlineRecognizerConfig.max_active_paths = max_active_paths;
  124 + //endpoint
  125 + sherpaOnnxOnlineRecognizerConfig.enable_endpoint = enable_endpoint;
  126 + sherpaOnnxOnlineRecognizerConfig.rule1_min_trailing_silence = rule1_min_trailing_silence;
  127 + sherpaOnnxOnlineRecognizerConfig.rule2_min_trailing_silence = rule2_min_trailing_silence;
  128 + sherpaOnnxOnlineRecognizerConfig.rule3_min_utterance_length = rule3_min_utterance_length;
  129 +
  130 + _onlineRecognizer =
  131 + SherpaOnnxSharp.CreateOnlineRecognizer(sherpaOnnxOnlineRecognizerConfig);
  132 + }
  133 + internal OnlineStream CreateOnlineStream()
  134 + {
  135 + SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer);
  136 + return new OnlineStream(stream);
  137 + }
  138 + public void InputFinished(OnlineStream stream)
  139 + {
  140 + SherpaOnnxSharp.InputFinished(stream._onlineStream);
  141 + }
  142 + public List<OnlineStream> CreateStreams(List<float[]> samplesList)
  143 + {
  144 + int batch_size = samplesList.Count;
  145 + List<OnlineStream> streams = new List<OnlineStream>();
  146 + for (int i = 0; i < batch_size; i++)
  147 + {
  148 + OnlineStream stream = CreateOnlineStream();
  149 + AcceptWaveform(stream._onlineStream, 16000, samplesList[i]);
  150 + InputFinished(stream);
  151 + streams.Add(stream);
  152 + }
  153 + return streams;
  154 + }
  155 + public OnlineStream CreateStream()
  156 + {
  157 + OnlineStream stream = CreateOnlineStream();
  158 + return stream;
  159 + }
  160 + internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples)
  161 + {
  162 + SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length);
  163 + }
  164 + public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples)
  165 + {
  166 + AcceptWaveform(stream._onlineStream, sample_rate, samples);
  167 + }
  168 + internal IntPtr GetStreamsIntPtr(OnlineStream[] streams)
  169 + {
  170 + int streams_len = streams.Length;
  171 + int size = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream));
  172 + IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len);
  173 + unsafe
  174 + {
  175 + byte* ptrbds = (byte*)(streamsIntPtr.ToPointer());
  176 + for (int i = 0; i < streams_len; i++, ptrbds += (size))
  177 + {
  178 + IntPtr streamIntptr = new IntPtr(ptrbds);
  179 + Marshal.StructureToPtr(streams[i]._onlineStream, streamIntptr, false);
  180 + }
  181 +
  182 + }
  183 + return streamsIntPtr;
  184 + }
  185 + internal bool IsReady(OnlineStream stream)
  186 + {
  187 + return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0;
  188 + }
  189 + public void DecodeMultipleStreams(List<OnlineStream> streams)
  190 + {
  191 + while (true)
  192 + {
  193 + List<OnlineStream> streamList = new List<OnlineStream>();
  194 + foreach (OnlineStream stream in streams)
  195 + {
  196 + if (IsReady(stream))
  197 + {
  198 + streamList.Add(stream);
  199 + }
  200 + }
  201 + if (streamList.Count == 0)
  202 + {
  203 + break;
  204 + }
  205 + OnlineStream[] streamsBatch = new OnlineStream[streamList.Count];
  206 + for (int i = 0; i < streamsBatch.Length; i++)
  207 + {
  208 + streamsBatch[i] = streamList[i];
  209 + }
  210 + streamList.Clear();
  211 + IntPtr streamsIntPtr = GetStreamsIntPtr(streamsBatch);
  212 + SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, streamsIntPtr, streamsBatch.Length);
  213 + Marshal.FreeHGlobal(streamsIntPtr);
  214 + }
  215 + }
  216 + public void DecodeStream(OnlineStream stream)
  217 + {
  218 + while (IsReady(stream))
  219 + {
  220 + SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream);
  221 + }
  222 + }
  223 + internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream)
  224 + {
  225 + IntPtr result_ip = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream);
  226 + OnlineRecognizerResult onlineRecognizerResult = new OnlineRecognizerResult(result_ip);
  227 +#pragma warning disable CS8605 // 取消装箱可能为 null 的值。
  228 + SherpaOnnxOnlineRecognizerResult result =
  229 + (SherpaOnnxOnlineRecognizerResult)Marshal.PtrToStructure(
  230 + onlineRecognizerResult._onlineRecognizerResult, typeof(SherpaOnnxOnlineRecognizerResult));
  231 +#pragma warning restore CS8605 // 取消装箱可能为 null 的值。
  232 +
  233 +#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。
  234 + string text = Marshal.PtrToStringAnsi(result.text);
  235 +#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。
  236 + OnlineRecognizerResultEntity onlineRecognizerResultEntity =
  237 + new OnlineRecognizerResultEntity();
  238 + onlineRecognizerResultEntity.text = text;
  239 + onlineRecognizerResultEntity.text_len = result.text_len;
  240 +
  241 + return onlineRecognizerResultEntity;
  242 + }
  243 + public OnlineRecognizerResultEntity GetResult(OnlineStream stream)
  244 + {
  245 + OnlineRecognizerResultEntity result = GetResult(stream._onlineStream);
  246 + return result;
  247 + }
  248 + public List<OnlineRecognizerResultEntity> GetResults(List<OnlineStream> streams)
  249 + {
  250 + List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>();
  251 + foreach (OnlineStream stream in streams)
  252 + {
  253 + OnlineRecognizerResultEntity onlineRecognizerResultEntity = GetResult(stream._onlineStream);
  254 + results.Add(onlineRecognizerResultEntity);
  255 + }
  256 + return results;
  257 + }
  258 + protected override void Dispose(bool disposing)
  259 + {
  260 + if (!disposing)
  261 + {
  262 + SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
  263 + _onlineRecognizer.impl = IntPtr.Zero;
  264 + this._disposed = true;
  265 + base.Dispose();
  266 + }
  267 + }
  268 + }
  269 + public class OfflineBase : IDisposable
  270 + {
  271 + public void Dispose()
  272 + {
  273 + Dispose(disposing: true);
  274 + GC.SuppressFinalize(this);
  275 + }
  276 + protected virtual void Dispose(bool disposing)
  277 + {
  278 + if (!disposing)
  279 + {
  280 + if (_offlineRecognizerResult != IntPtr.Zero)
  281 + {
  282 + SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
  283 + _offlineRecognizerResult = IntPtr.Zero;
  284 + }
  285 + if (_offlineStream.impl != IntPtr.Zero)
  286 + {
  287 + SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
  288 + _offlineStream.impl = IntPtr.Zero;
  289 + }
  290 + if (_offlineRecognizer.impl != IntPtr.Zero)
  291 + {
  292 + SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
  293 + _offlineRecognizer.impl = IntPtr.Zero;
  294 + }
  295 + this._disposed = true;
  296 + }
  297 + }
  298 + ~OfflineBase()
  299 + {
  300 + Dispose(this._disposed);
  301 + }
  302 + internal SherpaOnnxOfflineStream _offlineStream;
  303 + internal IntPtr _offlineRecognizerResult;
  304 + internal SherpaOnnxOfflineRecognizer _offlineRecognizer;
  305 + internal bool _disposed = false;
  306 + }
  307 + public class OfflineStream : OfflineBase
  308 + {
  309 + internal OfflineStream(SherpaOnnxOfflineStream offlineStream)
  310 + {
  311 + this._offlineStream = offlineStream;
  312 + }
  313 +
  314 + protected override void Dispose(bool disposing)
  315 + {
  316 + if (!disposing)
  317 + {
  318 + SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
  319 + _offlineStream.impl = IntPtr.Zero;
  320 + this._disposed = true;
  321 + base.Dispose();
  322 + }
  323 + }
  324 + }
  325 + public class OfflineRecognizerResult : OfflineBase
  326 + {
  327 + internal OfflineRecognizerResult(IntPtr offlineRecognizerResult)
  328 + {
  329 + this._offlineRecognizerResult = offlineRecognizerResult;
  330 + }
  331 + protected override void Dispose(bool disposing)
  332 + {
  333 + if (!disposing)
  334 + {
  335 + SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
  336 + _offlineRecognizerResult = IntPtr.Zero;
  337 + this._disposed = true;
  338 + base.Dispose(disposing);
  339 + }
  340 + }
  341 + }
  342 + public class OfflineRecognizer<T> : OfflineBase
  343 + where T : class, new()
  344 + {
  345 + public OfflineRecognizer(T t,
  346 + string tokensFilePath, string decoding_method = "greedy_search",
  347 + int sample_rate = 16000, int feature_dim = 80,
  348 + int num_threads = 2, bool debug = false)
  349 + {
  350 + SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer();
  351 + SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer();
  352 + SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc();
  353 + SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig();
  354 + if (t is not null && t.GetType() == typeof(OfflineTransducer))
  355 + {
  356 + OfflineTransducer? offlineTransducer = t as OfflineTransducer;
  357 +#pragma warning disable CS8602 // 解引用可能出现空引用。
  358 + Trace.Assert(File.Exists(offlineTransducer.DecoderFilename)
  359 + && File.Exists(offlineTransducer.EncoderFilename)
  360 + && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model");
  361 +#pragma warning restore CS8602 // 解引用可能出现空引用。
  362 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
  363 + Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
  364 + transducer.encoder_filename = offlineTransducer.EncoderFilename;
  365 + transducer.decoder_filename = offlineTransducer.DecoderFilename;
  366 + transducer.joiner_filename = offlineTransducer.JoinerFilename;
  367 + }
  368 + else if (t is not null && t.GetType() == typeof(OfflineParaformer))
  369 + {
  370 + OfflineParaformer? offlineParaformer = t as OfflineParaformer;
  371 +#pragma warning disable CS8602 // 解引用可能出现空引用。
  372 + Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model");
  373 +#pragma warning restore CS8602 // 解引用可能出现空引用。
  374 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
  375 + Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
  376 + paraformer.model = offlineParaformer.Model;
  377 + }
  378 + else if (t is not null && t.GetType() == typeof(OfflineNemoEncDecCtc))
  379 + {
  380 + OfflineNemoEncDecCtc? offlineNemoEncDecCtc = t as OfflineNemoEncDecCtc;
  381 +#pragma warning disable CS8602 // 解引用可能出现空引用。
  382 + Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model");
  383 +#pragma warning restore CS8602 // 解引用可能出现空引用。
  384 + Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
  385 + Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
  386 + nemo_ctc.model = offlineNemoEncDecCtc.Model;
  387 + }
  388 +
  389 + model_config.transducer = transducer;
  390 + model_config.paraformer = paraformer;
  391 + model_config.nemo_ctc = nemo_ctc;
  392 + model_config.num_threads = num_threads;
  393 + model_config.debug = debug;
  394 + model_config.tokens = tokensFilePath;
  395 +
  396 + SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig();
  397 + feat_config.sample_rate = sample_rate;
  398 + feat_config.feature_dim = feature_dim;
  399 +
  400 + SherpaOnnxOfflineRecognizerConfig sherpaOnnxOfflineRecognizerConfig;
  401 + sherpaOnnxOfflineRecognizerConfig.decoding_method = decoding_method;
  402 + sherpaOnnxOfflineRecognizerConfig.feat_config = feat_config;
  403 + sherpaOnnxOfflineRecognizerConfig.model_config = model_config;
  404 +
  405 + _offlineRecognizer =
  406 + SherpaOnnxSharp.CreateOfflineRecognizer(sherpaOnnxOfflineRecognizerConfig);
  407 + }
  408 + internal OfflineStream CreateOfflineStream()
  409 + {
  410 + SherpaOnnxOfflineStream stream = SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer);
  411 + return new OfflineStream(stream);
  412 + }
  413 + public OfflineStream[] CreateOfflineStream(List<float[]> samplesList)
  414 + {
  415 + int batch_size = samplesList.Count;
  416 + OfflineStream[] streams = new OfflineStream[batch_size];
  417 + List<string> wavFiles = new List<string>();
  418 + for (int i = 0; i < batch_size; i++)
  419 + {
  420 + OfflineStream stream = CreateOfflineStream();
  421 + AcceptWaveform(stream._offlineStream, 16000, samplesList[i]);
  422 + streams[i] = stream;
  423 + }
  424 + return streams;
  425 + }
  426 + internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples)
  427 + {
  428 + SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length);
  429 + }
  430 + internal IntPtr GetStreamsIntPtr(OfflineStream[] streams)
  431 + {
  432 + int streams_len = streams.Length;
  433 + int size = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream));
  434 + IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len);
  435 + unsafe
  436 + {
  437 + byte* ptrbds = (byte*)(streamsIntPtr.ToPointer());
  438 + for (int i = 0; i < streams_len; i++, ptrbds += (size))
  439 + {
  440 + IntPtr streamIntptr = new IntPtr(ptrbds);
  441 + Marshal.StructureToPtr(streams[i]._offlineStream, streamIntptr, false);
  442 + }
  443 + }
  444 + return streamsIntPtr;
  445 + }
  446 + public void DecodeMultipleOfflineStreams(OfflineStream[] streams)
  447 + {
  448 + IntPtr streamsIntPtr = GetStreamsIntPtr(streams);
  449 + SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsIntPtr, streams.Length);
  450 + Marshal.FreeHGlobal(streamsIntPtr);
  451 + }
  452 + internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream)
  453 + {
  454 + IntPtr result_ip = SherpaOnnxSharp.GetOfflineStreamResult(stream);
  455 + OfflineRecognizerResult offlineRecognizerResult = new OfflineRecognizerResult(result_ip);
  456 +#pragma warning disable CS8605 // 取消装箱可能为 null 的值。
  457 + SherpaOnnxOfflineRecognizerResult result =
  458 + (SherpaOnnxOfflineRecognizerResult)Marshal.PtrToStructure(
  459 + offlineRecognizerResult._offlineRecognizerResult, typeof(SherpaOnnxOfflineRecognizerResult));
  460 +#pragma warning restore CS8605 // 取消装箱可能为 null 的值。
  461 +
  462 +#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。
  463 + string text = Marshal.PtrToStringAnsi(result.text);
  464 +#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。
  465 + OfflineRecognizerResultEntity offlineRecognizerResultEntity =
  466 + new OfflineRecognizerResultEntity();
  467 + offlineRecognizerResultEntity.text = text;
  468 + offlineRecognizerResultEntity.text_len = result.text_len;
  469 +
  470 + return offlineRecognizerResultEntity;
  471 + }
  472 + public List<OfflineRecognizerResultEntity> GetResults(OfflineStream[] streams)
  473 + {
  474 + List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>();
  475 + foreach (OfflineStream stream in streams)
  476 + {
  477 + OfflineRecognizerResultEntity offlineRecognizerResultEntity = GetResult(stream._offlineStream);
  478 + results.Add(offlineRecognizerResultEntity);
  479 + }
  480 + return results;
  481 + }
  482 + protected override void Dispose(bool disposing)
  483 + {
  484 + if (!disposing)
  485 + {
  486 + SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
  487 + _offlineRecognizer.impl = IntPtr.Zero;
  488 + this._disposed = true;
  489 + base.Dispose();
  490 + }
  491 + }
  492 + }
  493 + internal static partial class SherpaOnnxSharp
  494 + {
  495 + private const string dllName = @"SherpaOnnxSharp";
  496 +
  497 + [DllImport(dllName, EntryPoint = "CreateOfflineRecognizer", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  498 + internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(SherpaOnnxOfflineRecognizerConfig config);
  499 +
  500 + [DllImport(dllName, EntryPoint = "CreateOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  501 + internal static extern SherpaOnnxOfflineStream CreateOfflineStream(SherpaOnnxOfflineRecognizer offlineRecognizer);
  502 +
  503 + [DllImport(dllName, EntryPoint = "AcceptWaveform", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  504 + internal static extern void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size);
  505 +
  506 + [DllImport(dllName, EntryPoint = "DecodeOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  507 + internal static extern void DecodeOfflineStream(SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream);
  508 +
  509 + [DllImport(dllName, EntryPoint = "DecodeMultipleOfflineStreams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  510 + internal static extern void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer recognizer, IntPtr
  511 + streams, int n);
  512 +
  513 + [DllImport(dllName, EntryPoint = "GetOfflineStreamResult", CallingConvention = CallingConvention.Cdecl)]
  514 + internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream);
  515 +
  516 + [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizerResult", CallingConvention = CallingConvention.Cdecl)]
  517 + internal static extern void DestroyOfflineRecognizerResult(IntPtr result);
  518 +
  519 + [DllImport(dllName, EntryPoint = "DestroyOfflineStream", CallingConvention = CallingConvention.Cdecl)]
  520 + internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream);
  521 +
  522 + [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizer", CallingConvention = CallingConvention.Cdecl)]
  523 + internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer);
  524 +
  525 + [DllImport(dllName, EntryPoint = "CreateOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)]
  526 + internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(SherpaOnnxOnlineRecognizerConfig config);
  527 +
  528 + /// Free a pointer returned by CreateOnlineRecognizer()
  529 + ///
  530 + /// @param p A pointer returned by CreateOnlineRecognizer()
  531 + [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)]
  532 + internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer);
  533 +
  534 + /// Create an online stream for accepting wave samples.
  535 + ///
  536 + /// @param recognizer A pointer returned by CreateOnlineRecognizer()
  537 + /// @return Return a pointer to an OnlineStream. The user has to invoke
  538 + /// DestroyOnlineStream() to free it to avoid memory leak.
  539 + [DllImport(dllName, EntryPoint = "CreateOnlineStream", CallingConvention = CallingConvention.Cdecl)]
  540 + internal static extern SherpaOnnxOnlineStream CreateOnlineStream(
  541 + SherpaOnnxOnlineRecognizer recognizer);
  542 +
  543 + /// Destroy an online stream.
  544 + ///
  545 + /// @param stream A pointer returned by CreateOnlineStream()
  546 + [DllImport(dllName, EntryPoint = "DestroyOnlineStream", CallingConvention = CallingConvention.Cdecl)]
  547 + internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream);
  548 +
  549 + /// Accept input audio samples and compute the features.
  550 + /// The user has to invoke DecodeOnlineStream() to run the neural network and
  551 + /// decoding.
  552 + ///
  553 + /// @param stream A pointer returned by CreateOnlineStream().
  554 + /// @param sample_rate Sample rate of the input samples. If it is different
  555 + /// from config.feat_config.sample_rate, we will do
  556 + /// resampling inside sherpa-onnx.
  557 + /// @param samples A pointer to a 1-D array containing audio samples.
  558 + /// The range of samples has to be normalized to [-1, 1].
  559 + /// @param n Number of elements in the samples array.
  560 + [DllImport(dllName, EntryPoint = "AcceptOnlineWaveform", CallingConvention = CallingConvention.Cdecl)]
  561 + internal static extern void AcceptOnlineWaveform(SherpaOnnxOnlineStream stream, int sample_rate,
  562 + float[] samples, int n);
  563 +
  564 + /// Return 1 if there are enough number of feature frames for decoding.
  565 + /// Return 0 otherwise.
  566 + ///
  567 + /// @param recognizer A pointer returned by CreateOnlineRecognizer
  568 + /// @param stream A pointer returned by CreateOnlineStream
  569 + [DllImport(dllName, EntryPoint = "IsOnlineStreamReady", CallingConvention = CallingConvention.Cdecl)]
  570 + internal static extern int IsOnlineStreamReady(SherpaOnnxOnlineRecognizer recognizer,
  571 + SherpaOnnxOnlineStream stream);
  572 +
  573 + /// Call this function to run the neural network model and decoding.
  574 + //
  575 + /// Precondition for this function: IsOnlineStreamReady() MUST return 1.
  576 + ///
  577 + /// Usage example:
  578 + ///
  579 + /// while (IsOnlineStreamReady(recognizer, stream)) {
  580 + /// DecodeOnlineStream(recognizer, stream);
  581 + /// }
  582 + ///
  583 + [DllImport(dllName, EntryPoint = "DecodeOnlineStream", CallingConvention = CallingConvention.Cdecl)]
  584 + internal static extern void DecodeOnlineStream(SherpaOnnxOnlineRecognizer recognizer,
  585 + SherpaOnnxOnlineStream stream);
  586 +
  587 + /// This function is similar to DecodeOnlineStream(). It decodes multiple
  588 + /// OnlineStream in parallel.
  589 + ///
  590 + /// Caution: The caller has to ensure each OnlineStream is ready, i.e.,
  591 + /// IsOnlineStreamReady() for that stream should return 1.
  592 + ///
  593 + /// @param recognizer A pointer returned by CreateOnlineRecognizer()
  594 + /// @param streams A pointer array containing pointers returned by
  595 + /// CreateOnlineRecognizer()
  596 + /// @param n Number of elements in the given streams array.
  597 + [DllImport(dllName, EntryPoint = "DecodeMultipleOnlineStreams", CallingConvention = CallingConvention.Cdecl)]
  598 + internal static extern void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer recognizer,
  599 + IntPtr streams, int n);
  600 +
  601 + /// Get the decoding results so far for an OnlineStream.
  602 + ///
  603 + /// @param recognizer A pointer returned by CreateOnlineRecognizer().
  604 + /// @param stream A pointer returned by CreateOnlineStream().
  605 + /// @return A pointer containing the result. The user has to invoke
  606 + /// DestroyOnlineRecognizerResult() to free the returned pointer to
  607 + /// avoid memory leak.
  608 + [DllImport(dllName, EntryPoint = "GetOnlineStreamResult", CallingConvention = CallingConvention.Cdecl)]
  609 + internal static extern IntPtr GetOnlineStreamResult(
  610 + SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);
  611 +
  612 + /// Destroy the pointer returned by GetOnlineStreamResult().
  613 + ///
  614 + /// @param r A pointer returned by GetOnlineStreamResult()
  615 + [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizerResult", CallingConvention = CallingConvention.Cdecl)]
  616 + internal static extern void DestroyOnlineRecognizerResult(IntPtr result);
  617 +
  618 + /// Reset an OnlineStream , which clears the neural network model state
  619 + /// and the state for decoding.
  620 + ///
  621 + /// @param recognizer A pointer returned by CreateOnlineRecognizer().
  622 + /// @param stream A pointer returned by CreateOnlineStream
  623 + [DllImport(dllName, EntryPoint = "Reset", CallingConvention = CallingConvention.Cdecl)]
  624 + internal static extern void Reset(SherpaOnnxOnlineRecognizer recognizer,
  625 + SherpaOnnxOnlineStream stream);
  626 +
  627 + /// Signal that no more audio samples would be available.
  628 + /// After this call, you cannot call AcceptWaveform() any more.
  629 + ///
  630 + /// @param stream A pointer returned by CreateOnlineStream()
  631 + [DllImport(dllName, EntryPoint = "InputFinished", CallingConvention = CallingConvention.Cdecl)]
  632 + internal static extern void InputFinished(SherpaOnnxOnlineStream stream);
  633 +
  634 + /// Return 1 if an endpoint has been detected.
  635 + ///
  636 + /// @param recognizer A pointer returned by CreateOnlineRecognizer()
  637 + /// @param stream A pointer returned by CreateOnlineStream()
  638 + /// @return Return 1 if an endpoint is detected. Return 0 otherwise.
  639 + [DllImport(dllName, EntryPoint = "IsEndpoint", CallingConvention = CallingConvention.Cdecl)]
  640 + internal static extern int IsEndpoint(SherpaOnnxOnlineRecognizer recognizer,
  641 + SherpaOnnxOnlineStream stream);
  642 + }
  643 + internal struct SherpaOnnxOfflineTransducer
  644 + {
  645 + public string encoder_filename;
  646 + public string decoder_filename;
  647 + public string joiner_filename;
  648 + public SherpaOnnxOfflineTransducer()
  649 + {
  650 + encoder_filename = "";
  651 + decoder_filename = "";
  652 + joiner_filename = "";
  653 + }
  654 + };
  655 + internal struct SherpaOnnxOfflineParaformer
  656 + {
  657 + public string model;
  658 + public SherpaOnnxOfflineParaformer()
  659 + {
  660 + model = "";
  661 + }
  662 + };
  663 + internal struct SherpaOnnxOfflineNemoEncDecCtc
  664 + {
  665 + public string model;
  666 + public SherpaOnnxOfflineNemoEncDecCtc()
  667 + {
  668 + model = "";
  669 + }
  670 + };
  671 + internal struct SherpaOnnxOfflineModelConfig
  672 + {
  673 + public SherpaOnnxOfflineTransducer transducer;
  674 + public SherpaOnnxOfflineParaformer paraformer;
  675 + public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;
  676 + public string tokens;
  677 + public int num_threads;
  678 + public bool debug;
  679 + };
  /// <summary>
  /// Feature extraction settings shared by the recognizer configurations.
  /// It expects 16 kHz 16-bit single channel wave format.
  /// </summary>
  internal struct SherpaOnnxFeatureConfig
  {
      /// <summary>
      /// Sample rate of the input data. MUST match the one expected
      /// by the model. For instance, it should be 16000 for models provided
      /// by us.
      /// </summary>
      public int sample_rate;

      /// <summary>
      /// Feature dimension of the model.
      /// For instance, it should be 80 for models provided by us.
      /// </summary>
      public int feature_dim;
  };
  /// <summary>
  /// Managed counterpart of the native SherpaOnnxOfflineRecognizerConfig
  /// struct: feature settings, model settings and the decoding method.
  /// </summary>
  internal struct SherpaOnnxOfflineRecognizerConfig
  {
      /// <summary>Feature extraction settings (sample rate, feature dimension).</summary>
      public SherpaOnnxFeatureConfig feat_config;

      /// <summary>Model files, tokens, thread count and debug flag.</summary>
      public SherpaOnnxOfflineModelConfig model_config;

      /// <summary>Possible values are: greedy_search, modified_beam_search</summary>
      public string decoding_method;

  };
  /// <summary>
  /// Holds a single native pointer value.
  /// NOTE(review): presumably the handle returned by the native
  /// CreateOfflineRecognizer(); confirm against the DllImport declarations.
  /// </summary>
  internal struct SherpaOnnxOfflineRecognizer
  {
      public IntPtr impl;
  };
  /// <summary>
  /// Holds a single native pointer value.
  /// NOTE(review): presumably the handle returned by the native
  /// CreateOfflineStream(); confirm against the DllImport declarations.
  /// </summary>
  [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)]
  internal struct SherpaOnnxOfflineStream
  {
      public IntPtr impl;
  };
  /// <summary>
  /// Managed counterpart of the native SherpaOnnxOfflineRecognizerResult
  /// struct produced by GetOfflineStreamResult().
  /// </summary>
  internal struct SherpaOnnxOfflineRecognizerResult
  {
      /// <summary>
      /// Pointer to the native, NUL-terminated character buffer holding the
      /// recognized text. The buffer is allocated on the native side.
      /// </summary>
      public IntPtr text;

      /// <summary>
      /// Length of the text as reported by the native side (the size of the
      /// text, not counting the terminating NUL).
      /// </summary>
      public int text_len;
  }
  715 + internal struct SherpaOnnxOnlineTransducer
  716 + {
  717 + public string encoder_filename;
  718 + public string decoder_filename;
  719 + public string joiner_filename;
  720 + public SherpaOnnxOnlineTransducer()
  721 + {
  722 + encoder_filename = string.Empty;
  723 + decoder_filename = string.Empty;
  724 + joiner_filename = string.Empty;
  725 + }
  726 + };
  /// <summary>
  /// Model settings for the online recognizer, laid out for passing to
  /// native code.
  /// NOTE(review): the matching native declaration is not in this file;
  /// confirm that the native type of <c>debug</c> matches how a C# bool is
  /// marshalled here.
  /// </summary>
  internal struct SherpaOnnxOnlineModelConfig
  {
      /// <summary>Transducer model filenames.</summary>
      public SherpaOnnxOnlineTransducer transducer;

      /// <summary>Tokens file (tokens.txt).</summary>
      public string tokens;

      /// <summary>Number of threads passed to the native model configuration.</summary>
      public int num_threads;

      public bool debug; // true to print debug information of the model
  };
  /// <summary>
  /// Configuration for the online recognizer: feature settings, model
  /// settings, decoding method and endpoint detection rules.
  /// </summary>
  internal struct SherpaOnnxOnlineRecognizerConfig
  {
      /// <summary>Feature extraction settings (sample rate, feature dimension).</summary>
      public SherpaOnnxFeatureConfig feat_config;

      /// <summary>Model files, tokens, thread count and debug flag.</summary>
      public SherpaOnnxOnlineModelConfig model_config;

      /// <summary>Possible values are: greedy_search, modified_beam_search</summary>
      public string decoding_method;

      /// <summary>
      /// Used only when decoding_method is modified_beam_search
      /// Example value: 4
      /// </summary>
      public int max_active_paths;

      /// <summary>
      /// 0 to disable endpoint detection.
      /// A non-zero value to enable endpoint detection.
      /// </summary>
      public int enable_endpoint;

      /// <summary>
      /// An endpoint is detected if trailing silence in seconds is larger than
      /// this value even if nothing has been decoded.
      /// Used only when enable_endpoint is not 0.
      /// </summary>
      public float rule1_min_trailing_silence;

      /// <summary>
      /// An endpoint is detected if trailing silence in seconds is larger than
      /// this value after something that is not blank has been decoded.
      /// Used only when enable_endpoint is not 0.
      /// </summary>
      public float rule2_min_trailing_silence;

      /// <summary>
      /// An endpoint is detected if the utterance in seconds is larger than
      /// this value.
      /// Used only when enable_endpoint is not 0.
      /// </summary>
      public float rule3_min_utterance_length;
  };
  /// <summary>
  /// Recognition result of the online recognizer as received from native
  /// code.
  /// </summary>
  internal struct SherpaOnnxOnlineRecognizerResult
  {
      /// <summary>
      /// Native pointer to the recognized text.
      /// NOTE(review): encoding and ownership of the buffer are not visible
      /// here; confirm against the native online API.
      /// </summary>
      public IntPtr text;

      /// <summary>Length of the text as reported by native code.</summary>
      public int text_len;
      // TODO: Add more fields
  }
  /// <summary>
  /// Holds a single native pointer value; used as the recognizer argument of
  /// the online P/Invoke functions (described there as a pointer returned by
  /// CreateOnlineRecognizer()).
  /// </summary>
  internal struct SherpaOnnxOnlineRecognizer
  {
      public IntPtr impl;
  };
  /// <summary>
  /// Holds a single native pointer value; used as the stream argument of the
  /// online P/Invoke functions (described there as a pointer returned by
  /// CreateOnlineStream()).
  /// </summary>
  [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)]
  internal struct SherpaOnnxOnlineStream
  {
      public IntPtr impl;
  };
  780 + public class OfflineNemoEncDecCtc
  781 + {
  782 + private string model = string.Empty;
  783 + public string Model { get => model; set => model = value; }
  784 + }
  785 + public class OfflineParaformer
  786 + {
  787 + private string model = string.Empty;
  788 + public string Model { get => model; set => model = value; }
  789 + }
  /// <summary>
  /// Public, managed representation of an offline recognition result.
  /// Every member is optional and unset until assigned.
  /// </summary>
  public class OfflineRecognizerResultEntity
  {
      /// <summary>
      /// Recognized text.
      /// </summary>
      public string? text { get; set; }
      /// <summary>
      /// Length of the recognized text.
      /// </summary>
      public int text_len { get; set; }
      /// <summary>
      /// Decoded tokens.
      /// </summary>
      public List<string>? tokens { get; set; }
      /// <summary>
      /// Timestamps.
      /// NOTE(review): presumably one entry per token, in seconds; confirm
      /// against the code that fills this in.
      /// </summary>
      public List<float>? timestamps { get; set; }
  }
  809 + public class OfflineTransducer
  810 + {
  811 + private string encoderFilename = string.Empty;
  812 + private string decoderFilename = string.Empty;
  813 + private string joinerFilename = string.Empty;
  814 + public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; }
  815 + public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; }
  816 + public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; }
  817 + }
  818 + public class OnlineEndpoint
  819 + {
  820 + /// 0 to disable endpoint detection.
  821 + /// A non-zero value to enable endpoint detection.
  822 + private int enableEndpoint;
  823 +
  824 + /// An endpoint is detected if trailing silence in seconds is larger than
  825 + /// this value even if nothing has been decoded.
  826 + /// Used only when enable_endpoint is not 0.
  827 + private float rule1MinTrailingSilence;
  828 +
  829 + /// An endpoint is detected if trailing silence in seconds is larger than
  830 + /// this value after something that is not blank has been decoded.
  831 + /// Used only when enable_endpoint is not 0.
  832 + private float rule2MinTrailingSilence;
  833 +
  834 + /// An endpoint is detected if the utterance in seconds is larger than
  835 + /// this value.
  836 + /// Used only when enable_endpoint is not 0.
  837 + private float rule3MinUtteranceLength;
  838 +
  839 + public int EnableEndpoint { get => enableEndpoint; set => enableEndpoint = value; }
  840 + public float Rule1MinTrailingSilence { get => rule1MinTrailingSilence; set => rule1MinTrailingSilence = value; }
  841 + public float Rule2MinTrailingSilence { get => rule2MinTrailingSilence; set => rule2MinTrailingSilence = value; }
  842 + public float Rule3MinUtteranceLength { get => rule3MinUtteranceLength; set => rule3MinUtteranceLength = value; }
  843 + }
  /// <summary>
  /// Public, managed representation of an online recognition result.
  /// Every member is optional and unset until assigned.
  /// </summary>
  public class OnlineRecognizerResultEntity
  {
      /// <summary>
      /// Recognized text.
      /// </summary>
      public string? text { get; set; }
      /// <summary>
      /// Length of the recognized text.
      /// </summary>
      public int text_len { get; set; }
      /// <summary>
      /// Decoded tokens.
      /// </summary>
      public List<string>? tokens { get; set; }
      /// <summary>
      /// Timestamps.
      /// NOTE(review): presumably one entry per token, in seconds; confirm
      /// against the code that fills this in.
      /// </summary>
      public List<float>? timestamps { get; set; }
  }
  863 + public class OnlineTransducer
  864 + {
  865 + private string encoderFilename = string.Empty;
  866 + private string decoderFilename = string.Empty;
  867 + private string joinerFilename = string.Empty;
  868 + public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; }
  869 + public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; }
  870 + public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; }
  871 + }
  872 +}
  <!-- SDK-style project file: build settings for a .NET 6 C# project. -->
  <Project Sdk="Microsoft.NET.Sdk">

    <PropertyGroup>
      <TargetFramework>net6.0</TargetFramework>
      <!-- Adds the SDK's default global using directives to every source file. -->
      <ImplicitUsings>enable</ImplicitUsings>
      <!-- Turns on nullable reference type annotations and warnings. -->
      <Nullable>enable</Nullable>
      <!-- Allows code marked `unsafe` to compile.
           NOTE(review): presumably needed for pointer-based interop; confirm it is still required. -->
      <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    </PropertyGroup>

  </Project>
  1 +// sherpa-onnx/sharp-api/offline-api.cpp
  2 +//
  3 +// Copyright (c) 2023 Manyeyes Corporation
  4 +
  5 +#include "offline-api.h"
  6 +
  7 +#include "sherpa-onnx/csrc/display.h"
  8 +#include "sherpa-onnx/csrc/offline-recognizer.h"
  9 +
  10 +namespace sherpa_onnx
  11 +{
  /// Opaque handle handed out by CreateOfflineRecognizer().
  ///
  /// `impl` is a raw pointer: it is allocated with `new` in
  /// CreateOfflineRecognizer() and released in DestroyOfflineRecognizer().
  struct SherpaOnnxOfflineRecognizer {
    sherpa_onnx::OfflineRecognizer* impl;
  };
  15 +
  /// Opaque handle handed out by CreateOfflineStream().
  ///
  /// The wrapped stream is held in a std::unique_ptr, so deleting the handle
  /// also destroys the stream.
  struct SherpaOnnxOfflineStream {
    std::unique_ptr<sherpa_onnx::OfflineStream> impl;
    /// Take ownership of the stream `p`.
    explicit SherpaOnnxOfflineStream(std::unique_ptr<sherpa_onnx::OfflineStream> p)
        : impl(std::move(p)) {}
  };
  21 +
  /// Wrapper that owns a sherpa_onnx::Display through a std::unique_ptr.
  /// NOTE(review): no function visible in this file uses this type; confirm
  /// whether it is still needed.
  struct SherpaOnnxDisplay {
    std::unique_ptr<sherpa_onnx::Display> impl;
  };
  25 +
  26 + SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer(
  27 + const SherpaOnnxOfflineRecognizerConfig* config) {
  28 + sherpa_onnx::OfflineRecognizerConfig recognizer_config;
  29 +
  30 + recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
  31 + recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
  32 +
  33 + if (strlen(config->model_config.transducer.encoder_filename) > 0) {
  34 + recognizer_config.model_config.transducer.encoder_filename =
  35 + config->model_config.transducer.encoder_filename;
  36 + recognizer_config.model_config.transducer.decoder_filename =
  37 + config->model_config.transducer.decoder_filename;
  38 + recognizer_config.model_config.transducer.joiner_filename =
  39 + config->model_config.transducer.joiner_filename;
  40 + }
  41 + else if (strlen(config->model_config.paraformer.model) > 0) {
  42 + recognizer_config.model_config.paraformer.model =
  43 + config->model_config.paraformer.model;
  44 + }
  45 + else if (strlen(config->model_config.nemo_ctc.model) > 0) {
  46 + recognizer_config.model_config.nemo_ctc.model =
  47 + config->model_config.nemo_ctc.model;
  48 + }
  49 +
  50 + recognizer_config.model_config.tokens =
  51 + config->model_config.tokens;
  52 + recognizer_config.model_config.num_threads =
  53 + config->model_config.num_threads;
  54 + recognizer_config.model_config.debug =
  55 + config->model_config.debug;
  56 +
  57 + recognizer_config.decoding_method = config->decoding_method;
  58 +
  59 + SherpaOnnxOfflineRecognizer* recognizer =
  60 + new SherpaOnnxOfflineRecognizer;
  61 + recognizer->impl =
  62 + new sherpa_onnx::OfflineRecognizer(recognizer_config);
  63 +
  64 + return recognizer;
  65 + }
  66 +
  67 + SherpaOnnxOfflineStream* __stdcall CreateOfflineStream(
  68 + SherpaOnnxOfflineRecognizer* recognizer) {
  69 + SherpaOnnxOfflineStream* stream =
  70 + new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
  71 + return stream;
  72 + }
  73 +
  74 + void __stdcall AcceptWaveform(
  75 + SherpaOnnxOfflineStream* stream,
  76 + int32_t sample_rate,
  77 + const float* samples, int32_t samples_size) {
  78 + std::vector<float> waveform{ samples, samples + samples_size };
  79 + stream->impl->AcceptWaveform(sample_rate, waveform.data(), waveform.size());
  80 + }
  81 +
  82 + void __stdcall DecodeOfflineStream(
  83 + SherpaOnnxOfflineRecognizer* recognizer,
  84 + SherpaOnnxOfflineStream* stream) {
  85 + recognizer->impl->DecodeStream(stream->impl.get());
  86 + }
  87 +
  88 + void __stdcall DecodeMultipleOfflineStreams(
  89 + SherpaOnnxOfflineRecognizer* recognizer,
  90 + SherpaOnnxOfflineStream** streams, int32_t n) {
  91 + std::vector<sherpa_onnx::OfflineStream*> ss(n);
  92 + for (int32_t i = 0; i != n; ++i) {
  93 + ss[i] = streams[i]->impl.get();
  94 + }
  95 + recognizer->impl->DecodeStreams(ss.data(), n);
  96 + }
  97 +
  98 + SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult(
  99 + SherpaOnnxOfflineStream* stream) {
  100 + sherpa_onnx::OfflineRecognitionResult result =
  101 + stream->impl->GetResult();
  102 + const auto& text = result.text;
  103 + auto r = new SherpaOnnxOfflineRecognizerResult;
  104 + r->text = new char[text.size() + 1];
  105 + std::copy(text.begin(), text.end(), const_cast<char*>(r->text));
  106 + const_cast<char*>(r->text)[text.size()] = 0;
  107 + r->text_len = text.size();
  108 + return r;
  109 + }
  110 +
  111 +
  112 + /// Free a pointer returned by CreateOfflineRecognizer()
  113 + ///
  114 + /// @param p A pointer returned by CreateOfflineRecognizer()
  115 + void __stdcall DestroyOfflineRecognizer(
  116 + SherpaOnnxOfflineRecognizer* recognizer) {
  117 + delete recognizer->impl;
  118 + delete recognizer;
  119 + }
  120 +
  /// Destroy an offline stream.
  ///
  /// Deleting the handle also destroys the stream it owns.
  ///
  /// @param stream A pointer returned by CreateOfflineStream()
  void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) {
    delete stream;
  }
  127 +
  128 + /// Destroy the pointer returned by GetOfflineStreamResult().
  129 + ///
  130 + /// @param r A pointer returned by GetOfflineStreamResult()
  131 + void __stdcall DestroyOfflineRecognizerResult(
  132 + SherpaOnnxOfflineRecognizerResult* r) {
  133 + delete r->text;
  134 + delete r;
  135 + }
  136 +}// namespace sherpa_onnx
  1 +// sherpa-onnx/sharp-api/offline-api.h
  2 +//
  3 +// Copyright (c) 2023 Manyeyes Corporation
  4 +
  5 +#pragma once
  6 +
  7 +#include <list>
  8 +
  namespace sherpa_onnx
  {
    /// Please refer to
    /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
    /// to download pre-trained models. That is, you can find encoder-xxx.onnx
    /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
    /// from there.
    typedef struct SherpaOnnxOfflineTransducer {
      const char* encoder_filename;
      const char* decoder_filename;
      const char* joiner_filename;
    } SherpaOnnxOfflineTransducer;

    /// Paraformer model setting. CreateOfflineRecognizer() uses it only when
    /// the transducer encoder filename is empty.
    typedef struct SherpaOnnxOfflineParaformer {
      const char* model;
    }SherpaOnnxOfflineParaformer;

    /// NeMo EncDecCTC model setting. CreateOfflineRecognizer() uses it only
    /// when both the transducer encoder filename and the paraformer model are
    /// empty.
    typedef struct SherpaOnnxOfflineNemoEncDecCtc {
      const char* model;
    }SherpaOnnxOfflineNemoEncDecCtc;


    /// Model settings: the three alternative model types plus the tokens
    /// file, thread count and debug flag that CreateOfflineRecognizer()
    /// copies into the recognizer configuration.
    typedef struct SherpaOnnxOfflineModelConfig {
      SherpaOnnxOfflineTransducer transducer;
      SherpaOnnxOfflineParaformer paraformer;
      SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;
      const char* tokens;
      const int32_t num_threads;
      // One-byte C++ bool; any foreign-language binding must marshal it as
      // such.
      const bool debug;
    } SherpaOnnxOfflineModelConfig;

    /// It expects 16 kHz 16-bit single channel wave format.
    typedef struct SherpaOnnxFeatureConfig {
      /// Sample rate of the input data. MUST match the one expected
      /// by the model. For instance, it should be 16000 for models provided
      /// by us.
      int32_t sample_rate;

      /// Feature dimension of the model.
      /// For instance, it should be 80 for models provided by us.
      int32_t feature_dim;
    } SherpaOnnxFeatureConfig;

    /// Complete configuration passed to CreateOfflineRecognizer().
    typedef struct SherpaOnnxOfflineRecognizerConfig {
      SherpaOnnxFeatureConfig feat_config;
      SherpaOnnxOfflineModelConfig model_config;

      /// Possible values are: greedy_search, modified_beam_search
      const char* decoding_method;

    } SherpaOnnxOfflineRecognizerConfig;

    /// Result returned by GetOfflineStreamResult().
    typedef struct SherpaOnnxOfflineRecognizerResult {
      // Recognition results.
      // For English, it consists of space separated words.
      // For Chinese, it consists of Chinese words without spaces.
      // The buffer is NUL-terminated and heap-allocated by
      // GetOfflineStreamResult(); text_len is the size of the text without
      // the terminator.
      char* text;
      int text_len;

      // Decoded results at the token level.
      // For instance, for BPE-based models it consists of a list of BPE tokens.
      // std::vector<std::string> tokens;

      // timestamps.size() == tokens.size()
      // timestamps[i] records the time in seconds when tokens[i] is decoded.
      // std::vector<float> timestamps;
    } SherpaOnnxOfflineRecognizerResult;

    /// Note: OfflineRecognizer here means a non-streaming recognizer: audio
    /// is handed to a stream with AcceptWaveform() and the stream is then
    /// decoded with DecodeOfflineStream() or DecodeMultipleOfflineStreams().
    /// It does not need to access the Internet during recognition.
    /// Everything is run locally.
    typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer;

    /// Opaque stream handle created by CreateOfflineStream().
    typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;

    /// Create a recognizer from `config`.
    /// Free the returned pointer with DestroyOfflineRecognizer().
    extern "C" __declspec(dllexport)
      SherpaOnnxOfflineRecognizer * __stdcall CreateOfflineRecognizer(
        const SherpaOnnxOfflineRecognizerConfig * config);

    /// Create a stream on the given recognizer.
    /// Free the returned pointer with DestroyOfflineStream().
    extern "C" __declspec(dllexport)
      SherpaOnnxOfflineStream * __stdcall CreateOfflineStream(
        SherpaOnnxOfflineRecognizer * sherpaOnnxOfflineRecognizer);

    /// Feed `samples_size` float samples, recorded at `sample_rate`, to the
    /// stream.
    extern "C" __declspec(dllexport)
      void __stdcall AcceptWaveform(
        SherpaOnnxOfflineStream * stream, int32_t sample_rate,
        const float* samples, int32_t samples_size);

    /// Decode a single stream with the given recognizer.
    extern "C" __declspec(dllexport)
      void __stdcall DecodeOfflineStream(
        SherpaOnnxOfflineRecognizer * recognizer,
        SherpaOnnxOfflineStream * stream);

    /// Decode `n` streams, given as an array of stream pointers, in one call.
    extern "C" __declspec(dllexport)
      void __stdcall DecodeMultipleOfflineStreams(
        SherpaOnnxOfflineRecognizer * recognizer,
        SherpaOnnxOfflineStream * *streams, int32_t n);

    /// Return a newly allocated result for the stream.
    /// Free it with DestroyOfflineRecognizerResult().
    extern "C" __declspec(dllexport)
      SherpaOnnxOfflineRecognizerResult * __stdcall GetOfflineStreamResult(
        SherpaOnnxOfflineStream * stream);

    /// Free a pointer returned by CreateOfflineRecognizer().
    extern "C" __declspec(dllexport)
      void __stdcall DestroyOfflineRecognizer(
        SherpaOnnxOfflineRecognizer * recognizer);

    /// Free a pointer returned by CreateOfflineStream().
    extern "C" __declspec(dllexport)
      void __stdcall DestroyOfflineStream(
        SherpaOnnxOfflineStream * stream);

    /// Free a pointer returned by GetOfflineStreamResult().
    extern "C" __declspec(dllexport)
      void __stdcall DestroyOfflineRecognizerResult(
        SherpaOnnxOfflineRecognizerResult * r);
  }// namespace sherpa_onnx