Fangjun Kuang
Committed by GitHub

Refactor C# code and support building nuget packages for cross-platforms (#144)

Showing 39 changed files with 2041 additions and 2302 deletions
  1 +name: dot-net
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - dot-net
  7 + tags:
  8 + - '*'
  9 +
  10 +concurrency:
  11 + group: dot-net-${{ github.ref }}
  12 + cancel-in-progress: true
  13 +
  14 +jobs:
  15 + build-libs:
  16 + name: dot-net for ${{ matrix.os }}
  17 + runs-on: ${{ matrix.os }}
  18 + strategy:
  19 + fail-fast: false
  20 + matrix:
  21 + os: [ubuntu-latest, windows-latest, macos-latest]
  22 +
  23 + steps:
  24 + - uses: actions/checkout@v2
  25 + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/
  26 + # for a list of versions
  27 + - name: Build wheels
  28 + uses: pypa/cibuildwheel@v2.11.4
  29 + env:
  30 + CIBW_BEFORE_BUILD: "pip install -U cmake numpy"
  31 + CIBW_BUILD: "cp38-*64"
  32 + CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
  33 + CIBW_BUILD_VERBOSITY: 3
  34 + CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib'
  35 + CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""
  36 +
  37 + - name: Display wheels
  38 + shell: bash
  39 + run: |
  40 + ls -lh ./wheelhouse/*.whl
  41 + unzip -l ./wheelhouse/*.whl
  42 +
  43 + - uses: actions/upload-artifact@v2
  44 + with:
  45 + name: ${{ matrix.os }}-wheels
  46 + path: ./wheelhouse/*.whl
  47 +
  48 + build-nuget-packages:
  49 + name: build-nuget-packages
  50 + runs-on: ubuntu-latest
  51 + needs: build-libs
  52 +
  53 + steps:
  54 + - uses: actions/checkout@v2
  55 +
  56 + - name: Retrieve artifact from ubuntu-latest
  57 + uses: actions/download-artifact@v2
  58 + with:
  59 + name: ubuntu-latest-wheels
  60 + path: ./linux
  61 +
  62 + - name: Retrieve artifact from macos-latest
  63 + uses: actions/download-artifact@v2
  64 + with:
  65 + name: macos-latest-wheels
  66 + path: ./macos
  67 +
  68 + - name: Retrieve artifact from windows-latest
  69 + uses: actions/download-artifact@v2
  70 + with:
  71 + name: windows-latest-wheels
  72 + path: ./windows
  73 +
  74 + - name: Display wheels
  75 + shell: bash
  76 + run: |
  77 + tree .
  78 +
  79 + - name: Unzip Ubuntu wheels
  80 + shell: bash
  81 + run: |
  82 + cd linux
  83 + unzip ./*.whl
  84 + tree .
  85 +
  86 + - name: Unzip macOS wheels
  87 + shell: bash
  88 + run: |
  89 + cd macos
  90 + unzip ./*.whl
  91 + tree .
  92 +
  93 + - name: Unzip Windows wheels
  94 + shell: bash
  95 + run: |
  96 + cd windows
  97 + unzip ./*.whl
  98 + cp -v ./*.dll sherpa_onnx/lib/
  99 + tree .
  100 +
  101 + - name: Setup .NET Core 3.1
  102 + uses: actions/setup-dotnet@v1
  103 + with:
  104 + dotnet-version: 3.1.x
  105 +
  106 + - name: Setup .NET 7.0
  107 + uses: actions/setup-dotnet@v1
  108 + with:
  109 + dotnet-version: 7.0.x
  110 +
  111 + - name: Check dotnet
  112 + run: dotnet --info
  113 +
  114 + - name: build nuget packages
  115 + shell: bash
  116 + run: |
  117 + cd scripts/dotnet
  118 + ./run.sh
  119 + ls -lh packages
  120 +
  121 + - uses: actions/upload-artifact@v2
  122 + name: upload nuget packages
  123 + with:
  124 + name: nuget-packages
  125 + path: scripts/dotnet/packages/*.nupkg
  126 +
  127 + - name: publish .Net packages to nuget.org
  128 + if: github.repository == 'csukuangfj/sherpa-onnx' || github.repository == 'k2-fsa/sherpa-onnx'
  129 + shell: bash
  130 + env:
  131 + API_KEY: ${{ secrets.NUGET_API_KEY }}
  132 + run: |
  133 + # API_KEY is valid until 2024.05.02
  134 + cd scripts/dotnet/packages
  135 + dotnet nuget push ./org.k2fsa.sherpa.onnx.*.nupkg --skip-duplicate --api-key $API_KEY --source https://api.nuget.org/v3/index.json
  1 +name: test-dot-net
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - master
  7 + paths:
  8 + - '.github/workflows/test-dot-net'
  9 + - 'dotnet-examples/**'
  10 +
  11 + pull_request:
  12 + branches:
  13 + - master
  14 + paths:
  15 + - '.github/workflows/test-dot-net'
  16 + - 'dotnet-examples/**'
  17 +
  18 + schedule:
  19 + # minute (0-59)
  20 + # hour (0-23)
  21 + # day of the month (1-31)
  22 + # month (1-12)
  23 + # day of the week (0-6)
  24 + # nightly build at 23:50 UTC time every day
  25 + - cron: "50 23 * * *"
  26 +
  27 +concurrency:
  28 + group: test-dot-net
  29 + cancel-in-progress: true
  30 +
  31 +permissions:
  32 + contents: read
  33 +
  34 +jobs:
  35 + test-dot-net:
  36 + runs-on: ${{ matrix.os }}
  37 + strategy:
  38 + fail-fast: false
  39 + matrix:
  40 + os: [ubuntu-latest, macos-latest, windows-latest]
  41 +
  42 + steps:
  43 + - uses: actions/checkout@v2
  44 + with:
  45 + fetch-depth: 0
  46 +
  47 + - name: Setup .NET Core 3.1
  48 + uses: actions/setup-dotnet@v1
  49 + with:
  50 + dotnet-version: 3.1.x
  51 +
  52 + - name: Setup .NET 6.0
  53 + uses: actions/setup-dotnet@v1
  54 + with:
  55 + dotnet-version: 6.0.x
  56 +
  57 + - name: Check dotnet
  58 + run: dotnet --info
  59 +
  60 + - name: Decode a file
  61 + shell: bash
  62 + run: |
  63 + cd dotnet-examples/
  64 + cd online-decode-files
  65 + ./run.sh
  66 +
  67 + cd ../offline-decode-files
  68 + ./run-nemo-ctc.sh
  69 + ./run-paraformer.sh
  70 + ./run-zipformer.sh
@@ -57,3 +57,4 @@ sherpa-onnx-nemo-ctc-en-citrinet-512 @@ -57,3 +57,4 @@ sherpa-onnx-nemo-ctc-en-citrinet-512
57 run-offline-decode-files-nemo-ctc.sh 57 run-offline-decode-files-nemo-ctc.sh
58 *.jar 58 *.jar
59 sherpa-onnx-nemo-ctc-* 59 sherpa-onnx-nemo-ctc-*
  60 +*.wav
1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR) 1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2 project(sherpa-onnx) 2 project(sherpa-onnx)
3 3
4 -set(SHERPA_ONNX_VERSION "1.4.1") 4 +set(SHERPA_ONNX_VERSION "1.4.2")
5 5
6 # Disable warning about 6 # Disable warning about
7 # 7 #
@@ -37,16 +37,12 @@ endif() @@ -37,16 +37,12 @@ endif()
37 set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) 37 set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
38 set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) 38 set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN})
39 39
40 -if(BUILD_SHARED_LIBS AND MSVC)  
41 - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)  
42 -endif()  
43 -  
44 if(NOT CMAKE_BUILD_TYPE) 40 if(NOT CMAKE_BUILD_TYPE)
45 message(STATUS "No CMAKE_BUILD_TYPE given, default to Release") 41 message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
46 set(CMAKE_BUILD_TYPE Release) 42 set(CMAKE_BUILD_TYPE Release)
47 endif() 43 endif()
48 44
49 -if(DEFINED ANDROID_ABI) 45 +if(DEFINED ANDROID_ABI AND NOT SHERPA_ONNX_ENABLE_JNI)
50 message(STATUS "Set SHERPA_ONNX_ENABLE_JNI to ON for Android") 46 message(STATUS "Set SHERPA_ONNX_ENABLE_JNI to ON for Android")
51 set(SHERPA_ONNX_ENABLE_JNI ON CACHE BOOL "" FORCE) 47 set(SHERPA_ONNX_ENABLE_JNI ON CACHE BOOL "" FORCE)
52 endif() 48 endif()
@@ -61,6 +57,10 @@ if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS) @@ -61,6 +57,10 @@ if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS)
61 set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) 57 set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
62 endif() 58 endif()
63 59
  60 +if(BUILD_SHARED_LIBS AND MSVC)
  61 + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
  62 +endif()
  63 +
64 message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") 64 message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
65 message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") 65 message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
66 message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") 66 message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
@@ -41,7 +41,6 @@ try: @@ -41,7 +41,6 @@ try:
41 # -linux_x86_64.whl 41 # -linux_x86_64.whl
42 self.root_is_pure = False 42 self.root_is_pure = False
43 43
44 -  
45 except ImportError: 44 except ImportError:
46 bdist_wheel = None 45 bdist_wheel = None
47 46
@@ -78,7 +77,6 @@ class BuildExtension(build_ext): @@ -78,7 +77,6 @@ class BuildExtension(build_ext):
78 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_CHECK=OFF " 77 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_CHECK=OFF "
79 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PYTHON=ON " 78 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PYTHON=ON "
80 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PORTAUDIO=ON " 79 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PORTAUDIO=ON "
81 - extra_cmake_args += " -DSHERPA_ONNX_ENABLE_C_API=OFF "  
82 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON " 80 extra_cmake_args += " -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON "
83 81
84 if "PYTHON_EXECUTABLE" not in cmake_args: 82 if "PYTHON_EXECUTABLE" not in cmake_args:
1 -// See https://aka.ms/new-console-template for more information  
2 -// Copyright (c) 2023 by manyeyes  
3 -using SherpaOnnx;  
4 -/// Please refer to  
5 -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html  
6 -/// to download pre-trained models. That is, you can find encoder-xxx.onnx  
7 -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct  
8 -/// from there.  
9 -  
10 -/// download model eg:  
11 -/// (The directory where the application runs)  
12 -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory  
13 -/// cd /path/to  
14 -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01  
15 -/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example  
16 -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512  
17 -  
18 -/// NuGet for sherpa-onnx  
19 -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx  
20 -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx  
21 -  
22 -// transducer Usage:  
23 -/*  
24 - .\SherpaOnnx.Examples.exe `  
25 - --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `  
26 - --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `  
27 - --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `  
28 - --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `  
29 - --num-threads=2 `  
30 - --decoding-method=greedy_search `  
31 - --debug=false `  
32 - ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav  
33 - */  
34 -  
35 -// paraformer Usage:  
36 -/*  
37 - .\SherpaOnnx.Examples.exe `  
38 - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `  
39 - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `  
40 - --num-threads=2 `  
41 - --decoding-method=greedy_search `  
42 - --debug=false `  
43 - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav  
44 - */  
45 -  
46 -// paraformer Usage:  
47 -/*  
48 - .\SherpaOnnx.Examples.exe `  
49 - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `  
50 - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `  
51 - --num-threads=2 `  
52 - --decoding-method=greedy_search `  
53 - --debug=false `  
54 - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav  
55 - */  
56 -  
57 -  
58 -internal class OfflineDecodeFiles  
59 -{  
60 - static void Main(string[] args)  
61 - {  
62 - string usage = @"  
63 ------------------------------  
64 -transducer Usage:  
65 - --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt `  
66 - --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx `  
67 - --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx `  
68 - --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx `  
69 - --num-threads=2 `  
70 - --decoding-method=greedy_search `  
71 - --debug=false `  
72 - ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav  
73 -  
74 -paraformer Usage:  
75 - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt `  
76 - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx `  
77 - --num-threads=2 `  
78 - --decoding-method=greedy_search `  
79 - --debug=false `  
80 - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav  
81 -  
82 -nemo Usage:  
83 - --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt `  
84 - --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx `  
85 - --num-threads=2 `  
86 - --decoding-method=greedy_search `  
87 - --debug=false `  
88 - ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav  
89 ------------------------------  
90 -";  
91 - if (args.Length == 0)  
92 - {  
93 - System.Console.WriteLine("Please enter the correct parameters:");  
94 - System.Console.WriteLine(usage);  
95 - System.Text.StringBuilder sb = new System.Text.StringBuilder();  
96 - //args = Console.ReadLine().Split(" ");  
97 - while (true)  
98 - {  
99 - string input = Console.ReadLine();  
100 - sb.AppendLine(input);  
101 - if (Console.ReadKey().Key == ConsoleKey.Enter)  
102 - break;  
103 - }  
104 - args = sb.ToString().Split("\r\n");  
105 - }  
106 - Console.WriteLine("Started!\n");  
107 - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;  
108 - List<string> wavFiles = new List<string>();  
109 - Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);  
110 - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : "";  
111 - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : "";  
112 - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : "";  
113 - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : "";  
114 - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : "";  
115 - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : "";  
116 - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : "";  
117 - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : "";  
118 - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : "";  
119 -  
120 - OfflineTransducer offlineTransducer = new OfflineTransducer();  
121 - offlineTransducer.EncoderFilename = encoder;  
122 - offlineTransducer.DecoderFilename = decoder;  
123 - offlineTransducer.JoinerFilename = joiner;  
124 -  
125 - OfflineParaformer offlineParaformer = new OfflineParaformer();  
126 - offlineParaformer.Model = paraformer;  
127 -  
128 - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();  
129 - offlineNemoEncDecCtc.Model = nemo_ctc;  
130 -  
131 - int numThreads = 0;  
132 - int.TryParse(num_threads, out numThreads);  
133 - bool isDebug = false;  
134 - bool.TryParse(debug, out isDebug);  
135 -  
136 - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method;  
137 -  
138 - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))  
139 - && string.IsNullOrEmpty(paraformer)  
140 - && string.IsNullOrEmpty(nemo_ctc))  
141 - {  
142 - Console.WriteLine("Please specify at least one model");  
143 - Console.WriteLine(usage);  
144 - }  
145 - // batch decode  
146 - TimeSpan total_duration = TimeSpan.Zero;  
147 - TimeSpan start_time = TimeSpan.Zero;  
148 - TimeSpan end_time = TimeSpan.Zero;  
149 - List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>();  
150 - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)))  
151 - {  
152 - OfflineRecognizer<OfflineTransducer> offlineRecognizer = new OfflineRecognizer<OfflineTransducer>(  
153 - offlineTransducer,  
154 - tokens,  
155 - num_threads: numThreads,  
156 - debug: isDebug,  
157 - decoding_method: decodingMethod);  
158 - List<float[]> samplesList = new List<float[]>();  
159 - foreach (string wavFile in wavFiles)  
160 - {  
161 - TimeSpan duration = TimeSpan.Zero;  
162 - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);  
163 - samplesList.Add(samples);  
164 - total_duration += duration;  
165 - }  
166 - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);  
167 - start_time = new TimeSpan(DateTime.Now.Ticks);  
168 - offlineRecognizer.DecodeMultipleOfflineStreams(streams);  
169 - results = offlineRecognizer.GetResults(streams);  
170 - end_time = new TimeSpan(DateTime.Now.Ticks);  
171 - }  
172 - else if (!string.IsNullOrEmpty(paraformer))  
173 - {  
174 - OfflineRecognizer<OfflineParaformer> offlineRecognizer = new OfflineRecognizer<OfflineParaformer>(  
175 - offlineParaformer,  
176 - tokens,  
177 - num_threads: numThreads,  
178 - debug: isDebug,  
179 - decoding_method: decodingMethod);  
180 - List<float[]> samplesList = new List<float[]>();  
181 - foreach (string wavFile in wavFiles)  
182 - {  
183 - TimeSpan duration = TimeSpan.Zero;  
184 - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);  
185 - samplesList.Add(samples);  
186 - total_duration += duration;  
187 - }  
188 - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);  
189 - start_time = new TimeSpan(DateTime.Now.Ticks);  
190 - offlineRecognizer.DecodeMultipleOfflineStreams(streams);  
191 - results = offlineRecognizer.GetResults(streams);  
192 - end_time = new TimeSpan(DateTime.Now.Ticks);  
193 - }  
194 - else if (!string.IsNullOrEmpty(nemo_ctc))  
195 - {  
196 - OfflineRecognizer<OfflineNemoEncDecCtc> offlineRecognizer = new OfflineRecognizer<OfflineNemoEncDecCtc>(  
197 - offlineNemoEncDecCtc,  
198 - tokens,  
199 - num_threads: numThreads,  
200 - debug: isDebug,  
201 - decoding_method: decodingMethod);  
202 - List<float[]> samplesList = new List<float[]>();  
203 - foreach (string wavFile in wavFiles)  
204 - {  
205 - TimeSpan duration = TimeSpan.Zero;  
206 - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);  
207 - samplesList.Add(samples);  
208 - total_duration += duration;  
209 - }  
210 - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList);  
211 - start_time = new TimeSpan(DateTime.Now.Ticks);  
212 - offlineRecognizer.DecodeMultipleOfflineStreams(streams);  
213 - results = offlineRecognizer.GetResults(streams);  
214 - end_time = new TimeSpan(DateTime.Now.Ticks);  
215 - }  
216 -  
217 - foreach (var item in results.Zip<OfflineRecognizerResultEntity, string>(wavFiles))  
218 - {  
219 - Console.WriteLine("wavFile:{0}", item.Second);  
220 - Console.WriteLine("text:{0}", item.First.text.ToLower());  
221 - Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());  
222 - }  
223 -  
224 - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;  
225 - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;  
226 - Console.WriteLine("num_threads:{0}", num_threads);  
227 - Console.WriteLine("decoding_method:{0}", decodingMethod);  
228 - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());  
229 - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());  
230 - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());  
231 -  
232 - Console.WriteLine("End!");  
233 - }  
234 -  
235 - static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)  
236 - {  
237 - Dictionary<string, string> argsDict = new Dictionary<string, string>();  
238 - foreach (string input in args)  
239 - {  
240 - string[] ss = input.Split("=");  
241 - if (ss.Length == 1)  
242 - {  
243 - if (!string.IsNullOrEmpty(ss[0]))  
244 - {  
245 - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' })));  
246 - }  
247 - }  
248 - else  
249 - {  
250 - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' }));  
251 - }  
252 - }  
253 - return argsDict;  
254 - }  
255 -}  
1 -// See https://aka.ms/new-console-template for more information  
2 -// Copyright (c) 2023 by manyeyes  
3 -using SherpaOnnx;  
4 -/// Please refer to  
5 -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html  
6 -/// to download pre-trained models. That is, you can find encoder-xxx.onnx  
7 -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct  
8 -/// from there.  
9 -  
10 -/// download model eg:  
11 -/// (The directory where the application runs)  
12 -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory  
13 -/// cd /path/to  
14 -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20  
15 -  
16 -/// NuGet for sherpa-onnx  
17 -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx  
18 -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx  
19 -  
20 -// transducer Usage:  
21 -/*  
22 - .\SherpaOnnx.Examples.exe `  
23 - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `  
24 - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `  
25 - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `  
26 - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `  
27 - --num-threads=2 `  
28 - --decoding-method=modified_beam_search `  
29 - --debug=false `  
30 - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav  
31 - */  
32 -  
33 -internal class OnlineDecodeFile  
34 -{  
35 - static void Main(string[] args)  
36 - {  
37 - string usage = @"  
38 ------------------------------  
39 -transducer Usage:  
40 - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `  
41 - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `  
42 - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `  
43 - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `  
44 - --num-threads=2 `  
45 - --decoding-method=modified_beam_search `  
46 - --debug=false `  
47 - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav  
48 ------------------------------  
49 -";  
50 - if (args.Length == 0)  
51 - {  
52 - System.Console.WriteLine("Please enter the correct parameters:");  
53 - System.Console.WriteLine(usage);  
54 - System.Text.StringBuilder sb = new System.Text.StringBuilder();  
55 - //args = Console.ReadLine().Split(" ");  
56 - while (true)  
57 - {  
58 - string input = Console.ReadLine();  
59 - sb.AppendLine(input);  
60 - if (Console.ReadKey().Key == ConsoleKey.Enter)  
61 - break;  
62 - }  
63 - args = sb.ToString().Split("\r\n");  
64 - }  
65 - Console.WriteLine("Started!\n");  
66 - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;  
67 - List<string> wavFiles = new List<string>();  
68 - Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);  
69 - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : "";  
70 - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : "";  
71 - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : "";  
72 - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : "";  
73 - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : "";  
74 - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : "";  
75 - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : "";  
76 - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : "";  
77 - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : "";  
78 -  
79 - OfflineTransducer offlineTransducer = new OfflineTransducer();  
80 - offlineTransducer.EncoderFilename = encoder;  
81 - offlineTransducer.DecoderFilename = decoder;  
82 - offlineTransducer.JoinerFilename = joiner;  
83 -  
84 - OfflineParaformer offlineParaformer = new OfflineParaformer();  
85 - offlineParaformer.Model = paraformer;  
86 -  
87 - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();  
88 - offlineNemoEncDecCtc.Model = nemo_ctc;  
89 -  
90 - int numThreads = 0;  
91 - int.TryParse(num_threads, out numThreads);  
92 - bool isDebug = false;  
93 - bool.TryParse(debug, out isDebug);  
94 -  
95 - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method;  
96 -  
97 - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))  
98 - && string.IsNullOrEmpty(paraformer)  
99 - && string.IsNullOrEmpty(nemo_ctc))  
100 - {  
101 - Console.WriteLine("Please specify at least one model");  
102 - Console.WriteLine(usage);  
103 - }  
104 - // batch decode  
105 - TimeSpan total_duration = TimeSpan.Zero;  
106 - TimeSpan start_time = TimeSpan.Zero;  
107 - TimeSpan end_time = TimeSpan.Zero;  
108 - List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>();  
109 - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)))  
110 - {  
111 - OnlineTransducer onlineTransducer = new OnlineTransducer();  
112 - onlineTransducer.EncoderFilename = encoder;  
113 - onlineTransducer.DecoderFilename = decoder;  
114 - onlineTransducer.JoinerFilename = joiner;  
115 - //test online  
116 - OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(  
117 - onlineTransducer,  
118 - tokens,  
119 - num_threads: numThreads,  
120 - debug: isDebug,  
121 - decoding_method: decodingMethod);  
122 - foreach (string wavFile in wavFiles)  
123 - {  
124 - TimeSpan duration = TimeSpan.Zero;  
125 - List<float[]> samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration);  
126 - OnlineStream stream = onlineRecognizer.CreateStream();  
127 - start_time = new TimeSpan(DateTime.Now.Ticks);  
128 - for (int i = 0; i < samplesList.Count; i++)  
129 - {  
130 - onlineRecognizer.AcceptWaveForm(stream, 16000, samplesList[i]);  
131 - onlineRecognizer.DecodeStream(stream);  
132 - OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream);  
133 - Console.WriteLine(result_on.text);  
134 - }  
135 - total_duration += duration;  
136 - }  
137 - end_time = new TimeSpan(DateTime.Now.Ticks);  
138 - }  
139 - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;  
140 - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;  
141 - Console.WriteLine("num_threads:{0}", num_threads);  
142 - Console.WriteLine("decoding_method:{0}", decodingMethod);  
143 - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());  
144 - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());  
145 - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());  
146 -  
147 - Console.WriteLine("End!");  
148 - }  
149 -  
150 - static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)  
151 - {  
152 - Dictionary<string, string> argsDict = new Dictionary<string, string>();  
153 - foreach (string input in args)  
154 - {  
155 - string[] ss = input.Split("=");  
156 - if (ss.Length == 1)  
157 - {  
158 - if (!string.IsNullOrEmpty(ss[0]))  
159 - {  
160 - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' })));  
161 - }  
162 - }  
163 - else  
164 - {  
165 - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' }));  
166 - }  
167 - }  
168 - return argsDict;  
169 - }  
170 -  
171 -}  
1 -// See https://aka.ms/new-console-template for more information  
2 -// Copyright (c) 2023 by manyeyes  
3 -using SherpaOnnx;  
4 -/// Please refer to  
5 -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html  
6 -/// to download pre-trained models. That is, you can find encoder-xxx.onnx  
7 -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct  
8 -/// from there.  
9 -  
10 -/// download model eg:  
11 -/// (The directory where the application runs)  
12 -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory  
13 -/// cd /path/to  
14 -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20  
15 -  
16 -/// NuGet for sherpa-onnx  
17 -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx  
18 -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx  
19 -  
20 -// transducer Usage:  
21 -/*  
22 - .\SherpaOnnx.Examples.exe `  
23 - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `  
24 - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `  
25 - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `  
26 - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `  
27 - --num-threads=2 `  
28 - --decoding-method=modified_beam_search `  
29 - --debug=false `  
30 - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `  
31 - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav  
32 - */  
33 -  
34 -internal class OnlineDecodeFiles  
35 -{  
36 - static void Main(string[] args)  
37 - {  
38 - string usage = @"  
39 ------------------------------  
40 -transducer Usage:  
41 - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt `  
42 - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx `  
43 - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx `  
44 - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx `  
45 - --num-threads=2 `  
46 - --decoding-method=modified_beam_search `  
47 - --debug=false `  
48 - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav `  
49 - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav  
50 ------------------------------  
51 -";  
52 - if (args.Length == 0)  
53 - {  
54 - System.Console.WriteLine("Please enter the correct parameters:");  
55 - System.Console.WriteLine(usage);  
56 - System.Text.StringBuilder sb = new System.Text.StringBuilder();  
57 - //args = Console.ReadLine().Split(" ");  
58 - while (true)  
59 - {  
60 - string input = Console.ReadLine();  
61 - sb.AppendLine(input);  
62 - if (Console.ReadKey().Key == ConsoleKey.Enter)  
63 - break;  
64 - }  
65 - args = sb.ToString().Split("\r\n");  
66 - }  
67 - Console.WriteLine("Started!\n");  
68 - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory;  
69 - List<string> wavFiles = new List<string>();  
70 - Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles);  
71 - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : "";  
72 - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : "";  
73 - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : "";  
74 - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : "";  
75 - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : "";  
76 - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : "";  
77 - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : "";  
78 - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : "";  
79 - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : "";  
80 -  
81 - OfflineTransducer offlineTransducer = new OfflineTransducer();  
82 - offlineTransducer.EncoderFilename = encoder;  
83 - offlineTransducer.DecoderFilename = decoder;  
84 - offlineTransducer.JoinerFilename = joiner;  
85 -  
86 - OfflineParaformer offlineParaformer = new OfflineParaformer();  
87 - offlineParaformer.Model = paraformer;  
88 -  
89 - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc();  
90 - offlineNemoEncDecCtc.Model = nemo_ctc;  
91 -  
92 - int numThreads = 0;  
93 - int.TryParse(num_threads, out numThreads);  
94 - bool isDebug = false;  
95 - bool.TryParse(debug, out isDebug);  
96 -  
97 - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method;  
98 -  
99 - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))  
100 - && string.IsNullOrEmpty(paraformer)  
101 - && string.IsNullOrEmpty(nemo_ctc))  
102 - {  
103 - Console.WriteLine("Please specify at least one model");  
104 - Console.WriteLine(usage);  
105 - }  
106 - // batch decode  
107 - TimeSpan total_duration = TimeSpan.Zero;  
108 - TimeSpan start_time = TimeSpan.Zero;  
109 - TimeSpan end_time = TimeSpan.Zero;  
110 - List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>();  
111 - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)))  
112 - {  
113 - OnlineTransducer onlineTransducer = new OnlineTransducer();  
114 - onlineTransducer.EncoderFilename = encoder;  
115 - onlineTransducer.DecoderFilename = decoder;  
116 - onlineTransducer.JoinerFilename = joiner;  
117 - //test online  
118 - OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(  
119 - onlineTransducer,  
120 - tokens,  
121 - num_threads: numThreads,  
122 - debug: isDebug,  
123 - decoding_method: decodingMethod);  
124 - List<float[]> samplesList = new List<float[]>();  
125 - foreach (string wavFile in wavFiles)  
126 - {  
127 - TimeSpan duration = TimeSpan.Zero;  
128 - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);  
129 - samplesList.Add(samples);  
130 - total_duration += duration;  
131 - }  
132 - start_time = new TimeSpan(DateTime.Now.Ticks);  
133 - List<OnlineStream> streams = new List<OnlineStream>();  
134 - foreach (float[] samples in samplesList)  
135 - {  
136 - OnlineStream stream = onlineRecognizer.CreateStream();  
137 - onlineRecognizer.AcceptWaveForm(stream, 16000, samples);  
138 - streams.Add(stream);  
139 - onlineRecognizer.InputFinished(stream);  
140 - }  
141 - onlineRecognizer.DecodeMultipleStreams(streams);  
142 - results = onlineRecognizer.GetResults(streams);  
143 - foreach (OnlineRecognizerResultEntity result in results)  
144 - {  
145 - Console.WriteLine(result.text);  
146 - }  
147 - end_time = new TimeSpan(DateTime.Now.Ticks);  
148 - }  
149 -  
150 -  
151 - foreach (var item in results.Zip<OnlineRecognizerResultEntity, string>(wavFiles))  
152 - {  
153 - Console.WriteLine("wavFile:{0}", item.Second);  
154 - Console.WriteLine("text:{0}", item.First.text.ToLower());  
155 - Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString());  
156 - }  
157 -  
158 - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;  
159 - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;  
160 - Console.WriteLine("num_threads:{0}", num_threads);  
161 - Console.WriteLine("decoding_method:{0}", decodingMethod);  
162 - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());  
163 - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());  
164 - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString());  
165 -  
166 - Console.WriteLine("End!");  
167 - }  
168 -  
169 - public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List<string> wavFiles, ref TimeSpan total_duration)  
170 - {  
171 - OnlineTransducer onlineTransducer = new OnlineTransducer();  
172 - onlineTransducer.EncoderFilename = encoder;  
173 - onlineTransducer.DecoderFilename = decoder;  
174 - onlineTransducer.JoinerFilename = joiner;  
175 - //test online  
176 - OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>(  
177 - onlineTransducer,  
178 - tokens,  
179 - num_threads: numThreads,  
180 - debug: isDebug,  
181 - decoding_method: decodingMethod);  
182 - List<float[]> samplesList = new List<float[]>();  
183 - foreach (string wavFile in wavFiles)  
184 - {  
185 - TimeSpan duration = TimeSpan.Zero;  
186 - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration);  
187 - samplesList.Add(samples);  
188 - total_duration += duration;  
189 - }  
190 - TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);  
191 - List<OnlineStream> streams = onlineRecognizer.CreateStreams(samplesList);  
192 - onlineRecognizer.DecodeMultipleStreams(streams);  
193 - List<OnlineRecognizerResultEntity> results = onlineRecognizer.GetResults(streams);  
194 - foreach (OnlineRecognizerResultEntity result in results)  
195 - {  
196 - Console.WriteLine(result.text);  
197 - }  
198 - TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);  
199 - }  
200 -  
201 - static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles)  
202 - {  
203 - Dictionary<string, string> argsDict = new Dictionary<string, string>();  
204 - foreach (string input in args)  
205 - {  
206 - string[] ss = input.Split("=");  
207 - if (ss.Length == 1)  
208 - {  
209 - if (!string.IsNullOrEmpty(ss[0]))  
210 - {  
211 - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' })));  
212 - }  
213 - }  
214 - else  
215 - {  
216 - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' }));  
217 - }  
218 - }  
219 - return argsDict;  
220 - }  
221 -}  
1 -#ProjectReference csharp-api  
2 -`<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />`  
3 -The location of the 'SherpaOnnx' file is ../sherpa-onnx/csharp-api.  
4 -This C # API is cross platform and you can compile it yourself in Windows, Mac OS, and Linux environments.  
5 -  
6 -------------  
7 -Alternatively, install sherpaonnx through nuget.  
8 -#NuGet for sherpa-onnx  
9 -PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx  
1 -using NAudio.Wave;  
2 -using System;  
3 -using System.Collections.Generic;  
4 -using System.Diagnostics;  
5 -using System.Linq;  
6 -using System.Text;  
7 -using System.Threading.Tasks;  
8 -  
9 -/// <summary>  
10 -/// audio processing  
11 -/// Copyright (c) 2023 by manyeyes  
12 -/// </summary>  
13 -public class AudioHelper  
14 -{  
15 - public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration)  
16 - {  
17 - if (!File.Exists(wavFilePath))  
18 - {  
19 - Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath);  
20 - return new float[1];  
21 - }  
22 - AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);  
23 - byte[] datas = new byte[_audioFileReader.Length];  
24 - _audioFileReader.Read(datas, 0, datas.Length);  
25 - duration = _audioFileReader.TotalTime;  
26 - float[] wavdata = new float[datas.Length / sizeof(float)];  
27 - Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);  
28 - return wavdata;  
29 - }  
30 -  
31 - public static List<float[]> GetChunkSamplesList(string wavFilePath, ref TimeSpan duration)  
32 - {  
33 - List<float[]> wavdatas = new List<float[]>();  
34 - if (!File.Exists(wavFilePath))  
35 - {  
36 - Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath);  
37 - wavdatas.Add(new float[1]);  
38 - return wavdatas;  
39 - }  
40 - AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);  
41 - byte[] datas = new byte[_audioFileReader.Length];  
42 - int chunkSize = 16000;// datas.Length / sizeof(float);  
43 - int chunkNum = (int)Math.Ceiling((double)datas.Length / chunkSize);  
44 - for (int i = 0; i < chunkNum; i++)  
45 - {  
46 - int offset = 0;  
47 - int dataCount = 0;  
48 - if (Math.Abs(datas.Length - i * chunkSize) > chunkSize)  
49 - {  
50 - offset = i * chunkSize;  
51 - dataCount = chunkSize;  
52 - }  
53 - else  
54 - {  
55 - offset = i * chunkSize;  
56 - dataCount = datas.Length - i * chunkSize;  
57 - }  
58 - _audioFileReader.Read(datas, offset, dataCount);  
59 - duration += _audioFileReader.TotalTime;  
60 - float[] wavdata = new float[chunkSize / sizeof(float)];  
61 - Buffer.BlockCopy(datas, offset, wavdata, 0, dataCount);  
62 - wavdatas.Add(wavdata);  
63 -  
64 - }  
65 - return wavdatas;  
66 - }  
67 -}  
  1 +# top-most EditorConfig file
  2 +root = true
  3 +
  4 +# Don't use tabs for indentation.
  5 +[*]
  6 +indent_style = space
  7 +
  8 +# Code files
  9 +[*.{cs,csx,vb,vbx}]
  10 +indent_size = 2
  11 +insert_final_newline = true
  12 +charset = utf-8-bom
  13 +end_of_line = crlf
  1 +// Copyright (c) 2023 Xiaomi Corporation
  2 +// Copyright (c) 2023 by manyeyes
  3 +//
  4 +// This file shows how to use a non-streaming model to decode files
  5 +// Please refer to
  6 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  7 +// to download non-streaming models
  8 +using CommandLine.Text;
  9 +using CommandLine;
  10 +using SherpaOnnx;
  11 +using System.Collections.Generic;
  12 +using System;
  13 +
  14 +class OfflineDecodeFiles
  15 +{
  16 + class Options
  17 + {
  18 + [Option(Required = false, HelpText = "Path to tokens.txt")]
  19 + public string Tokens { get; set; }
  20 +
  21 + [Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")]
  22 + public string Encoder { get; set; }
  23 +
  24 + [Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")]
  25 + public string Decoder { get; set; }
  26 +
  27 + [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")]
  28 + public string Joiner { get; set; }
  29 +
  30 + [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
  31 + public string Paraformer { get; set; }
  32 +
  33 + [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")]
  34 + public string NeMoCtc { get; set; }
  35 +
  36 + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
  37 + public int NumThreads { get; set; }
  38 +
  39 + [Option("decoding-method", Required = false, Default = "greedy_search",
  40 + HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
  41 + public string DecodingMethod { get; set; }
  42 +
  43 + [Option("max-active-paths", Required = false, Default = 4,
  44 + HelpText = @"Used only when --decoding--method is modified_beam_search.
  45 +It specifies number of active paths to keep during the search")]
  46 + public int MaxActivePaths { get; set; }
  47 +
  48 + [Option("files", Required = true, HelpText = "Audio files for decoding")]
  49 + public IEnumerable<string> Files { get; set; }
  50 + }
  51 +
  52 + static void Main(string[] args)
  53 + {
  54 + var parser = new CommandLine.Parser(with => with.HelpWriter = null);
  55 + var parserResult = parser.ParseArguments<Options>(args);
  56 +
  57 + parserResult
  58 + .WithParsed<Options>(options => Run(options))
  59 + .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  60 + }
  61 +
  62 + private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  63 + {
  64 + string usage = @"
  65 +# Zipformer
  66 +
  67 +dotnet run \
  68 + --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  69 + --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  70 + --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  71 + --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  72 + --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  73 + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  74 + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav
  75 +
  76 +Please refer to
  77 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html
  78 +to download pre-trained non-streaming zipformer models.
  79 +
  80 +# Paraformer
  81 +
  82 +dotnet run \
  83 + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  84 + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
  85 + --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  86 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
  87 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
  88 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
  89 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
  90 +
  91 +Please refer to
  92 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
  93 +to download pre-trained paraformer models
  94 +
  95 +# NeMo CTC
  96 +
  97 +dotnet run \
  98 + --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  99 + --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  100 + --num-threads=1 \
  101 + --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  102 + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  103 + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav
  104 +
  105 +Please refer to
  106 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
  107 +to download pre-trained paraformer models
  108 +";
  109 +
  110 + var helpText = HelpText.AutoBuild(result, h =>
  111 + {
  112 + h.AdditionalNewLineAfterOption = false;
  113 + h.Heading = usage;
  114 + h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
  115 + return HelpText.DefaultParsingErrorsHandler(result, h);
  116 + }, e => e);
  117 + Console.WriteLine(helpText);
  118 + }
  119 +
  120 + private static void Run(Options options)
  121 + {
  122 + OfflineRecognizerConfig config = new OfflineRecognizerConfig();
  123 + config.ModelConfig.Tokens = options.Tokens;
  124 +
  125 + if (!String.IsNullOrEmpty(options.Encoder))
  126 + {
  127 + // this is a transducer model
  128 + config.ModelConfig.Transducer.Encoder = options.Encoder;
  129 + config.ModelConfig.Transducer.Decoder = options.Decoder;
  130 + config.ModelConfig.Transducer.Joiner = options.Joiner;
  131 + }
  132 + else if (!String.IsNullOrEmpty(options.Paraformer))
  133 + {
  134 + config.ModelConfig.Paraformer.Model = options.Paraformer;
  135 + }
  136 + else if (!String.IsNullOrEmpty(options.NeMoCtc))
  137 + {
  138 + config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
  139 + }
  140 + else
  141 + {
  142 + Console.WriteLine("Please provide a model");
  143 + return;
  144 + }
  145 +
  146 + config.DecodingMethod = options.DecodingMethod;
  147 + config.MaxActivePaths = options.MaxActivePaths;
  148 + config.ModelConfig.Debug = 0;
  149 +
  150 + OfflineRecognizer recognizer = new OfflineRecognizer(config);
  151 +
  152 + string[] files = options.Files.ToArray();
  153 +
  154 + // We create a separate stream for each file
  155 + List<OfflineStream> streams = new List<OfflineStream>();
  156 + streams.EnsureCapacity(files.Length);
  157 +
  158 + for (int i = 0; i != files.Length; ++i)
  159 + {
  160 + OfflineStream s = recognizer.CreateStream();
  161 +
  162 + WaveReader waveReader = new WaveReader(files[i]);
  163 + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
  164 + streams.Add(s);
  165 + }
  166 +
  167 + recognizer.Decode(streams);
  168 +
  169 + // display results
  170 + for (int i = 0; i != files.Length; ++i)
  171 + {
  172 + var text = streams[i].Result.Text;
  173 + Console.WriteLine("--------------------");
  174 + Console.WriteLine(files[i]);
  175 + Console.WriteLine(text);
  176 + }
  177 + Console.WriteLine("--------------------");
  178 + }
  179 +}
  1 +../online-decode-files/WaveReader.cs
1 -<Project Sdk="Microsoft.NET.Sdk">  
2 -  
3 - <PropertyGroup>  
4 - <OutputType>Exe</OutputType>  
5 - <TargetFramework>net6.0</TargetFramework>  
6 - <RootNamespace>sherpa_onnx</RootNamespace>  
7 - <ImplicitUsings>enable</ImplicitUsings>  
8 - <Nullable>enable</Nullable>  
9 - <StartupObject>OnlineDecodeFiles</StartupObject>  
10 - </PropertyGroup>  
11 -  
12 - <ItemGroup>  
13 - <PackageReference Include="NAudio" Version="2.1.0" />  
14 - </ItemGroup>  
15 -  
16 - <ItemGroup>  
17 - <ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />  
18 - </ItemGroup>  
19 -  
20 -</Project> 1 +<Project Sdk="Microsoft.NET.Sdk">
  2 +
  3 + <PropertyGroup>
  4 + <OutputType>Exe</OutputType>
  5 + <TargetFramework>net6.0</TargetFramework>
  6 + <RootNamespace>offline_decode_files</RootNamespace>
  7 + <ImplicitUsings>enable</ImplicitUsings>
  8 + <Nullable>enable</Nullable>
  9 + </PropertyGroup>
  10 +
  11 + <ItemGroup>
  12 + <PackageReference Include="CommandLineParser" Version="2.9.1" />
  13 + <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  14 + </ItemGroup>
  15 +
  16 +</Project>
  1 +#!/usr/bin/env bash
  2 +
  3 +if [ ! -d ./sherpa-onnx-nemo-ctc-en-conformer-medium ]; then
  4 + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium
  5 + cd sherpa-onnx-nemo-ctc-en-conformer-medium
  6 + git lfs pull --include "*.onnx"
  7 + cd ..
  8 +fi
  9 +
  10 +dotnet run \
  11 + --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \
  12 + --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
  13 + --num-threads=1 \
  14 + --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \
  15 + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \
  16 + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav
  1 +#!/usr/bin/env bash
  2 +
  3 +if [ ! -d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then
  4 + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28
  5 + cd sherpa-onnx-paraformer-zh-2023-03-28
  6 + git lfs pull --include "*.onnx"
  7 + cd ..
  8 +fi
  9 +
  10 +dotnet run \
  11 + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  12 + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
  13 + --num-threads=2 \
  14 + --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \
  15 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \
  16 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \
  17 + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav
  1 +#!/usr/bin/env bash
  2 +#
  3 +if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
  4 + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01
  5 + cd sherpa-onnx-zipformer-en-2023-04-01
  6 + git lfs pull --include "*.onnx"
  7 + cd ..
  8 +fi
  9 +
  10 +dotnet run \
  11 + --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \
  12 + --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \
  13 + --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \
  14 + --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \
  15 + --num-threads=2 \
  16 + --decoding-method=modified_beam_search \
  17 + --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \
  18 + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \
  19 + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav
  1 +// Copyright (c) 2023 Xiaomi Corporation
  2 +// Copyright (c) 2023 by manyeyes
  3 +//
  4 +// This file shows how to use a streaming model to decode files
  5 +// Please refer to
  6 +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
  7 +// to download streaming models
  8 +
  9 +using CommandLine.Text;
  10 +using CommandLine;
  11 +using SherpaOnnx;
  12 +using System.Collections.Generic;
  13 +using System.Linq;
  14 +using System;
  15 +
  16 +class OnlineDecodeFiles
  17 +{
  18 + class Options
  19 + {
  20 + [Option(Required = true, HelpText = "Path to tokens.txt")]
  21 + public string Tokens { get; set; }
  22 +
  23 + [Option(Required = true, HelpText = "Path to encoder.onnx")]
  24 + public string Encoder { get; set; }
  25 +
  26 + [Option(Required = true, HelpText = "Path to decoder.onnx")]
  27 + public string Decoder { get; set; }
  28 +
  29 + [Option(Required = true, HelpText = "Path to joiner.onnx")]
  30 + public string Joiner { get; set; }
  31 +
  32 + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")]
  33 + public int NumThreads { get; set; }
  34 +
  35 + [Option("decoding-method", Required = false, Default = "greedy_search",
  36 + HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
  37 + public string DecodingMethod { get; set; }
  38 +
  39 + [Option(Required = false, Default = false, HelpText = "True to show model info during loading")]
  40 + public bool Debug { get; set; }
  41 +
  42 + [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
  43 + public int SampleRate { get; set; }
  44 +
  45 + [Option("max-active-paths", Required = false, Default = 4,
  46 + HelpText = @"Used only when --decoding--method is modified_beam_search.
  47 +It specifies number of active paths to keep during the search")]
  48 + public int MaxActivePaths { get; set; }
  49 +
  50 + [Option("enable-endpoint", Required = false, Default = false,
  51 + HelpText = "True to enable endpoint detection.")]
  52 + public bool EnableEndpoint { get; set; }
  53 +
  54 + [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F,
  55 + HelpText = @"An endpoint is detected if trailing silence in seconds is
  56 +larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")]
  57 + public float Rule1MinTrailingSilence { get; set; }
  58 +
  59 + [Option("rule2-min-trailing-silence", Required = false, Default = 1.2F,
  60 + HelpText = @"An endpoint is detected if trailing silence in seconds is
  61 +larger than this value after something that is not blank has been decoded. Used
  62 +only when --enable-endpoint is true.")]
  63 + public float Rule2MinTrailingSilence { get; set; }
  64 +
  65 + [Option("rule3-min-utterance-length", Required = false, Default = 20.0F,
  66 + HelpText = @"An endpoint is detected if the utterance in seconds is
  67 +larger than this value. Used only when --enable-endpoint is true.")]
  68 + public float Rule3MinUtteranceLength { get; set; }
  69 +
  70 + [Option("files", Required = true, HelpText = "Audio files for decoding")]
  71 + public IEnumerable<string> Files { get; set; }
  72 +
  73 + }
  74 +
  75 + static void Main(string[] args)
  76 + {
  77 + var parser = new CommandLine.Parser(with => with.HelpWriter = null);
  78 + var parserResult = parser.ParseArguments<Options>(args);
  79 +
  80 + parserResult
  81 + .WithParsed<Options>(options => Run(options))
  82 + .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  83 + }
  84 +
  85 + private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  86 + {
  87 + string usage = @"
  88 +dotnet run \
  89 + --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  90 + --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \
  91 + --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
  92 + --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \
  93 + --num-threads=2 \
  94 + --decoding-method=modified_beam_search \
  95 + --debug=false \
  96 + ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \
  97 + ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav
  98 +
  99 +Please refer to
  100 +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
  101 +to download pre-trained streaming models.
  102 +";
  103 +
  104 + var helpText = HelpText.AutoBuild(result, h =>
  105 + {
  106 + h.AdditionalNewLineAfterOption = false;
  107 + h.Heading = usage;
  108 + h.Copyright = "Copyright (c) 2023 Xiaomi Corporation";
  109 + return HelpText.DefaultParsingErrorsHandler(result, h);
  110 + }, e => e);
  111 + Console.WriteLine(helpText);
  112 + }
  113 +
  114 + private static void Run(Options options)
  115 + {
  116 + OnlineRecognizerConfig config = new OnlineRecognizerConfig();
  117 + config.FeatConfig.SampleRate = options.SampleRate;
  118 +
  119 + // All models from icefall using feature dim 80.
  120 + // You can change it if your model has a different feature dim.
  121 + config.FeatConfig.FeatureDim = 80;
  122 +
  123 + config.TransducerModelConfig.Encoder = options.Encoder;
  124 + config.TransducerModelConfig.Decoder = options.Decoder;
  125 + config.TransducerModelConfig.Joiner = options.Joiner;
  126 + config.TransducerModelConfig.Tokens = options.Tokens;
  127 + config.TransducerModelConfig.NumThreads = options.NumThreads;
  128 + config.TransducerModelConfig.Debug = options.Debug ? 1 : 0;
  129 +
  130 + config.DecodingMethod = options.DecodingMethod;
  131 + config.MaxActivePaths = options.MaxActivePaths;
  132 + config.EnableEndpoint = options.EnableEndpoint ? 1 : 0;
  133 +
  134 + config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence;
  135 + config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence;
  136 + config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength;
  137 +
  138 + OnlineRecognizer recognizer = new OnlineRecognizer(config);
  139 +
  140 + string[] files = options.Files.ToArray();
  141 +
  142 + // We create a separate stream for each file
  143 + List<OnlineStream> streams = new List<OnlineStream>();
  144 + streams.EnsureCapacity(files.Length);
  145 +
  146 + for (int i = 0; i != files.Length; ++i)
  147 + {
  148 + OnlineStream s = recognizer.CreateStream();
  149 +
  150 + WaveReader waveReader = new WaveReader(files[i]);
  151 + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
  152 +
  153 + float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
  154 + s.AcceptWaveform(waveReader.SampleRate, tailPadding);
  155 + s.InputFinished();
  156 +
  157 + streams.Add(s);
  158 + }
  159 +
  160 + while (true)
  161 + {
  162 + var readyStreams = streams.Where(s => recognizer.IsReady(s));
  163 + if (!readyStreams.Any())
  164 + {
  165 + break;
  166 + }
  167 +
  168 + recognizer.Decode(readyStreams);
  169 + }
  170 +
  171 + // display results
  172 + for (int i = 0; i != files.Length; ++i)
  173 + {
  174 + var text = recognizer.GetResult(streams[i]).Text;
  175 + Console.WriteLine("--------------------");
  176 + Console.WriteLine(files[i]);
  177 + Console.WriteLine(text);
  178 + }
  179 + Console.WriteLine("--------------------");
  180 + }
  181 +}
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +using System;
  3 +using System.IO;
  4 +
  5 +using System.Runtime.InteropServices;
  6 +
  7 +namespace SherpaOnnx
  8 +{
  9 +
  10 + [StructLayout(LayoutKind.Sequential)]
  11 + public struct WaveHeader
  12 + {
  13 + public Int32 ChunkID;
  14 + public Int32 ChunkSize;
  15 + public Int32 Format;
  16 + public Int32 SubChunk1ID;
  17 + public Int32 SubChunk1Size;
  18 + public Int16 AudioFormat;
  19 + public Int16 NumChannels;
  20 + public Int32 SampleRate;
  21 + public Int32 ByteRate;
  22 + public Int16 BlockAlign;
  23 + public Int16 BitsPerSample;
  24 + public Int32 SubChunk2ID;
  25 + public Int32 SubChunk2Size;
  26 +
  27 + public bool Validate()
  28 + {
  29 + if (ChunkID != 0x46464952)
  30 + {
  31 + Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952");
  32 + return false;
  33 + }
  34 +
  35 + // E V A W
  36 + if (Format != 0x45564157)
  37 + {
  38 + Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157");
  39 + return false;
  40 + }
  41 +
  42 + // t m f
  43 + if (SubChunk1ID != 0x20746d66)
  44 + {
  45 + Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66");
  46 + return false;
  47 + }
  48 +
  49 + if (SubChunk1Size != 16)
  50 + {
  51 + Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16");
  52 + return false;
  53 + }
  54 +
  55 + if (AudioFormat != 1)
  56 + {
  57 + Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1");
  58 + return false;
  59 + }
  60 +
  61 + if (NumChannels != 1)
  62 + {
  63 + Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1");
  64 + return false;
  65 + }
  66 +
  67 + if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8))
  68 + {
  69 + Console.WriteLine($"Invalid byte rate: {ByteRate}.");
  70 + return false;
  71 + }
  72 +
  73 + if (BlockAlign != (NumChannels * BitsPerSample / 8))
  74 + {
  75 + Console.WriteLine($"Invalid block align: {ByteRate}.");
  76 + return false;
  77 + }
  78 +
  79 + if (BitsPerSample != 16)
  80 + { // we support only 16 bits per sample
  81 + Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16");
  82 + return false;
  83 + }
  84 +
  85 + return true;
  86 + }
  87 + }
  88 +
  89 + // It supports only 16-bit, single channel WAVE format.
  90 + // The sample rate can be any value.
  91 + public class WaveReader
  92 + {
  93 + public WaveReader(String fileName)
  94 + {
  95 + if (!File.Exists(fileName))
  96 + {
  97 + throw new ApplicationException($"{fileName} does not exist!");
  98 + }
  99 +
  100 + using (var stream = File.Open(fileName, FileMode.Open))
  101 + {
  102 + using (var reader = new BinaryReader(stream))
  103 + {
  104 + _header = ReadHeader(reader);
  105 +
  106 + if (!_header.Validate())
  107 + {
  108 + throw new ApplicationException($"Invalid wave file ${fileName}");
  109 + }
  110 +
  111 + SkipMetaData(reader);
  112 +
  113 + // now read samples
  114 + // _header.SubChunk2Size contains number of bytes in total.
  115 + // we assume each sample is of type int16
  116 + byte[] buffer = reader.ReadBytes(_header.SubChunk2Size);
  117 + short[] samples_int16 = new short[_header.SubChunk2Size / 2];
  118 + Buffer.BlockCopy(buffer, 0, samples_int16, 0, buffer.Length);
  119 +
  120 + _samples = new float[samples_int16.Length];
  121 +
  122 + for (var i = 0; i < samples_int16.Length; ++i)
  123 + {
  124 + _samples[i] = samples_int16[i] / 32768.0F;
  125 + }
  126 + }
  127 + }
  128 + }
  129 +
  130 + private static WaveHeader ReadHeader(BinaryReader reader)
  131 + {
  132 + byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader)));
  133 +
  134 + GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
  135 + WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!;
  136 + handle.Free();
  137 +
  138 + return header;
  139 + }
  140 +
  141 + private void SkipMetaData(BinaryReader reader)
  142 + {
  143 + var bs = reader.BaseStream;
  144 +
  145 + Int32 subChunk2ID = _header.SubChunk2ID;
  146 + Int32 subChunk2Size = _header.SubChunk2Size;
  147 +
  148 + while (bs.Position != bs.Length && subChunk2ID != 0x61746164)
  149 + {
  150 + bs.Seek(subChunk2Size, SeekOrigin.Current);
  151 + subChunk2ID = reader.ReadInt32();
  152 + subChunk2Size = reader.ReadInt32();
  153 + }
  154 + _header.SubChunk2ID = subChunk2ID;
  155 + _header.SubChunk2Size = subChunk2Size;
  156 + }
  157 +
  158 + private WaveHeader _header;
  159 +
  160 + // Samples are normalized to the range [-1, 1]
  161 + private float[] _samples;
  162 +
  163 + public int SampleRate => _header.SampleRate;
  164 + public float[] Samples => _samples;
  165 +
  166 + public static void Test(String fileName)
  167 + {
  168 + WaveReader reader = new WaveReader(fileName);
  169 + Console.WriteLine($"samples length: {reader.Samples.Length}");
  170 + Console.WriteLine($"samples rate: {reader.SampleRate}");
  171 + }
  172 + }
  173 +
  174 +}
1 <Project Sdk="Microsoft.NET.Sdk"> 1 <Project Sdk="Microsoft.NET.Sdk">
2 2
3 <PropertyGroup> 3 <PropertyGroup>
  4 + <OutputType>Exe</OutputType>
4 <TargetFramework>net6.0</TargetFramework> 5 <TargetFramework>net6.0</TargetFramework>
  6 + <RootNamespace>online_decode_files</RootNamespace>
5 <ImplicitUsings>enable</ImplicitUsings> 7 <ImplicitUsings>enable</ImplicitUsings>
6 <Nullable>enable</Nullable> 8 <Nullable>enable</Nullable>
7 - <AllowUnsafeBlocks>true</AllowUnsafeBlocks>  
8 </PropertyGroup> 9 </PropertyGroup>
9 10
  11 + <ItemGroup>
  12 + <PackageReference Include="CommandLineParser" Version="2.9.1" />
  13 + <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  14 + </ItemGroup>
  15 +
10 </Project> 16 </Project>
#!/usr/bin/env bash

# Decode two test wave files with a streaming zipformer transducer model.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
# to download the model files

# Stop at the first failing command, e.g., when git clone fails,
# instead of running the following commands in the wrong directory.
set -e

if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  git lfs pull --include "*.onnx"
  cd ..
fi

dotnet run -c Release \
  --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
  --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
  --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \
  --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
  --decoding-method greedy_search \
  --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \
  ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav
  1 +
  2 +Microsoft Visual Studio Solution File, Format Version 12.00
  3 +# Visual Studio Version 17
  4 +VisualStudioVersion = 17.0.31903.59
  5 +MinimumVisualStudioVersion = 10.0.40219.1
  6 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}"
  7 +EndProject
  8 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}"
  9 +EndProject
  10 +Global
  11 + GlobalSection(SolutionConfigurationPlatforms) = preSolution
  12 + Debug|Any CPU = Debug|Any CPU
  13 + Release|Any CPU = Release|Any CPU
  14 + EndGlobalSection
  15 + GlobalSection(SolutionProperties) = preSolution
  16 + HideSolutionNode = FALSE
  17 + EndGlobalSection
  18 + GlobalSection(ProjectConfigurationPlatforms) = postSolution
  19 + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  20 + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.Build.0 = Debug|Any CPU
  21 + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
  22 + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.Build.0 = Release|Any CPU
  23 + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  24 + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.Build.0 = Debug|Any CPU
  25 + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.ActiveCfg = Release|Any CPU
  26 + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.Build.0 = Release|Any CPU
  27 + EndGlobalSection
  28 +EndGlobal
  1 +all
  2 +macos
  3 +linux
  4 +windows
  5 +packages
  1 +# Introduction
  2 +
  3 +[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is an open-source
  4 +real-time speech recognition toolkit developed
  5 +by the Next-gen Kaldi team.
  6 +
It supports streaming recognition on a variety of
platforms such as Android, iOS, Raspberry Pi, Linux, Windows, macOS, etc.
  9 +
It does not require an Internet connection during recognition.
  11 +
  12 +See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
  13 +for details.
  14 +
Please see
https://github.com/k2-fsa/sherpa-onnx/tree/dot-net/dotnet-examples
for how to use the C# APIs of this package.
  1 +#!/usr/bin/env python3
  2 +# Copyright (c) 2023 Xiaomi Corporation
  3 +
  4 +import argparse
  5 +import re
  6 +from pathlib import Path
  7 +
  8 +import jinja2
  9 +
  10 +SHERPA_ONNX_DIR = Path(__file__).resolve().parent.parent.parent
  11 +
  12 +
def get_version():
    """Return the version declared in the top-level CMakeLists.txt.

    The file is searched for a line of the form
    ``set(SHERPA_ONNX_VERSION "x.y.z")``. The captured value is returned
    with surrounding double quotes removed.

    Raises:
      ValueError: If CMakeLists.txt contains no such line.
    """
    cmake_file = SHERPA_ONNX_DIR / "CMakeLists.txt"
    with open(cmake_file) as f:
        content = f.read()

    match = re.search(r"set\(SHERPA_ONNX_VERSION (.*)\)", content)
    if match is None:
        # Fail with a message that names the file instead of an
        # AttributeError on None.
        raise ValueError(f"Cannot find SHERPA_ONNX_VERSION in {cmake_file}")

    return match.group(1).strip('"')
  20 +
  21 +
def read_proj_file(filename):
    """Return the whole content of the text file ``filename`` as a string."""
    return Path(filename).read_text()
  25 +
  26 +
def get_dict():
    """Return the template variables shared by all generated project files.

    A new dict is returned on every call, so callers can add their own
    entries to it. It contains a single key, ``version``, whose value comes
    from get_version().
    """
    return {
        "version": get_version(),
    }
  32 +
  33 +
def _render_runtime_csproj(s, platform_dir, dotnet_rid, libs):
    """Render the runtime project template for one platform.

    Args:
      s:
        Content of the runtime project template.
      platform_dir:
        Name of the per-platform directory, e.g., "linux". The native
        libraries are looked up in
        ``SHERPA_ONNX_DIR/<platform_dir>/sherpa_onnx/lib/`` and the result is
        written to ``./<platform_dir>/sherpa-onnx.runtime.csproj``, relative
        to the current working directory.
      dotnet_rid:
        Value of the template variable ``dotnet_rid``, e.g., "linux-x64".
      libs:
        File names of the native libraries to list in the project file.
    """
    prefix = f"{SHERPA_ONNX_DIR}/{platform_dir}/sherpa_onnx/lib/"

    d = get_dict()
    d["dotnet_rid"] = dotnet_rid
    d["libs"] = "\n ;".join(prefix + lib for lib in libs)

    # Render first so that a template error does not leave a truncated file.
    rendered = jinja2.Environment().from_string(s).render(**d)
    with open(f"./{platform_dir}/sherpa-onnx.runtime.csproj", "w") as f:
        f.write(rendered)


def process_linux(s):
    """Generate ./linux/sherpa-onnx.runtime.csproj from the template ``s``."""
    _render_runtime_csproj(
        s,
        platform_dir="linux",
        dotnet_rid="linux-x64",
        libs=[
            "libkaldi-native-fbank-core.so",
            "libonnxruntime.so.1.14.0",
            "libsherpa-onnx-c-api.so",
            "libsherpa-onnx-core.so",
        ],
    )


def process_macos(s):
    """Generate ./macos/sherpa-onnx.runtime.csproj from the template ``s``."""
    _render_runtime_csproj(
        s,
        platform_dir="macos",
        dotnet_rid="osx-x64",
        libs=[
            "libkaldi-native-fbank-core.dylib",
            "libonnxruntime.1.14.0.dylib",
            "libsherpa-onnx-c-api.dylib",
            "libsherpa-onnx-core.dylib",
        ],
    )


def process_windows(s):
    """Generate ./windows/sherpa-onnx.runtime.csproj from the template ``s``."""
    _render_runtime_csproj(
        s,
        platform_dir="windows",
        dotnet_rid="win-x64",
        libs=[
            "kaldi-native-fbank-core.dll",
            "onnxruntime.dll",
            "sherpa-onnx-c-api.dll",
            "sherpa-onnx-core.dll",
        ],
    )
  98 +
  99 +
def main():
    """Generate all project files.

    The three per-platform runtime projects are rendered from
    ./sherpa-onnx.csproj.runtime.in. After that, ./all/sherpa-onnx.csproj is
    rendered from ./sherpa-onnx.csproj.in. All paths are relative to the
    current working directory.
    """
    runtime_template = read_proj_file("./sherpa-onnx.csproj.runtime.in")
    for process in (process_macos, process_linux, process_windows):
        process(runtime_template)

    main_template = read_proj_file("./sherpa-onnx.csproj.in")
    context = get_dict()
    context["packages_dir"] = str(SHERPA_ONNX_DIR / "scripts/dotnet/packages")

    rendered = jinja2.Environment().from_string(main_template).render(**context)
    with open("./all/sherpa-onnx.csproj", "w") as out:
        out.write(rendered)
  115 +
  116 +
  117 +if __name__ == "__main__":
  118 + main()
  1 +/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +/// Copyright (c) 2023 by manyeyes
  3 +
  4 +using System.Linq;
  5 +using System.Collections.Generic;
  6 +using System.Runtime.InteropServices;
  7 +using System;
  8 +
  9 +namespace SherpaOnnx
  10 +{
  11 +
  12 + [StructLayout(LayoutKind.Sequential)]
  13 + public struct OfflineTransducerModelConfig
  14 + {
  15 + public OfflineTransducerModelConfig()
  16 + {
  17 + Encoder = "";
  18 + Decoder = "";
  19 + Joiner = "";
  20 + }
  21 + [MarshalAs(UnmanagedType.LPStr)]
  22 + public string Encoder;
  23 +
  24 + [MarshalAs(UnmanagedType.LPStr)]
  25 + public string Decoder;
  26 +
  27 + [MarshalAs(UnmanagedType.LPStr)]
  28 + public string Joiner;
  29 + }
  30 +
  31 + [StructLayout(LayoutKind.Sequential)]
  32 + public struct OfflineParaformerModelConfig
  33 + {
  34 + public OfflineParaformerModelConfig()
  35 + {
  36 + Model = "";
  37 + }
  38 + [MarshalAs(UnmanagedType.LPStr)]
  39 + public string Model;
  40 + }
  41 +
  42 + [StructLayout(LayoutKind.Sequential)]
  43 + public struct OfflineNemoEncDecCtcModelConfig
  44 + {
  45 + public OfflineNemoEncDecCtcModelConfig()
  46 + {
  47 + Model = "";
  48 + }
  49 + [MarshalAs(UnmanagedType.LPStr)]
  50 + public string Model;
  51 + }
  52 +
  53 + [StructLayout(LayoutKind.Sequential)]
  54 + public struct OfflineLMConfig
  55 + {
  56 + public OfflineLMConfig()
  57 + {
  58 + Model = "";
  59 + Scale = 0.5F;
  60 + }
  61 + [MarshalAs(UnmanagedType.LPStr)]
  62 + public string Model;
  63 +
  64 + public float Scale;
  65 + }
  66 +
  67 + [StructLayout(LayoutKind.Sequential)]
  68 + public struct OfflineModelConfig
  69 + {
  70 + public OfflineModelConfig()
  71 + {
  72 + Transducer = new OfflineTransducerModelConfig();
  73 + Paraformer = new OfflineParaformerModelConfig();
  74 + NeMoCtc = new OfflineNemoEncDecCtcModelConfig();
  75 + Tokens = "";
  76 + NumThreads = 1;
  77 + Debug = 0;
  78 + }
  79 + public OfflineTransducerModelConfig Transducer;
  80 + public OfflineParaformerModelConfig Paraformer;
  81 + public OfflineNemoEncDecCtcModelConfig NeMoCtc;
  82 +
  83 + [MarshalAs(UnmanagedType.LPStr)]
  84 + public string Tokens;
  85 +
  86 + public int NumThreads;
  87 +
  88 + public int Debug;
  89 + }
  90 +
  91 + [StructLayout(LayoutKind.Sequential)]
  92 + public struct OfflineRecognizerConfig
  93 + {
  94 + public OfflineRecognizerConfig()
  95 + {
  96 + FeatConfig = new FeatureConfig();
  97 + ModelConfig = new OfflineModelConfig();
  98 + LmConfig = new OfflineLMConfig();
  99 +
  100 + DecodingMethod = "greedy_search";
  101 + MaxActivePaths = 4;
  102 +
  103 + }
  104 + public FeatureConfig FeatConfig;
  105 + public OfflineModelConfig ModelConfig;
  106 + public OfflineLMConfig LmConfig;
  107 +
  108 + [MarshalAs(UnmanagedType.LPStr)]
  109 + public string DecodingMethod;
  110 +
  111 + public int MaxActivePaths;
  112 + }
  113 +
  114 + public class OfflineRecognizerResult
  115 + {
  116 + public OfflineRecognizerResult(IntPtr handle)
  117 + {
  118 + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
  119 + _text = Marshal.PtrToStringUTF8(impl.Text);
  120 + }
  121 +
  122 + [StructLayout(LayoutKind.Sequential)]
  123 + struct Impl
  124 + {
  125 + public IntPtr Text;
  126 + }
  127 +
  128 + private String _text;
  129 + public String Text => _text;
  130 + }
  131 +
  132 + public class OfflineStream : IDisposable
  133 + {
  134 + public OfflineStream(IntPtr p)
  135 + {
  136 + _handle = new HandleRef(this, p);
  137 + }
  138 +
  139 + public void AcceptWaveform(int sampleRate, float[] samples)
  140 + {
  141 + AcceptWaveform(Handle, sampleRate, samples, samples.Length);
  142 + }
  143 +
  144 + public OfflineRecognizerResult Result
  145 + {
  146 + get
  147 + {
  148 + IntPtr h = GetResult(_handle.Handle);
  149 + OfflineRecognizerResult result = new OfflineRecognizerResult(h);
  150 + DestroyResult(h);
  151 + return result;
  152 + }
  153 + }
  154 +
  155 + ~OfflineStream()
  156 + {
  157 + Cleanup();
  158 + }
  159 +
  160 + public void Dispose()
  161 + {
  162 + Cleanup();
  163 + // Prevent the object from being placed on the
  164 + // finalization queue
  165 + System.GC.SuppressFinalize(this);
  166 + }
  167 +
  168 + private void Cleanup()
  169 + {
  170 + DestroyOfflineStream(Handle);
  171 +
  172 + // Don't permit the handle to be used again.
  173 + _handle = new HandleRef(this, IntPtr.Zero);
  174 + }
  175 +
  176 + private HandleRef _handle;
  177 + public IntPtr Handle => _handle.Handle;
  178 +
  179 + [DllImport(Dll.Filename)]
  180 + private static extern void DestroyOfflineStream(IntPtr handle);
  181 +
  182 + [DllImport(Dll.Filename, EntryPoint = "AcceptWaveformOffline")]
  183 + private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);
  184 +
  185 + [DllImport(Dll.Filename, EntryPoint = "GetOfflineStreamResult")]
  186 + private static extern IntPtr GetResult(IntPtr handle);
  187 +
  188 + [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")]
  189 + private static extern void DestroyResult(IntPtr handle);
  190 + }
  191 +
  192 + public class OfflineRecognizer : IDisposable
  193 + {
  194 + public OfflineRecognizer(OfflineRecognizerConfig config)
  195 + {
  196 + IntPtr h = CreateOfflineRecognizer(ref config);
  197 + _handle = new HandleRef(this, h);
  198 + }
  199 +
  200 + public OfflineStream CreateStream()
  201 + {
  202 + IntPtr p = CreateOfflineStream(_handle.Handle);
  203 + return new OfflineStream(p);
  204 + }
  205 +
  206 + /// You have to ensure that IsReady(stream) returns true before
  207 + /// you call this method
  208 + public void Decode(OfflineStream stream)
  209 + {
  210 + Decode(_handle.Handle, stream.Handle);
  211 + }
  212 +
  213 + // The caller should ensure all passed streams are ready for decoding.
  214 + public void Decode(IEnumerable<OfflineStream> streams)
  215 + {
  216 + IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
  217 + Decode(_handle.Handle, ptrs, ptrs.Length);
  218 + }
  219 +
  220 + public void Dispose()
  221 + {
  222 + Cleanup();
  223 + // Prevent the object from being placed on the
  224 + // finalization queue
  225 + System.GC.SuppressFinalize(this);
  226 + }
  227 +
  228 + ~OfflineRecognizer()
  229 + {
  230 + Cleanup();
  231 + }
  232 +
  233 + private void Cleanup()
  234 + {
  235 + DestroyOfflineRecognizer(_handle.Handle);
  236 +
  237 + // Don't permit the handle to be used again.
  238 + _handle = new HandleRef(this, IntPtr.Zero);
  239 + }
  240 +
  241 + private HandleRef _handle;
  242 +
  243 + [DllImport(Dll.Filename)]
  244 + private static extern IntPtr CreateOfflineRecognizer(ref OfflineRecognizerConfig config);
  245 +
  246 + [DllImport(Dll.Filename)]
  247 + private static extern void DestroyOfflineRecognizer(IntPtr handle);
  248 +
  249 + [DllImport(Dll.Filename)]
  250 + private static extern IntPtr CreateOfflineStream(IntPtr handle);
  251 +
  252 + [DllImport(Dll.Filename, EntryPoint = "DecodeOfflineStream")]
  253 + private static extern void Decode(IntPtr handle, IntPtr stream);
  254 +
  255 + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")]
  256 + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
  257 + }
  258 +
  259 +}
  1 +/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +/// Copyright (c) 2023 by manyeyes
  3 +
  4 +using System.Linq;
  5 +using System.Collections.Generic;
  6 +using System.Runtime.InteropServices;
  7 +using System;
  8 +
  9 +namespace SherpaOnnx
  10 +{
  /// Holds the name of the native library used by every [DllImport]
  /// in this assembly.
  internal static class Dll
  {
    /// Library name without platform specific prefix or extension
    public const string Filename = "sherpa-onnx-c-api";
  }
  15 +
  16 + [StructLayout(LayoutKind.Sequential)]
  17 + public struct OnlineTransducerModelConfig
  18 + {
  19 + public OnlineTransducerModelConfig()
  20 + {
  21 + Encoder = "";
  22 + Decoder = "";
  23 + Joiner = "";
  24 + Tokens = "";
  25 + NumThreads = 1;
  26 + Debug = 0;
  27 + }
  28 + [MarshalAs(UnmanagedType.LPStr)]
  29 + public string Encoder;
  30 +
  31 + [MarshalAs(UnmanagedType.LPStr)]
  32 + public string Decoder;
  33 +
  34 + [MarshalAs(UnmanagedType.LPStr)]
  35 + public string Joiner;
  36 +
  37 + [MarshalAs(UnmanagedType.LPStr)]
  38 + public string Tokens;
  39 +
  40 + /// Number of threads used to run the neural network model
  41 + public int NumThreads;
  42 +
  43 + /// true to print debug information of the model
  44 + public int Debug;
  45 + }
  46 +
  47 + /// It expects 16 kHz 16-bit single channel wave format.
  48 + [StructLayout(LayoutKind.Sequential)]
  49 + public struct FeatureConfig
  50 + {
  51 + public FeatureConfig()
  52 + {
  53 + SampleRate = 16000;
  54 + FeatureDim = 80;
  55 + }
  56 + /// Sample rate of the input data. MUST match the one expected
  57 + /// by the model. For instance, it should be 16000 for models provided
  58 + /// by us.
  59 + public int SampleRate;
  60 +
  61 + /// Feature dimension of the model.
  62 + /// For instance, it should be 80 for models provided by us.
  63 + public int FeatureDim;
  64 + }
  65 +
  66 + [StructLayout(LayoutKind.Sequential)]
  67 + public struct OnlineRecognizerConfig
  68 + {
  69 + public OnlineRecognizerConfig()
  70 + {
  71 + FeatConfig = new FeatureConfig();
  72 + TransducerModelConfig = new OnlineTransducerModelConfig();
  73 + DecodingMethod = "greedy_search";
  74 + MaxActivePaths = 4;
  75 + EnableEndpoint = 0;
  76 + Rule1MinTrailingSilence = 1.2F;
  77 + Rule2MinTrailingSilence = 2.4F;
  78 + Rule3MinUtteranceLength = 20.0F;
  79 + }
  80 + public FeatureConfig FeatConfig;
  81 + public OnlineTransducerModelConfig TransducerModelConfig;
  82 +
  83 + [MarshalAs(UnmanagedType.LPStr)]
  84 + public string DecodingMethod;
  85 +
  86 + /// Used only when decoding_method is modified_beam_search
  87 + /// Example value: 4
  88 + public int MaxActivePaths;
  89 +
  90 + /// 0 to disable endpoint detection.
  91 + /// A non-zero value to enable endpoint detection.
  92 + public int EnableEndpoint;
  93 +
  94 + /// An endpoint is detected if trailing silence in seconds is larger than
  95 + /// this value even if nothing has been decoded.
  96 + /// Used only when enable_endpoint is not 0.
  97 + public float Rule1MinTrailingSilence;
  98 +
  99 + /// An endpoint is detected if trailing silence in seconds is larger than
  100 + /// this value after something that is not blank has been decoded.
  101 + /// Used only when enable_endpoint is not 0.
  102 + public float Rule2MinTrailingSilence;
  103 +
  104 + /// An endpoint is detected if the utterance in seconds is larger than
  105 + /// this value.
  106 + /// Used only when enable_endpoint is not 0.
  107 + public float Rule3MinUtteranceLength;
  108 + }
  109 +
  110 + public class OnlineRecognizerResult
  111 + {
  112 + public OnlineRecognizerResult(IntPtr handle)
  113 + {
  114 + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));
  115 + _text = Marshal.PtrToStringUTF8(impl.Text);
  116 + }
  117 +
  118 + [StructLayout(LayoutKind.Sequential)]
  119 + struct Impl
  120 + {
  121 + public IntPtr Text;
  122 + }
  123 +
  124 + private String _text;
  125 + public String Text => _text;
  126 + }
  127 +
  128 + public class OnlineStream : IDisposable
  129 + {
  130 + public OnlineStream(IntPtr p)
  131 + {
  132 + _handle = new HandleRef(this, p);
  133 + }
  134 +
  135 + public void AcceptWaveform(int sampleRate, float[] samples)
  136 + {
  137 + AcceptWaveform(Handle, sampleRate, samples, samples.Length);
  138 + }
  139 +
  140 + public void InputFinished()
  141 + {
  142 + InputFinished(Handle);
  143 + }
  144 +
  145 + ~OnlineStream()
  146 + {
  147 + Cleanup();
  148 + }
  149 +
  150 + public void Dispose()
  151 + {
  152 + Cleanup();
  153 + // Prevent the object from being placed on the
  154 + // finalization queue
  155 + System.GC.SuppressFinalize(this);
  156 + }
  157 +
  158 + private void Cleanup()
  159 + {
  160 + DestroyOnlineStream(Handle);
  161 +
  162 + // Don't permit the handle to be used again.
  163 + _handle = new HandleRef(this, IntPtr.Zero);
  164 + }
  165 +
  166 + private HandleRef _handle;
  167 + public IntPtr Handle => _handle.Handle;
  168 +
  169 + [DllImport(Dll.Filename)]
  170 + private static extern void DestroyOnlineStream(IntPtr handle);
  171 +
  172 + [DllImport(Dll.Filename)]
  173 + private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n);
  174 +
  175 + [DllImport(Dll.Filename)]
  176 + private static extern void InputFinished(IntPtr handle);
  177 + }
  178 +
  179 + // please see
  180 + // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
  181 + // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources
  182 + public class OnlineRecognizer : IDisposable
  183 + {
  184 + public OnlineRecognizer(OnlineRecognizerConfig config)
  185 + {
  186 + IntPtr h = CreateOnlineRecognizer(ref config);
  187 + _handle = new HandleRef(this, h);
  188 + }
  189 +
  190 + public OnlineStream CreateStream()
  191 + {
  192 + IntPtr p = CreateOnlineStream(_handle.Handle);
  193 + return new OnlineStream(p);
  194 + }
  195 +
  196 + /// Return true if the passed stream is ready for decoding.
  197 + public bool IsReady(OnlineStream stream)
  198 + {
  199 + return IsReady(_handle.Handle, stream.Handle) != 0;
  200 + }
  201 +
  202 + /// Return true if an endpoint is detected for this stream.
  203 + /// You probably need to invoke Reset(stream) when this method returns
  204 + /// true.
  205 + public bool IsEndpoint(OnlineStream stream)
  206 + {
  207 + return IsEndpoint(_handle.Handle, stream.Handle) != 0;
  208 + }
  209 +
  210 + /// You have to ensure that IsReady(stream) returns true before
  211 + /// you call this method
  212 + public void Decode(OnlineStream stream)
  213 + {
  214 + Decode(_handle.Handle, stream.Handle);
  215 + }
  216 +
  217 + // The caller should ensure all passed streams are ready for decoding.
  218 + public void Decode(IEnumerable<OnlineStream> streams)
  219 + {
  220 + IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
  221 + Decode(_handle.Handle, ptrs, ptrs.Length);
  222 + }
  223 +
  224 + public OnlineRecognizerResult GetResult(OnlineStream stream)
  225 + {
  226 + IntPtr h = GetResult(_handle.Handle, stream.Handle);
  227 + OnlineRecognizerResult result = new OnlineRecognizerResult(h);
  228 + DestroyResult(h);
  229 + return result;
  230 + }
  231 +
  232 + /// When this method returns, IsEndpoint(stream) will return false.
  233 + public void Reset(OnlineStream stream)
  234 + {
  235 + Reset(_handle.Handle, stream.Handle);
  236 + }
  237 +
  238 + public void Dispose()
  239 + {
  240 + Cleanup();
  241 + // Prevent the object from being placed on the
  242 + // finalization queue
  243 + System.GC.SuppressFinalize(this);
  244 + }
  245 +
  246 + ~OnlineRecognizer()
  247 + {
  248 + Cleanup();
  249 + }
  250 +
  251 + private void Cleanup()
  252 + {
  253 + DestroyOnlineRecognizer(_handle.Handle);
  254 +
  255 + // Don't permit the handle to be used again.
  256 + _handle = new HandleRef(this, IntPtr.Zero);
  257 + }
  258 +
  259 + private HandleRef _handle;
  260 +
  261 + [DllImport(Dll.Filename)]
  262 + private static extern IntPtr CreateOnlineRecognizer(ref OnlineRecognizerConfig config);
  263 +
  264 + [DllImport(Dll.Filename)]
  265 + private static extern void DestroyOnlineRecognizer(IntPtr handle);
  266 +
  267 + [DllImport(Dll.Filename)]
  268 + private static extern IntPtr CreateOnlineStream(IntPtr handle);
  269 +
  270 + [DllImport(Dll.Filename, EntryPoint = "IsOnlineStreamReady")]
  271 + private static extern int IsReady(IntPtr handle, IntPtr stream);
  272 +
  273 + [DllImport(Dll.Filename, EntryPoint = "DecodeOnlineStream")]
  274 + private static extern void Decode(IntPtr handle, IntPtr stream);
  275 +
  276 + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOnlineStreams")]
  277 + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
  278 +
  279 + [DllImport(Dll.Filename, EntryPoint = "GetOnlineStreamResult")]
  280 + private static extern IntPtr GetResult(IntPtr handle, IntPtr stream);
  281 +
  282 + [DllImport(Dll.Filename, EntryPoint = "DestroyOnlineRecognizerResult")]
  283 + private static extern void DestroyResult(IntPtr result);
  284 +
  285 + [DllImport(Dll.Filename)]
  286 + private static extern void Reset(IntPtr handle, IntPtr stream);
  287 +
  288 + [DllImport(Dll.Filename)]
  289 + private static extern int IsEndpoint(IntPtr handle, IntPtr stream);
  290 + }
  291 +}
#!/usr/bin/env bash
# Copyright (c) 2023 Xiaomi Corporation
#
# Generate the project files and build one nuget package per directory.
# All packages are written to ./packages

set -ex

mkdir -p macos linux windows all

# The C# sources are compiled only into the package built in ./all
cp ./online.cs all
cp ./offline.cs all

./generate.py

# Build ./all last, after the three runtime packages have been
# written to ./packages
for d in linux macos windows all; do
  pushd $d
  dotnet build -c Release
  dotnet pack -c Release -o ../packages
  popd
done

ls -lh packages
  1 +<Project Sdk="Microsoft.NET.Sdk">
  2 + <PropertyGroup>
  3 + <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
  4 + <PackageReadmeFile>README.md</PackageReadmeFile>
  5 + <OutputType>Library</OutputType>
  6 + <LangVersion>10.0</LangVersion>
  7 + <TargetFrameworks>netstandard2.1;netcoreapp3.1;net6.0;net7.0</TargetFrameworks>
  8 + <RuntimeIdentifiers>linux-x64;osx-x64;win-x64</RuntimeIdentifiers>
  9 + <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  10 + <AssemblyName>sherpa-onnx</AssemblyName>
  11 + <Version>{{ version }}</Version>
  12 +
  13 + <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
  14 + <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
  15 + <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
  16 + privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>
  17 +
  18 + <Authors>The Next-gen Kaldi development team</Authors>
  19 + <Owners>The Next-gen Kaldi development team</Owners>
  20 + <Company>Xiaomi Corporation</Company>
  21 + <Copyright>Copyright 2019-2023 Xiaomi Corporation</Copyright>
  22 + <Description>sherpa-onnx is an open-source real-time speech recognition toolkit developed
  23 + by the Next-gen Kaldi team. It supports streaming recognition on a variety of
  24 + platforms such as Android, iOS, Raspberry, Linux, Windows, macOS, etc.
  25 +
  26 + It does not require Internet connection during recognition.
  27 +
  28 + See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html
  29 + for details.
  30 + </Description>
  31 +
  32 + <!-- Pack Option -->
  33 + <Title>sherpa-onnx v{{ version }}</Title>
  34 + <PackageId>org.k2fsa.sherpa.onnx</PackageId>
  35 +
  36 + <!-- Signing -->
  37 + <SignAssembly>false</SignAssembly>
  38 + <PublicSign>false</PublicSign>
  39 + <DelaySign>false</DelaySign>
  40 + </PropertyGroup>
  41 +
  42 + <PropertyGroup>
  43 + <RestoreSources>{{ packages_dir }};$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  44 + </PropertyGroup>
  45 +
  46 + <ItemGroup>
  47 + <None Include="../README.md" Pack="true" PackagePath="/"/>
  48 + </ItemGroup>
  49 +
  50 + <ItemGroup>
  51 + <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.linux-x64" Version="{{ version }}" />
  52 + <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.osx-x64" Version="{{ version }}" />
  53 + <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.win-x64" Version="{{ version }}" />
  54 + </ItemGroup>
  55 +
  56 +</Project>
  1 +<Project Sdk="Microsoft.NET.Sdk">
  2 + <PropertyGroup>
  3 + <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
  4 + <PackageReadmeFile>README.md</PackageReadmeFile>
  5 + <OutputType>Library</OutputType>
  6 + <TargetFrameworks>netstandard2.0;netcoreapp3.1;net6.0</TargetFrameworks>
  7 + <RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier>
  8 + <AssemblyName>sherpa-onnx</AssemblyName>
  9 + <Version>{{ version }}</Version>
  10 +
  11 + <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl>
  12 + <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl>
  13 + <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline
  14 + privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags>
  15 +
  16 + <!-- Nuget Properties -->
  17 + <Description>.NET native {{ dotnet_rid }} wrapper for the sherpa-onnx project.
  18 +
  19 + In general, you don't need to use this package directly.
  20 +
  21 + Please use https://www.nuget.org/packages/org.k2fsa.sherpa.onnx instead
  22 + </Description>
  23 + <IncludeBuildOutput>false</IncludeBuildOutput>
  24 +
  25 + <!-- Pack Option -->
  26 + <Title>sherpa-onnx {{ dotnet_rid }} v{{ version }}</Title>
  27 + <PackageId>org.k2fsa.sherpa.onnx.runtime.{{ dotnet_rid }}</PackageId>
  28 +
  29 + <!-- Signing -->
  30 + <SignAssembly>false</SignAssembly>
  31 + <PublicSign>false</PublicSign>
  32 + <DelaySign>false</DelaySign>
  33 + </PropertyGroup>
  34 +
  35 + <ItemGroup>
  36 + <None Include="../README.md" Pack="true" PackagePath="/"/>
  37 + </ItemGroup>
  38 +
  39 + <ItemGroup>
  40 + <!-- Native library must be in native directory... -->
  41 + <!-- If project is built as a STATIC_LIBRARY (e.g. Windows) then we don't have to include it -->
  42 + <Content Include="
  43 + {{ libs }}
  44 + ">
  45 + <PackagePath>runtimes/{{ dotnet_rid }}/native/%(Filename)%(Extension)</PackagePath>
  46 + <Pack>true</Pack>
  47 + <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
  48 + </Content>
  49 + </ItemGroup>
  50 +</Project>
@@ -2,6 +2,11 @@ include_directories(${CMAKE_SOURCE_DIR}) @@ -2,6 +2,11 @@ include_directories(${CMAKE_SOURCE_DIR})
2 add_library(sherpa-onnx-c-api c-api.cc) 2 add_library(sherpa-onnx-c-api c-api.cc)
3 target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core) 3 target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core)
4 4
# When building shared libraries, compile the sources of sherpa-onnx-c-api
# with the two macros below defined to 1. The definitions are PRIVATE,
# i.e., they are not propagated to targets linking against sherpa-onnx-c-api.
if(BUILD_SHARED_LIBS)
  target_compile_definitions(sherpa-onnx-c-api
    PRIVATE
      SHERPA_ONNX_BUILD_SHARED_LIBS=1
      SHERPA_ONNX_BUILD_MAIN_LIB=1
  )
endif()
  9 +
5 install(TARGETS sherpa-onnx-c-api DESTINATION lib) 10 install(TARGETS sherpa-onnx-c-api DESTINATION lib)
6 11
7 install(FILES c-api.h 12 install(FILES c-api.h
@@ -10,10 +10,11 @@ @@ -10,10 +10,11 @@
10 #include <vector> 10 #include <vector>
11 11
12 #include "sherpa-onnx/csrc/display.h" 12 #include "sherpa-onnx/csrc/display.h"
  13 +#include "sherpa-onnx/csrc/offline-recognizer.h"
13 #include "sherpa-onnx/csrc/online-recognizer.h" 14 #include "sherpa-onnx/csrc/online-recognizer.h"
14 15
15 struct SherpaOnnxOnlineRecognizer { 16 struct SherpaOnnxOnlineRecognizer {
16 - sherpa_onnx::OnlineRecognizer *impl; 17 + std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl;
17 }; 18 };
18 19
19 struct SherpaOnnxOnlineStream { 20 struct SherpaOnnxOnlineStream {
@@ -56,14 +57,19 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( @@ -56,14 +57,19 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
56 recognizer_config.endpoint_config.rule3.min_utterance_length = 57 recognizer_config.endpoint_config.rule3.min_utterance_length =
57 config->rule3_min_utterance_length; 58 config->rule3_min_utterance_length;
58 59
  60 + if (config->model_config.debug) {
  61 + fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
  62 + }
  63 +
59 SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; 64 SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;
60 - recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config); 65 +
  66 + recognizer->impl =
  67 + std::make_unique<sherpa_onnx::OnlineRecognizer>(recognizer_config);
61 68
62 return recognizer; 69 return recognizer;
63 } 70 }
64 71
65 void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer) { 72 void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer) {
66 - delete recognizer->impl;  
67 delete recognizer; 73 delete recognizer;
68 } 74 }
69 75
@@ -144,3 +150,116 @@ void DestroyDisplay(SherpaOnnxDisplay *display) { delete display; } @@ -144,3 +150,116 @@ void DestroyDisplay(SherpaOnnxDisplay *display) { delete display; }
144 void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s) { 150 void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s) {
145 display->impl->Print(idx, s); 151 display->impl->Print(idx, s);
146 } 152 }
  153 +
  154 +// ============================================================
  155 +// For offline ASR (i.e., non-streaming ASR)
  156 +// ============================================================
  157 +//
  158 +struct SherpaOnnxOfflineRecognizer {
  159 + std::unique_ptr<sherpa_onnx::OfflineRecognizer> impl;
  160 +};
  161 +
  162 +struct SherpaOnnxOfflineStream {
  163 + std::unique_ptr<sherpa_onnx::OfflineStream> impl;
  164 + explicit SherpaOnnxOfflineStream(
  165 + std::unique_ptr<sherpa_onnx::OfflineStream> p)
  166 + : impl(std::move(p)) {}
  167 +};
  168 +
  169 +SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
  170 + const SherpaOnnxOfflineRecognizerConfig *config) {
  171 + sherpa_onnx::OfflineRecognizerConfig recognizer_config;
  172 +
  173 + recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
  174 +
  175 + recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
  176 +
  177 + recognizer_config.model_config.transducer.encoder_filename =
  178 + config->model_config.transducer.encoder;
  179 +
  180 + recognizer_config.model_config.transducer.decoder_filename =
  181 + config->model_config.transducer.decoder;
  182 +
  183 + recognizer_config.model_config.transducer.joiner_filename =
  184 + config->model_config.transducer.joiner;
  185 +
  186 + recognizer_config.model_config.paraformer.model =
  187 + config->model_config.paraformer.model;
  188 +
  189 + recognizer_config.model_config.nemo_ctc.model =
  190 + config->model_config.nemo_ctc.model;
  191 +
  192 + recognizer_config.model_config.tokens = config->model_config.tokens;
  193 + recognizer_config.model_config.num_threads = config->model_config.num_threads;
  194 + recognizer_config.model_config.debug = config->model_config.debug;
  195 +
  196 + recognizer_config.lm_config.model = config->lm_config.model;
  197 + recognizer_config.lm_config.scale = config->lm_config.scale;
  198 +
  199 + recognizer_config.decoding_method = config->decoding_method;
  200 + recognizer_config.max_active_paths = config->max_active_paths;
  201 +
  202 + if (config->model_config.debug) {
  203 + fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
  204 + }
  205 +
  206 + SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;
  207 +
  208 + recognizer->impl =
  209 + std::make_unique<sherpa_onnx::OfflineRecognizer>(recognizer_config);
  210 +
  211 + return recognizer;
  212 +}
  213 +
  214 +void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer *recognizer) {
  215 + delete recognizer;
  216 +}
  217 +
  218 +SherpaOnnxOfflineStream *CreateOfflineStream(
  219 + const SherpaOnnxOfflineRecognizer *recognizer) {
  220 + SherpaOnnxOfflineStream *stream =
  221 + new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());
  222 + return stream;
  223 +}
  224 +
  225 +void DestoryOfflineStream(SherpaOnnxOfflineStream *stream) { delete stream; }
  226 +
  227 +void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, int32_t sample_rate,
  228 + const float *samples, int32_t n) {
  229 + stream->impl->AcceptWaveform(sample_rate, samples, n);
  230 +}
  231 +
  232 +void DecodeOfflineStream(SherpaOnnxOfflineRecognizer *recognizer,
  233 + SherpaOnnxOfflineStream *stream) {
  234 + recognizer->impl->DecodeStream(stream->impl.get());
  235 +}
  236 +
  237 +void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer *recognizer,
  238 + SherpaOnnxOfflineStream **streams,
  239 + int32_t n) {
  240 + std::vector<sherpa_onnx::OfflineStream *> ss(n);
  241 + for (int32_t i = 0; i != n; ++i) {
  242 + ss[i] = streams[i]->impl.get();
  243 + }
  244 + recognizer->impl->DecodeStreams(ss.data(), n);
  245 +}
  246 +
  247 +SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
  248 + SherpaOnnxOfflineStream *stream) {
  249 + const sherpa_onnx::OfflineRecognitionResult &result =
  250 + stream->impl->GetResult();
  251 + const auto &text = result.text;
  252 +
  253 + auto r = new SherpaOnnxOfflineRecognizerResult;
  254 + r->text = new char[text.size() + 1];
  255 + std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
  256 + const_cast<char *>(r->text)[text.size()] = 0;
  257 +
  258 + return r;
  259 +}
  260 +
  261 +void DestroyOfflineRecognizerResult(
  262 + const SherpaOnnxOfflineRecognizerResult *r) {
  263 + delete[] r->text;
  264 + delete r;
  265 +}
@@ -18,12 +18,35 @@ @@ -18,12 +18,35 @@
18 extern "C" { 18 extern "C" {
19 #endif 19 #endif
20 20
  21 +// See https://github.com/pytorch/pytorch/blob/main/c10/macros/Export.h
  22 +// We will set SHERPA_ONNX_BUILD_SHARED_LIBS and SHERPA_ONNX_BUILD_MAIN_LIB in
  23 +// CMakeLists.txt
  24 +
  25 +#if defined(_WIN32)
  26 +#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS)
  27 +#define SHERPA_ONNX_EXPORT __declspec(dllexport)
  28 +#define SHERPA_ONNX_IMPORT __declspec(dllimport)
  29 +#else
  30 +#define SHERPA_ONNX_EXPORT
  31 +#define SHERPA_ONNX_IMPORT
  32 +#endif
  33 +#else // WIN32
  34 +#define SHERPA_ONNX_EXPORT __attribute__((__visibility__("default")))
  35 +#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT
  36 +#endif
  37 +
  38 +#if defined(SHERPA_ONNX_BUILD_MAIN_LIB)
  39 +#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT
  40 +#else
  41 +#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT
  42 +#endif
  43 +
21 /// Please refer to 44 /// Please refer to
22 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html 45 /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
23 /// to download pre-trained models. That is, you can find encoder-xxx.onnx 46 /// to download pre-trained models. That is, you can find encoder-xxx.onnx
24 /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct 47 /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
25 /// from there. 48 /// from there.
26 -typedef struct SherpaOnnxOnlineTransducerModelConfig { 49 +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig {
27 const char *encoder; 50 const char *encoder;
28 const char *decoder; 51 const char *decoder;
29 const char *joiner; 52 const char *joiner;
@@ -33,7 +56,7 @@ typedef struct SherpaOnnxOnlineTransducerModelConfig { @@ -33,7 +56,7 @@ typedef struct SherpaOnnxOnlineTransducerModelConfig {
33 } SherpaOnnxOnlineTransducerModelConfig; 56 } SherpaOnnxOnlineTransducerModelConfig;
34 57
35 /// It expects 16 kHz 16-bit single channel wave format. 58 /// It expects 16 kHz 16-bit single channel wave format.
36 -typedef struct SherpaOnnxFeatureConfig { 59 +SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
37 /// Sample rate of the input data. MUST match the one expected 60 /// Sample rate of the input data. MUST match the one expected
38 /// by the model. For instance, it should be 16000 for models provided 61 /// by the model. For instance, it should be 16000 for models provided
39 /// by us. 62 /// by us.
@@ -44,7 +67,7 @@ typedef struct SherpaOnnxFeatureConfig { @@ -44,7 +67,7 @@ typedef struct SherpaOnnxFeatureConfig {
44 int32_t feature_dim; 67 int32_t feature_dim;
45 } SherpaOnnxFeatureConfig; 68 } SherpaOnnxFeatureConfig;
46 69
47 -typedef struct SherpaOnnxOnlineRecognizerConfig { 70 +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
48 SherpaOnnxFeatureConfig feat_config; 71 SherpaOnnxFeatureConfig feat_config;
49 SherpaOnnxOnlineTransducerModelConfig model_config; 72 SherpaOnnxOnlineTransducerModelConfig model_config;
50 73
@@ -75,7 +98,7 @@ typedef struct SherpaOnnxOnlineRecognizerConfig { @@ -75,7 +98,7 @@ typedef struct SherpaOnnxOnlineRecognizerConfig {
75 float rule3_min_utterance_length; 98 float rule3_min_utterance_length;
76 } SherpaOnnxOnlineRecognizerConfig; 99 } SherpaOnnxOnlineRecognizerConfig;
77 100
78 -typedef struct SherpaOnnxOnlineRecognizerResult { 101 +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
79 const char *text; 102 const char *text;
80 // TODO(fangjun): Add more fields 103 // TODO(fangjun): Add more fields
81 } SherpaOnnxOnlineRecognizerResult; 104 } SherpaOnnxOnlineRecognizerResult;
@@ -83,32 +106,34 @@ typedef struct SherpaOnnxOnlineRecognizerResult { @@ -83,32 +106,34 @@ typedef struct SherpaOnnxOnlineRecognizerResult {
83 /// Note: OnlineRecognizer here means StreamingRecognizer. 106 /// Note: OnlineRecognizer here means StreamingRecognizer.
84 /// It does not need to access the Internet during recognition. 107 /// It does not need to access the Internet during recognition.
85 /// Everything is run locally. 108 /// Everything is run locally.
86 -typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;  
87 -typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; 109 +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizer
  110 + SherpaOnnxOnlineRecognizer;
  111 +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;
88 112
89 -/// @param config Config for the recongizer. 113 +/// @param config Config for the recognizer.
90 /// @return Return a pointer to the recognizer. The user has to invoke 114 /// @return Return a pointer to the recognizer. The user has to invoke
91 // DestroyOnlineRecognizer() to free it to avoid memory leak. 115 // DestroyOnlineRecognizer() to free it to avoid memory leak.
92 -SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( 116 +SHERPA_ONNX_API SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
93 const SherpaOnnxOnlineRecognizerConfig *config); 117 const SherpaOnnxOnlineRecognizerConfig *config);
94 118
95 /// Free a pointer returned by CreateOnlineRecognizer() 119 /// Free a pointer returned by CreateOnlineRecognizer()
96 /// 120 ///
97 /// @param recognizer A pointer returned by CreateOnlineRecognizer() 121 /// @param recognizer A pointer returned by CreateOnlineRecognizer()
98 -void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer); 122 +SHERPA_ONNX_API void DestroyOnlineRecognizer(
  123 + SherpaOnnxOnlineRecognizer *recognizer);
99 124
100 /// Create an online stream for accepting wave samples. 125 /// Create an online stream for accepting wave samples.
101 /// 126 ///
102 /// @param recognizer A pointer returned by CreateOnlineRecognizer() 127 /// @param recognizer A pointer returned by CreateOnlineRecognizer()
103 /// @return Return a pointer to an OnlineStream. The user has to invoke 128 /// @return Return a pointer to an OnlineStream. The user has to invoke
104 /// DestoryOnlineStream() to free it to avoid memory leak. 129 /// DestoryOnlineStream() to free it to avoid memory leak.
105 -SherpaOnnxOnlineStream *CreateOnlineStream( 130 +SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineStream(
106 const SherpaOnnxOnlineRecognizer *recognizer); 131 const SherpaOnnxOnlineRecognizer *recognizer);
107 132
108 -/// Destory an online stream. 133 +/// Destroy an online stream.
109 /// 134 ///
110 /// @param stream A pointer returned by CreateOnlineStream() 135 /// @param stream A pointer returned by CreateOnlineStream()
111 -void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); 136 +SHERPA_ONNX_API void DestoryOnlineStream(SherpaOnnxOnlineStream *stream);
112 137
113 /// Accept input audio samples and compute the features. 138 /// Accept input audio samples and compute the features.
114 /// The user has to invoke DecodeOnlineStream() to run the neural network and 139 /// The user has to invoke DecodeOnlineStream() to run the neural network and
@@ -121,16 +146,17 @@ void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); @@ -121,16 +146,17 @@ void DestoryOnlineStream(SherpaOnnxOnlineStream *stream);
121 /// @param samples A pointer to a 1-D array containing audio samples. 146 /// @param samples A pointer to a 1-D array containing audio samples.
122 /// The range of samples has to be normalized to [-1, 1]. 147 /// The range of samples has to be normalized to [-1, 1].
123 /// @param n Number of elements in the samples array. 148 /// @param n Number of elements in the samples array.
124 -void AcceptWaveform(SherpaOnnxOnlineStream *stream, int32_t sample_rate,  
125 - const float *samples, int32_t n); 149 +SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream,
  150 + int32_t sample_rate, const float *samples,
  151 + int32_t n);
126 152
127 /// Return 1 if there are enough number of feature frames for decoding. 153 /// Return 1 if there are enough number of feature frames for decoding.
128 /// Return 0 otherwise. 154 /// Return 0 otherwise.
129 /// 155 ///
130 /// @param recognizer A pointer returned by CreateOnlineRecognizer 156 /// @param recognizer A pointer returned by CreateOnlineRecognizer
131 /// @param stream A pointer returned by CreateOnlineStream 157 /// @param stream A pointer returned by CreateOnlineStream
132 -int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer,  
133 - SherpaOnnxOnlineStream *stream); 158 +SHERPA_ONNX_API int32_t IsOnlineStreamReady(
  159 + SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
134 160
135 /// Call this function to run the neural network model and decoding. 161 /// Call this function to run the neural network model and decoding.
136 // 162 //
@@ -142,8 +168,8 @@ int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer, @@ -142,8 +168,8 @@ int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer,
142 /// DecodeOnlineStream(recognizer, stream); 168 /// DecodeOnlineStream(recognizer, stream);
143 /// } 169 /// }
144 /// 170 ///
145 -void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,  
146 - SherpaOnnxOnlineStream *stream); 171 +SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
  172 + SherpaOnnxOnlineStream *stream);
147 173
148 /// This function is similar to DecodeOnlineStream(). It decodes multiple 174 /// This function is similar to DecodeOnlineStream(). It decodes multiple
149 /// OnlineStream in parallel. 175 /// OnlineStream in parallel.
@@ -155,8 +181,9 @@ void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, @@ -155,8 +181,9 @@ void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
155 /// @param streams A pointer array containing pointers returned by 181 /// @param streams A pointer array containing pointers returned by
156 /// CreateOnlineStream() 182 /// CreateOnlineStream()
157 /// @param n Number of elements in the given streams array. 183 /// @param n Number of elements in the given streams array.
158 -void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer,  
159 - SherpaOnnxOnlineStream **streams, int32_t n); 184 +SHERPA_ONNX_API void DecodeMultipleOnlineStreams(
  185 + SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream **streams,
  186 + int32_t n);
160 187
161 /// Get the decoding results so far for an OnlineStream. 188 /// Get the decoding results so far for an OnlineStream.
162 /// 189 ///
@@ -165,47 +192,188 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer, @@ -165,47 +192,188 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer,
165 /// @return A pointer containing the result. The user has to invoke 192 /// @return A pointer containing the result. The user has to invoke
166 /// DestroyOnlineRecognizerResult() to free the returned pointer to 193 /// DestroyOnlineRecognizerResult() to free the returned pointer to
167 /// avoid memory leak. 194 /// avoid memory leak.
168 -SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( 195 +SHERPA_ONNX_API SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
169 SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream); 196 SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
170 197
171 /// Destroy the pointer returned by GetOnlineStreamResult(). 198 /// Destroy the pointer returned by GetOnlineStreamResult().
172 /// 199 ///
173 /// @param r A pointer returned by GetOnlineStreamResult() 200 /// @param r A pointer returned by GetOnlineStreamResult()
174 -void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r); 201 +SHERPA_ONNX_API void DestroyOnlineRecognizerResult(
  202 + const SherpaOnnxOnlineRecognizerResult *r);
175 203
176 /// Reset an OnlineStream , which clears the neural network model state 204 /// Reset an OnlineStream , which clears the neural network model state
177 /// and the state for decoding. 205 /// and the state for decoding.
178 /// 206 ///
179 /// @param recognizer A pointer returned by CreateOnlineRecognizer(). 207 /// @param recognizer A pointer returned by CreateOnlineRecognizer().
180 /// @param stream A pointer returned by CreateOnlineStream 208 /// @param stream A pointer returned by CreateOnlineStream
181 -void Reset(SherpaOnnxOnlineRecognizer *recognizer,  
182 - SherpaOnnxOnlineStream *stream); 209 +SHERPA_ONNX_API void Reset(SherpaOnnxOnlineRecognizer *recognizer,
  210 + SherpaOnnxOnlineStream *stream);
183 211
184 /// Signal that no more audio samples would be available. 212 /// Signal that no more audio samples would be available.
185 /// After this call, you cannot call AcceptWaveform() any more. 213 /// After this call, you cannot call AcceptWaveform() any more.
186 /// 214 ///
187 /// @param stream A pointer returned by CreateOnlineStream() 215 /// @param stream A pointer returned by CreateOnlineStream()
188 -void InputFinished(SherpaOnnxOnlineStream *stream); 216 +SHERPA_ONNX_API void InputFinished(SherpaOnnxOnlineStream *stream);
189 217
190 /// Return 1 if an endpoint has been detected. 218 /// Return 1 if an endpoint has been detected.
191 /// 219 ///
192 /// @param recognizer A pointer returned by CreateOnlineRecognizer() 220 /// @param recognizer A pointer returned by CreateOnlineRecognizer()
193 /// @param stream A pointer returned by CreateOnlineStream() 221 /// @param stream A pointer returned by CreateOnlineStream()
194 /// @return Return 1 if an endpoint is detected. Return 0 otherwise. 222 /// @return Return 1 if an endpoint is detected. Return 0 otherwise.
195 -int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer,  
196 - SherpaOnnxOnlineStream *stream); 223 +SHERPA_ONNX_API int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer,
  224 + SherpaOnnxOnlineStream *stream);
197 225
198 // for displaying results on Linux/macOS. 226 // for displaying results on Linux/macOS.
199 -typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; 227 +SHERPA_ONNX_API typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
200 228
201 /// Create a display object. Must be freed using DestroyDisplay to avoid 229 /// Create a display object. Must be freed using DestroyDisplay to avoid
202 /// memory leak. 230 /// memory leak.
203 -SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line); 231 +SHERPA_ONNX_API SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line);
204 232
205 -void DestroyDisplay(SherpaOnnxDisplay *display); 233 +SHERPA_ONNX_API void DestroyDisplay(SherpaOnnxDisplay *display);
206 234
207 /// Print the result. 235 /// Print the result.
208 -void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s); 236 +SHERPA_ONNX_API void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx,
  237 + const char *s);
  238 +// ============================================================
  239 +// For offline ASR (i.e., non-streaming ASR)
  240 +// ============================================================
  241 +
  242 +/// Please refer to
  243 +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  244 +/// to download pre-trained models. That is, you can find encoder-xxx.onnx
  245 +/// decoder-xxx.onnx, and joiner-xxx.onnx for this struct
  246 +/// from there.
  247 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTransducerModelConfig {
  248 + const char *encoder;
  249 + const char *decoder;
  250 + const char *joiner;
  251 +} SherpaOnnxOfflineTransducerModelConfig;
  252 +
  253 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineParaformerModelConfig {
  254 + const char *model;
  255 +} SherpaOnnxOfflineParaformerModelConfig;
  256 +
  257 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineNemoEncDecCtcModelConfig {
  258 + const char *model;
  259 +} SherpaOnnxOfflineNemoEncDecCtcModelConfig;
  260 +
  261 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig {
  262 + const char *model;
  263 + float scale;
  264 +} SherpaOnnxOfflineLMConfig;
  265 +
  266 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
  267 + SherpaOnnxOfflineTransducerModelConfig transducer;
  268 + SherpaOnnxOfflineParaformerModelConfig paraformer;
  269 + SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc;
  270 +
  271 + const char *tokens;
  272 + int32_t num_threads;
  273 + int32_t debug;
  274 +} SherpaOnnxOfflineModelConfig;
  275 +
  276 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
  277 + SherpaOnnxFeatureConfig feat_config;
  278 + SherpaOnnxOfflineModelConfig model_config;
  279 + SherpaOnnxOfflineLMConfig lm_config;
  280 +
  281 + const char *decoding_method;
  282 + int32_t max_active_paths;
  283 +} SherpaOnnxOfflineRecognizerConfig;
  284 +
  285 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer
  286 + SherpaOnnxOfflineRecognizer;
  287 +
  288 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;
  289 +
  290 +/// @param config Config for the recognizer.
  291 +/// @return Return a pointer to the recognizer. The user has to invoke
  292 +/// DestroyOfflineRecognizer() to free it to avoid memory leak.
  293 +SHERPA_ONNX_API SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
  294 + const SherpaOnnxOfflineRecognizerConfig *config);
  295 +
  296 +/// Free a pointer returned by CreateOfflineRecognizer()
  297 +///
  298 +/// @param recognizer A pointer returned by CreateOfflineRecognizer()
  299 +SHERPA_ONNX_API void DestroyOfflineRecognizer(
  300 + SherpaOnnxOfflineRecognizer *recognizer);
  301 +
  302 +/// Create an offline stream for accepting wave samples.
  303 +///
  304 +/// @param recognizer A pointer returned by CreateOfflineRecognizer()
  305 +/// @return Return a pointer to an OfflineStream. The user has to invoke
  306 +/// DestoryOfflineStream() to free it to avoid memory leak.
  307 +SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream(
  308 + const SherpaOnnxOfflineRecognizer *recognizer);
  309 +
  310 +/// Destroy an offline stream.
  311 +///
  312 +/// @param stream A pointer returned by CreateOfflineStream()
  313 +SHERPA_ONNX_API void DestoryOfflineStream(SherpaOnnxOfflineStream *stream);
  314 +
  315 +/// Accept input audio samples and compute the features.
  316 +/// The user has to invoke DecodeOfflineStream() to run the neural network and
  317 +/// decoding.
  318 +///
  319 +/// @param stream A pointer returned by CreateOfflineStream().
  320 +/// @param sample_rate Sample rate of the input samples. If it is different
  321 +/// from config.feat_config.sample_rate, we will do
  322 +/// resampling inside sherpa-onnx.
  323 +/// @param samples A pointer to a 1-D array containing audio samples.
  324 +/// The range of samples has to be normalized to [-1, 1].
  325 +/// @param n Number of elements in the samples array.
  326 +///
  327 +/// @caution: For each offline stream, please invoke this function only once!
  328 +SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
  329 + int32_t sample_rate,
  330 + const float *samples, int32_t n);
  331 +/// Decode an offline stream.
  332 +///
  333 +/// We assume you have invoked AcceptWaveformOffline() for the given stream
  334 +/// before calling this function.
  335 +///
  336 +/// @param recognizer A pointer returned by CreateOfflineRecognizer().
  337 +/// @param stream A pointer returned by CreateOfflineStream()
  338 +SHERPA_ONNX_API void DecodeOfflineStream(
  339 + SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream);
  340 +
  341 +/// Decode a list of offline streams in parallel.
  342 +///
  343 +/// We assume you have invoked AcceptWaveformOffline() for each stream
  344 +/// before calling this function.
  345 +///
  346 +/// @param recognizer A pointer returned by CreateOfflineRecognizer().
  347 +/// @param streams A pointer array containing pointers returned
  348 +/// by CreateOfflineStream().
  349 +/// @param n Number of entries in the given streams.
  350 +SHERPA_ONNX_API void DecodeMultipleOfflineStreams(
  351 + SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams,
  352 + int32_t n);
  353 +
  354 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult {
  355 + const char *text;
  356 + // TODO(fangjun): Add more fields
  357 +} SherpaOnnxOfflineRecognizerResult;
  358 +
  359 +/// Get the result of the offline stream.
  360 +///
  361 +/// We assume you have called DecodeOfflineStream() or
  362 +/// DecodeMultipleOfflineStreams() with the given stream before calling
  363 +/// this function.
  364 +///
  365 +/// @param stream A pointer returned by CreateOfflineStream().
  366 +/// @return Return a pointer to the result. The user has to invoke
  367 +/// DestroyOfflineRecognizerResult() to free the returned pointer to
  368 +/// avoid memory leak.
  369 +SHERPA_ONNX_API SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
  370 + SherpaOnnxOfflineStream *stream);
  371 +
  372 +/// Destroy the pointer returned by GetOfflineStreamResult().
  373 +///
  374 +/// @param r A pointer returned by GetOfflineStreamResult()
  375 +SHERPA_ONNX_API void DestroyOfflineRecognizerResult(
  376 + const SherpaOnnxOfflineRecognizerResult *r);
209 377
210 #ifdef __cplusplus 378 #ifdef __cplusplus
211 } /* extern "C" */ 379 } /* extern "C" */
1 -using System.Runtime.InteropServices;  
2 -using System.Diagnostics;  
3 -  
4 -namespace SherpaOnnx  
5 -{  
6 - /// <summary>  
7 - /// online recognizer package  
8 - /// Copyright (c) 2023 by manyeyes  
9 - /// </summary>  
10 - public class OnlineBase : IDisposable  
11 - {  
12 - public void Dispose()  
13 - {  
14 - Dispose(disposing: true);  
15 - GC.SuppressFinalize(this);  
16 - }  
17 - protected virtual void Dispose(bool disposing)  
18 - {  
19 - if (!disposing)  
20 - {  
21 - if (_onlineRecognizerResult != IntPtr.Zero)  
22 - {  
23 - SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);  
24 - _onlineRecognizerResult = IntPtr.Zero;  
25 - }  
26 - if (_onlineStream.impl != IntPtr.Zero)  
27 - {  
28 - SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);  
29 - _onlineStream.impl = IntPtr.Zero;  
30 - }  
31 - if (_onlineRecognizer.impl != IntPtr.Zero)  
32 - {  
33 - SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);  
34 - _onlineRecognizer.impl = IntPtr.Zero;  
35 - }  
36 - this._disposed = true;  
37 - }  
38 - }  
39 - ~OnlineBase()  
40 - {  
41 - Dispose(this._disposed);  
42 - }  
43 - internal SherpaOnnxOnlineStream _onlineStream;  
44 - internal IntPtr _onlineRecognizerResult;  
45 - internal SherpaOnnxOnlineRecognizer _onlineRecognizer;  
46 - internal bool _disposed = false;  
47 - }  
48 - public class OnlineStream : OnlineBase  
49 - {  
50 - internal OnlineStream(SherpaOnnxOnlineStream onlineStream)  
51 - {  
52 - this._onlineStream = onlineStream;  
53 - }  
54 - protected override void Dispose(bool disposing)  
55 - {  
56 - if (!disposing)  
57 - {  
58 - SherpaOnnxSharp.DestroyOnlineStream(_onlineStream);  
59 - _onlineStream.impl = IntPtr.Zero;  
60 - this._disposed = true;  
61 - base.Dispose();  
62 - }  
63 - }  
64 - }  
65 - public class OnlineRecognizerResult : OnlineBase  
66 - {  
67 - internal OnlineRecognizerResult(IntPtr onlineRecognizerResult)  
68 - {  
69 - this._onlineRecognizerResult = onlineRecognizerResult;  
70 - }  
71 - protected override void Dispose(bool disposing)  
72 - {  
73 - if (!disposing)  
74 - {  
75 - SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult);  
76 - _onlineRecognizerResult = IntPtr.Zero;  
77 - this._disposed = true;  
78 - base.Dispose(disposing);  
79 - }  
80 - }  
81 - }  
/// <summary>
/// Managed wrapper around a native streaming (online) recognizer.
/// The native handle is stored in <c>_onlineRecognizer</c> (declared on the base
/// class) and is released by <see cref="Dispose(bool)"/>.
/// </summary>
/// <typeparam name="T">
/// Model description type. Only <see cref="OnlineTransducer"/> (or a type derived
/// from it) is recognized by the constructor.
/// </typeparam>
public class OnlineRecognizer<T> : OnlineBase
    where T : class, new()
{
    /// <summary>
    /// Builds the native configuration from the arguments and creates the
    /// native recognizer.
    /// </summary>
    /// <param name="t">Model description holding the encoder/decoder/joiner paths.</param>
    /// <param name="tokensFilePath">Path to the tokens file.</param>
    /// <param name="decoding_method">Decoding method name passed through to the native config.</param>
    /// <param name="sample_rate">Sample rate stored in the feature config.</param>
    /// <param name="feature_dim">Feature dimension stored in the feature config.</param>
    /// <param name="num_threads">Number of threads; must be greater than 0.</param>
    /// <param name="debug">Passed through to the native model config.</param>
    /// <param name="max_active_paths">Passed through to the native config.</param>
    /// <param name="enable_endpoint">0 disables endpoint detection, non-zero enables it.</param>
    /// <param name="rule1_min_trailing_silence">Endpoint rule 1; converted to the float config field.</param>
    /// <param name="rule2_min_trailing_silence">Endpoint rule 2; converted to the float config field.</param>
    /// <param name="rule3_min_utterance_length">Endpoint rule 3; converted to the float config field.</param>
    public OnlineRecognizer(T t,
        string tokensFilePath, string decoding_method = "greedy_search",
        int sample_rate = 16000, int feature_dim = 80,
        int num_threads = 2, bool debug = false, int max_active_paths = 4,
        int enable_endpoint = 0, int rule1_min_trailing_silence = 0,
        int rule2_min_trailing_silence = 0, int rule3_min_utterance_length = 0)
    {
        SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer();

        // Pattern matching replaces the GetType()/"as" pair, so no nullable
        // warning suppression is needed and derived model types are accepted.
        if (t is OnlineTransducer onlineTransducer)
        {
            Trace.Assert(File.Exists(onlineTransducer.DecoderFilename)
                && File.Exists(onlineTransducer.EncoderFilename)
                && File.Exists(onlineTransducer.JoinerFilename), "Please provide a model");
            Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
            Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
            transducer.encoder_filename = onlineTransducer.EncoderFilename;
            transducer.decoder_filename = onlineTransducer.DecoderFilename;
            transducer.joiner_filename = onlineTransducer.JoinerFilename;
        }

        SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig
        {
            transducer = transducer,
            num_threads = num_threads,
            debug = debug,
            tokens = tokensFilePath,
        };

        SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig
        {
            sample_rate = sample_rate,
            feature_dim = feature_dim,
        };

        SherpaOnnxOnlineRecognizerConfig config = new SherpaOnnxOnlineRecognizerConfig
        {
            decoding_method = decoding_method,
            feat_config = feat_config,
            model_config = model_config,
            max_active_paths = max_active_paths,
            // Endpoint settings. The rule arguments are ints and are widened
            // to the float fields of the native config.
            enable_endpoint = enable_endpoint,
            rule1_min_trailing_silence = rule1_min_trailing_silence,
            rule2_min_trailing_silence = rule2_min_trailing_silence,
            rule3_min_utterance_length = rule3_min_utterance_length,
        };

        _onlineRecognizer = SherpaOnnxSharp.CreateOnlineRecognizer(config);
    }

    /// <summary>Creates a native stream for this recognizer and wraps it.</summary>
    internal OnlineStream CreateOnlineStream()
    {
        SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer);
        return new OnlineStream(stream);
    }

    /// <summary>Signals that no more samples will be fed into <paramref name="stream"/>.</summary>
    public void InputFinished(OnlineStream stream)
    {
        SherpaOnnxSharp.InputFinished(stream._onlineStream);
    }

    /// <summary>
    /// Creates one stream per sample buffer, feeds the whole buffer into it and
    /// marks its input as finished.
    /// </summary>
    /// <param name="samplesList">One sample buffer per stream.</param>
    /// <param name="sample_rate">
    /// Sample rate of the buffers. Defaults to 16000, the value that was
    /// previously hard-coded.
    /// </param>
    /// <returns>The created streams, in the order of <paramref name="samplesList"/>.</returns>
    public List<OnlineStream> CreateStreams(List<float[]> samplesList, int sample_rate = 16000)
    {
        List<OnlineStream> streams = new List<OnlineStream>(samplesList.Count);
        foreach (float[] samples in samplesList)
        {
            OnlineStream stream = CreateOnlineStream();
            AcceptWaveform(stream._onlineStream, sample_rate, samples);
            InputFinished(stream);
            streams.Add(stream);
        }
        return streams;
    }

    /// <summary>Creates a single empty stream.</summary>
    public OnlineStream CreateStream()
    {
        return CreateOnlineStream();
    }

    /// <summary>Feeds <paramref name="samples"/> into the native stream handle.</summary>
    internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples)
    {
        SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length);
    }

    /// <summary>Feeds <paramref name="samples"/> into <paramref name="stream"/>.</summary>
    public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples)
    {
        AcceptWaveform(stream._onlineStream, sample_rate, samples);
    }

    /// <summary>
    /// Copies the native handles of <paramref name="streams"/> into a newly
    /// allocated unmanaged array.
    /// </summary>
    /// <returns>
    /// The unmanaged buffer. The caller must release it with
    /// <c>Marshal.FreeHGlobal</c>.
    /// </returns>
    internal IntPtr GetStreamsIntPtr(OnlineStream[] streams)
    {
        int elementSize = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream));
        IntPtr buffer = Marshal.AllocHGlobal(elementSize * streams.Length);
        try
        {
            // IntPtr.Add replaces the former unsafe byte* arithmetic.
            for (int i = 0; i < streams.Length; i++)
            {
                Marshal.StructureToPtr(streams[i]._onlineStream, IntPtr.Add(buffer, i * elementSize), false);
            }
        }
        catch
        {
            // Do not leak the buffer when a handle cannot be marshalled.
            Marshal.FreeHGlobal(buffer);
            throw;
        }
        return buffer;
    }

    /// <summary>Returns true when the native side reports the stream as ready for decoding.</summary>
    internal bool IsReady(OnlineStream stream)
    {
        return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0;
    }

    /// <summary>
    /// Repeatedly decodes, as one batch, every stream that is currently ready,
    /// until none of the given streams is ready any more.
    /// </summary>
    public void DecodeMultipleStreams(List<OnlineStream> streams)
    {
        while (true)
        {
            List<OnlineStream> ready = new List<OnlineStream>();
            foreach (OnlineStream stream in streams)
            {
                if (IsReady(stream))
                {
                    ready.Add(stream);
                }
            }
            if (ready.Count == 0)
            {
                break;
            }

            OnlineStream[] batch = ready.ToArray();
            IntPtr batchPtr = GetStreamsIntPtr(batch);
            try
            {
                SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, batchPtr, batch.Length);
            }
            finally
            {
                // Release the unmanaged array even if the native call throws.
                Marshal.FreeHGlobal(batchPtr);
            }
        }
    }

    /// <summary>Decodes <paramref name="stream"/> for as long as it reports ready.</summary>
    public void DecodeStream(OnlineStream stream)
    {
        while (IsReady(stream))
        {
            SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream);
        }
    }

    /// <summary>
    /// Fetches the current result of a native stream, copies it into a managed
    /// entity and frees the native result immediately.
    /// </summary>
    internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream)
    {
        OnlineRecognizerResultEntity entity = new OnlineRecognizerResultEntity();
        IntPtr resultPtr = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream);
        if (resultPtr == IntPtr.Zero)
        {
            return entity;
        }

        try
        {
            SherpaOnnxOnlineRecognizerResult native =
                Marshal.PtrToStructure<SherpaOnnxOnlineRecognizerResult>(resultPtr);
            // NOTE(review): assumes the native text is UTF-8 - confirm against
            // the native API. PtrToStringAnsi would use the system ANSI code
            // page on Windows and garble non-ASCII text.
            entity.text = Marshal.PtrToStringUTF8(native.text);
            entity.text_len = native.text_len;
        }
        finally
        {
            // The text has been copied, so the native result can be released
            // here instead of waiting for a finalizer.
            SherpaOnnxSharp.DestroyOnlineRecognizerResult(resultPtr);
        }
        return entity;
    }

    /// <summary>Returns the current recognition result of <paramref name="stream"/>.</summary>
    public OnlineRecognizerResultEntity GetResult(OnlineStream stream)
    {
        return GetResult(stream._onlineStream);
    }

    /// <summary>Returns the current result of every stream, in input order.</summary>
    public List<OnlineRecognizerResultEntity> GetResults(List<OnlineStream> streams)
    {
        List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>(streams.Count);
        foreach (OnlineStream stream in streams)
        {
            results.Add(GetResult(stream._onlineStream));
        }
        return results;
    }

    /// <summary>
    /// Releases the native recognizer exactly once, both for an explicit
    /// Dispose() call and for finalization. The previous code only freed the
    /// handle when <paramref name="disposing"/> was false, so an explicit
    /// Dispose() released nothing.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (this._disposed)
        {
            return;
        }
        if (_onlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer);
            _onlineRecognizer.impl = IntPtr.Zero;
        }
        this._disposed = true;
        // Forward the flag. Calling the parameterless Dispose() here would
        // re-enter this override through the public entry point.
        base.Dispose(disposing);
    }
}
/// <summary>
/// Base class for the offline wrappers. It stores the native handles (stream,
/// recognizer result, recognizer) and implements the dispose pattern that
/// releases whichever of them are set.
/// </summary>
public class OfflineBase : IDisposable
{
    // Native handles. A zero pointer means "nothing to free".
    internal SherpaOnnxOfflineStream _offlineStream;
    internal IntPtr _offlineRecognizerResult;
    internal SherpaOnnxOfflineRecognizer _offlineRecognizer;
    // Set once the native handles have been released.
    internal bool _disposed = false;

    /// <summary>Releases the native handles now and suppresses finalization.</summary>
    public void Dispose()
    {
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Frees every native handle that is still set. This class holds only
    /// unmanaged resources, so the work is the same for an explicit Dispose()
    /// and for finalization. The previous code skipped the cleanup when
    /// <paramref name="disposing"/> was true, which made Dispose() a no-op that
    /// also cancelled the finalizer.
    /// </summary>
    /// <param name="disposing">True when called from Dispose(), false from the finalizer.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (this._disposed)
        {
            return;
        }

        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            _offlineRecognizerResult = IntPtr.Zero;
        }
        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            _offlineStream.impl = IntPtr.Zero;
        }
        if (_offlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
            _offlineRecognizer.impl = IntPtr.Zero;
        }
        this._disposed = true;
    }

    /// <summary>Finalizer: releases handles that were never disposed explicitly.</summary>
    ~OfflineBase()
    {
        // Always pass false here; the old code passed the _disposed flag.
        Dispose(disposing: false);
    }
}
/// <summary>Managed owner of one native offline stream handle.</summary>
public class OfflineStream : OfflineBase
{
    /// <summary>Wraps an existing native stream handle.</summary>
    internal OfflineStream(SherpaOnnxOfflineStream offlineStream)
    {
        this._offlineStream = offlineStream;
    }

    /// <summary>
    /// Destroys the native stream exactly once, for both an explicit Dispose()
    /// and finalization. The previous code only freed the handle when
    /// <paramref name="disposing"/> was false, so Dispose() released nothing.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (this._disposed)
        {
            return;
        }
        if (_offlineStream.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineStream(_offlineStream);
            _offlineStream.impl = IntPtr.Zero;
        }
        this._disposed = true;
        // Forward the flag. The parameterless Dispose() would re-enter this
        // override through the public entry point.
        base.Dispose(disposing);
    }
}
/// <summary>Managed owner of one native offline recognizer result pointer.</summary>
public class OfflineRecognizerResult : OfflineBase
{
    /// <summary>Wraps an existing native result pointer.</summary>
    internal OfflineRecognizerResult(IntPtr offlineRecognizerResult)
    {
        this._offlineRecognizerResult = offlineRecognizerResult;
    }

    /// <summary>
    /// Destroys the native result exactly once, for both an explicit Dispose()
    /// and finalization. The previous code only freed the pointer when
    /// <paramref name="disposing"/> was false, so Dispose() released nothing.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (this._disposed)
        {
            return;
        }
        if (_offlineRecognizerResult != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult);
            _offlineRecognizerResult = IntPtr.Zero;
        }
        this._disposed = true;
        base.Dispose(disposing);
    }
}
/// <summary>
/// Managed wrapper around a native non-streaming (offline) recognizer.
/// The native handle is stored in <c>_offlineRecognizer</c> and is released by
/// <see cref="Dispose(bool)"/>.
/// </summary>
/// <typeparam name="T">
/// Model description type: <see cref="OfflineTransducer"/>,
/// <see cref="OfflineParaformer"/> or <see cref="OfflineNemoEncDecCtc"/>.
/// </typeparam>
public class OfflineRecognizer<T> : OfflineBase
    where T : class, new()
{
    /// <summary>
    /// Builds the native configuration from the arguments and creates the
    /// native recognizer.
    /// </summary>
    /// <param name="t">Model description holding the model file path(s).</param>
    /// <param name="tokensFilePath">Path to the tokens file.</param>
    /// <param name="decoding_method">Decoding method name passed through to the native config.</param>
    /// <param name="sample_rate">Sample rate stored in the feature config.</param>
    /// <param name="feature_dim">Feature dimension stored in the feature config.</param>
    /// <param name="num_threads">Number of threads; must be greater than 0.</param>
    /// <param name="debug">Passed through to the native model config.</param>
    public OfflineRecognizer(T t,
        string tokensFilePath, string decoding_method = "greedy_search",
        int sample_rate = 16000, int feature_dim = 80,
        int num_threads = 2, bool debug = false)
    {
        SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer();
        SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer();
        SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc();

        // Pattern matching replaces the GetType()/"as" pairs, so no nullable
        // warning suppression is needed and derived model types are accepted.
        switch (t)
        {
            case OfflineTransducer offlineTransducer:
                Trace.Assert(File.Exists(offlineTransducer.DecoderFilename)
                    && File.Exists(offlineTransducer.EncoderFilename)
                    && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model");
                Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
                Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
                transducer.encoder_filename = offlineTransducer.EncoderFilename;
                transducer.decoder_filename = offlineTransducer.DecoderFilename;
                transducer.joiner_filename = offlineTransducer.JoinerFilename;
                break;

            case OfflineParaformer offlineParaformer:
                Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model");
                Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
                Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
                paraformer.model = offlineParaformer.Model;
                break;

            case OfflineNemoEncDecCtc offlineNemoEncDecCtc:
                Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model");
                Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens");
                Trace.Assert(num_threads > 0, "num_threads must be greater than 0");
                nemo_ctc.model = offlineNemoEncDecCtc.Model;
                break;
        }

        SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig
        {
            transducer = transducer,
            paraformer = paraformer,
            nemo_ctc = nemo_ctc,
            num_threads = num_threads,
            debug = debug,
            tokens = tokensFilePath,
        };

        SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig
        {
            sample_rate = sample_rate,
            feature_dim = feature_dim,
        };

        SherpaOnnxOfflineRecognizerConfig config = new SherpaOnnxOfflineRecognizerConfig
        {
            decoding_method = decoding_method,
            feat_config = feat_config,
            model_config = model_config,
        };

        _offlineRecognizer = SherpaOnnxSharp.CreateOfflineRecognizer(config);
    }

    /// <summary>Creates a native stream for this recognizer and wraps it.</summary>
    internal OfflineStream CreateOfflineStream()
    {
        SherpaOnnxOfflineStream stream = SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer);
        return new OfflineStream(stream);
    }

    /// <summary>
    /// Creates one stream per sample buffer and feeds the whole buffer into it.
    /// </summary>
    /// <param name="samplesList">One sample buffer per stream.</param>
    /// <param name="sample_rate">
    /// Sample rate of the buffers. Defaults to 16000, the value that was
    /// previously hard-coded.
    /// </param>
    /// <returns>The created streams, in the order of <paramref name="samplesList"/>.</returns>
    public OfflineStream[] CreateOfflineStream(List<float[]> samplesList, int sample_rate = 16000)
    {
        OfflineStream[] streams = new OfflineStream[samplesList.Count];
        for (int i = 0; i < streams.Length; i++)
        {
            OfflineStream stream = CreateOfflineStream();
            AcceptWaveform(stream._offlineStream, sample_rate, samplesList[i]);
            streams[i] = stream;
        }
        return streams;
    }

    /// <summary>Feeds <paramref name="samples"/> into the native stream handle.</summary>
    internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples)
    {
        SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length);
    }

    /// <summary>
    /// Copies the native handles of <paramref name="streams"/> into a newly
    /// allocated unmanaged array.
    /// </summary>
    /// <returns>
    /// The unmanaged buffer. The caller must release it with
    /// <c>Marshal.FreeHGlobal</c>.
    /// </returns>
    internal IntPtr GetStreamsIntPtr(OfflineStream[] streams)
    {
        int elementSize = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream));
        IntPtr buffer = Marshal.AllocHGlobal(elementSize * streams.Length);
        try
        {
            // IntPtr.Add replaces the former unsafe byte* arithmetic.
            for (int i = 0; i < streams.Length; i++)
            {
                Marshal.StructureToPtr(streams[i]._offlineStream, IntPtr.Add(buffer, i * elementSize), false);
            }
        }
        catch
        {
            // Do not leak the buffer when a handle cannot be marshalled.
            Marshal.FreeHGlobal(buffer);
            throw;
        }
        return buffer;
    }

    /// <summary>Decodes all given streams with one native batch call.</summary>
    public void DecodeMultipleOfflineStreams(OfflineStream[] streams)
    {
        IntPtr streamsPtr = GetStreamsIntPtr(streams);
        try
        {
            SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsPtr, streams.Length);
        }
        finally
        {
            // Release the unmanaged array even if the native call throws.
            Marshal.FreeHGlobal(streamsPtr);
        }
    }

    /// <summary>
    /// Fetches the result of a native stream, copies it into a managed entity
    /// and frees the native result immediately.
    /// </summary>
    internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream)
    {
        OfflineRecognizerResultEntity entity = new OfflineRecognizerResultEntity();
        IntPtr resultPtr = SherpaOnnxSharp.GetOfflineStreamResult(stream);
        if (resultPtr == IntPtr.Zero)
        {
            return entity;
        }

        try
        {
            SherpaOnnxOfflineRecognizerResult native =
                Marshal.PtrToStructure<SherpaOnnxOfflineRecognizerResult>(resultPtr);
            // NOTE(review): assumes the native text is UTF-8 - confirm against
            // the native API. PtrToStringAnsi would use the system ANSI code
            // page on Windows and garble non-ASCII text.
            entity.text = Marshal.PtrToStringUTF8(native.text);
            entity.text_len = native.text_len;
        }
        finally
        {
            // The text has been copied, so the native result can be released
            // here instead of waiting for a wrapper object's finalizer.
            SherpaOnnxSharp.DestroyOfflineRecognizerResult(resultPtr);
        }
        return entity;
    }

    /// <summary>Returns the result of every stream, in input order.</summary>
    public List<OfflineRecognizerResultEntity> GetResults(OfflineStream[] streams)
    {
        List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>(streams.Length);
        foreach (OfflineStream stream in streams)
        {
            results.Add(GetResult(stream._offlineStream));
        }
        return results;
    }

    /// <summary>
    /// Destroys the native recognizer exactly once, for both an explicit
    /// Dispose() and finalization. The previous code only freed the handle when
    /// <paramref name="disposing"/> was false, so Dispose() released nothing.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (this._disposed)
        {
            return;
        }
        if (_offlineRecognizer.impl != IntPtr.Zero)
        {
            SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer);
            _offlineRecognizer.impl = IntPtr.Zero;
        }
        this._disposed = true;
        // Forward the flag. The parameterless Dispose() would re-enter this
        // override through the public entry point.
        base.Dispose(disposing);
    }
}
/// <summary>
/// P/Invoke declarations for the native "SherpaOnnxSharp" library.
/// Every import binds to the native export with the same name as the method
/// (the DllImport default entry point) and uses the cdecl calling convention.
/// </summary>
internal static partial class SherpaOnnxSharp
{
    private const string dllName = @"SherpaOnnxSharp";

    // Shared by every import below.
    private const CallingConvention NativeCallConv = CallingConvention.Cdecl;

    // ---------------------------------------------------------------- offline

    /// <summary>Creates an offline recognizer from <paramref name="config"/>.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(
        SherpaOnnxOfflineRecognizerConfig config);

    /// <summary>Creates an offline stream for <paramref name="offlineRecognizer"/>.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern SherpaOnnxOfflineStream CreateOfflineStream(
        SherpaOnnxOfflineRecognizer offlineRecognizer);

    /// <summary>Feeds <paramref name="samples_size"/> samples into an offline stream.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void AcceptWaveform(
        SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size);

    /// <summary>Decodes a single offline stream.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DecodeOfflineStream(
        SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream);

    /// <summary>Decodes <paramref name="n"/> offline streams passed as an unmanaged array.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DecodeMultipleOfflineStreams(
        SherpaOnnxOfflineRecognizer recognizer, IntPtr streams, int n);

    /// <summary>Returns a pointer to the native result of an offline stream.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream);

    /// <summary>Destroys a pointer returned by <see cref="GetOfflineStreamResult"/>.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DestroyOfflineRecognizerResult(IntPtr result);

    /// <summary>Destroys an offline stream.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream);

    /// <summary>Destroys an offline recognizer.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer);

    // ----------------------------------------------------------------- online

    /// <summary>Creates an online recognizer from <paramref name="config"/>.</summary>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(
        SherpaOnnxOnlineRecognizerConfig config);

    /// <summary>Frees a recognizer returned by <see cref="CreateOnlineRecognizer"/>.</summary>
    /// <param name="recognizer">A recognizer returned by CreateOnlineRecognizer().</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer);

    /// <summary>Creates an online stream for accepting wave samples.</summary>
    /// <param name="recognizer">A recognizer returned by CreateOnlineRecognizer().</param>
    /// <returns>
    /// An online stream. The caller has to invoke DestroyOnlineStream() on it
    /// to avoid a memory leak.
    /// </returns>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern SherpaOnnxOnlineStream CreateOnlineStream(
        SherpaOnnxOnlineRecognizer recognizer);

    /// <summary>Destroys an online stream.</summary>
    /// <param name="stream">A stream returned by CreateOnlineStream().</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Accepts input audio samples and computes the features. The caller has
    /// to invoke DecodeOnlineStream() to run the neural network and decoding.
    /// </summary>
    /// <param name="stream">A stream returned by CreateOnlineStream().</param>
    /// <param name="sample_rate">
    /// Sample rate of the input samples. If it differs from
    /// config.feat_config.sample_rate, resampling is done inside sherpa-onnx.
    /// </param>
    /// <param name="samples">
    /// A 1-D array of audio samples, normalized to the range [-1, 1].
    /// </param>
    /// <param name="n">Number of elements in <paramref name="samples"/>.</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void AcceptOnlineWaveform(
        SherpaOnnxOnlineStream stream, int sample_rate, float[] samples, int n);

    /// <summary>
    /// Returns 1 if there are enough feature frames for decoding, 0 otherwise.
    /// </summary>
    /// <param name="recognizer">A recognizer returned by CreateOnlineRecognizer().</param>
    /// <param name="stream">A stream returned by CreateOnlineStream().</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern int IsOnlineStreamReady(
        SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Runs the neural network model and decoding.
    /// Precondition: IsOnlineStreamReady() MUST return 1.
    /// </summary>
    /// <example>
    /// while (IsOnlineStreamReady(recognizer, stream) != 0)
    /// {
    ///     DecodeOnlineStream(recognizer, stream);
    /// }
    /// </example>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DecodeOnlineStream(
        SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Like DecodeOnlineStream(), but decodes multiple online streams in
    /// parallel. The caller has to ensure each stream is ready, i.e.
    /// IsOnlineStreamReady() returns 1 for it.
    /// </summary>
    /// <param name="recognizer">A recognizer returned by CreateOnlineRecognizer().</param>
    /// <param name="streams">Unmanaged array of stream handles returned by CreateOnlineStream().</param>
    /// <param name="n">Number of elements in <paramref name="streams"/>.</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DecodeMultipleOnlineStreams(
        SherpaOnnxOnlineRecognizer recognizer, IntPtr streams, int n);

    /// <summary>Gets the decoding result so far for an online stream.</summary>
    /// <param name="recognizer">A recognizer returned by CreateOnlineRecognizer().</param>
    /// <param name="stream">A stream returned by CreateOnlineStream().</param>
    /// <returns>
    /// A pointer to the result. The caller has to invoke
    /// DestroyOnlineRecognizerResult() on it to avoid a memory leak.
    /// </returns>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern IntPtr GetOnlineStreamResult(
        SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>Destroys a pointer returned by <see cref="GetOnlineStreamResult"/>.</summary>
    /// <param name="result">A pointer returned by GetOnlineStreamResult().</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void DestroyOnlineRecognizerResult(IntPtr result);

    /// <summary>
    /// Resets an online stream, which clears the neural network model state
    /// and the state for decoding.
    /// </summary>
    /// <param name="recognizer">A recognizer returned by CreateOnlineRecognizer().</param>
    /// <param name="stream">A stream returned by CreateOnlineStream().</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void Reset(
        SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);

    /// <summary>
    /// Signals that no more audio samples will be available. After this call
    /// the stream must not be fed any more samples.
    /// </summary>
    /// <param name="stream">A stream returned by CreateOnlineStream().</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern void InputFinished(SherpaOnnxOnlineStream stream);

    /// <summary>Returns 1 if an endpoint has been detected, 0 otherwise.</summary>
    /// <param name="recognizer">A recognizer returned by CreateOnlineRecognizer().</param>
    /// <param name="stream">A stream returned by CreateOnlineStream().</param>
    [DllImport(dllName, CallingConvention = NativeCallConv)]
    internal static extern int IsEndpoint(
        SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream);
}
/// <summary>
/// Interop struct holding the three model file paths of an offline transducer.
/// The layout attribute spells out the C# defaults for structs.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineTransducer
{
    public string encoder_filename;
    public string decoder_filename;
    public string joiner_filename;

    /// <summary>Initializes all three paths to the empty string.</summary>
    public SherpaOnnxOfflineTransducer()
    {
        encoder_filename = decoder_filename = joiner_filename = "";
    }
}
/// <summary>
/// Interop struct holding the model file path of an offline paraformer.
/// The layout attribute spells out the C# defaults for structs.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineParaformer
{
    public string model;

    /// <summary>Initializes the path to the empty string.</summary>
    public SherpaOnnxOfflineParaformer() => model = "";
}
/// <summary>
/// Interop struct holding the model file path of an offline NeMo EncDec CTC model.
/// The layout attribute spells out the C# defaults for structs.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineNemoEncDecCtc
{
    public string model;

    /// <summary>Initializes the path to the empty string.</summary>
    public SherpaOnnxOfflineNemoEncDecCtc() => model = "";
}
/// <summary>
/// Interop struct bundling the per-model settings of an offline recognizer.
/// Field order is part of the native layout and must not change.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineModelConfig
{
    /// <summary>Transducer model paths.</summary>
    public SherpaOnnxOfflineTransducer transducer;

    /// <summary>Paraformer model path.</summary>
    public SherpaOnnxOfflineParaformer paraformer;

    /// <summary>NeMo EncDec CTC model path.</summary>
    public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;

    /// <summary>Path to the tokens file.</summary>
    public string tokens;

    /// <summary>Number of threads.</summary>
    public int num_threads;

    /// <summary>Debug flag passed to the native side.</summary>
    public bool debug;
}
/// <summary>
/// Interop struct describing the input features.
/// It expects 16 kHz 16-bit single channel wave format.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct SherpaOnnxFeatureConfig
{
    /// <summary>
    /// Sample rate of the input data. MUST match the one expected by the
    /// model. For instance, it should be 16000 for models provided by us.
    /// </summary>
    public int sample_rate;

    /// <summary>
    /// Feature dimension of the model.
    /// For instance, it should be 80 for models provided by us.
    /// </summary>
    public int feature_dim;
}
/// <summary>
/// Interop struct passed to CreateOfflineRecognizer().
/// Field order is part of the native layout and must not change.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineRecognizerConfig
{
    /// <summary>Feature extraction settings.</summary>
    public SherpaOnnxFeatureConfig feat_config;

    /// <summary>Model settings.</summary>
    public SherpaOnnxOfflineModelConfig model_config;

    /// <summary>Possible values are: greedy_search, modified_beam_search.</summary>
    public string decoding_method;
}
/// <summary>
/// Handle to a native offline recognizer: a single pointer-sized field.
/// The layout attribute spells out the C# default for structs.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct SherpaOnnxOfflineRecognizer
{
    /// <summary>Native pointer; zero means no recognizer is attached.</summary>
    public IntPtr impl;
}
/// <summary>
/// Handle to a native offline stream: a single pointer-sized field.
/// Arrays of this struct are marshalled element by element for batch decoding.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOfflineStream
{
    /// <summary>Native pointer; zero means no stream is attached.</summary>
    public IntPtr impl;
}
/// <summary>
/// Managed view of the native offline result that
/// <c>GetOfflineStreamResult</c> points to.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct SherpaOnnxOfflineRecognizerResult
{
    /// <summary>Pointer to the native text buffer.</summary>
    public IntPtr text;

    /// <summary>Length reported by the native side for <see cref="text"/>.</summary>
    public int text_len;
}
/// <summary>
/// Interop struct holding the three model file paths of an online transducer.
/// The layout attribute spells out the C# defaults for structs.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOnlineTransducer
{
    public string encoder_filename;
    public string decoder_filename;
    public string joiner_filename;

    /// <summary>Initializes all three paths to the empty string.</summary>
    public SherpaOnnxOnlineTransducer()
    {
        encoder_filename = decoder_filename = joiner_filename = string.Empty;
    }
}
/// <summary>
/// Interop struct bundling the model settings of an online recognizer.
/// Field order is part of the native layout and must not change.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOnlineModelConfig
{
    /// <summary>Transducer model paths.</summary>
    public SherpaOnnxOnlineTransducer transducer;

    /// <summary>Path to the tokens file.</summary>
    public string tokens;

    /// <summary>Number of threads.</summary>
    public int num_threads;

    /// <summary>True to print debug information of the model.</summary>
    public bool debug;
}
/// <summary>
/// Interop struct passed to CreateOnlineRecognizer().
/// Field order is part of the native layout and must not change.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOnlineRecognizerConfig
{
    /// <summary>Feature extraction settings.</summary>
    public SherpaOnnxFeatureConfig feat_config;

    /// <summary>Model settings.</summary>
    public SherpaOnnxOnlineModelConfig model_config;

    /// <summary>Possible values are: greedy_search, modified_beam_search.</summary>
    public string decoding_method;

    /// <summary>
    /// Used only when decoding_method is modified_beam_search.
    /// Example value: 4.
    /// </summary>
    public int max_active_paths;

    /// <summary>
    /// 0 to disable endpoint detection.
    /// A non-zero value to enable endpoint detection.
    /// </summary>
    public int enable_endpoint;

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than
    /// this value even if nothing has been decoded.
    /// Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule1_min_trailing_silence;

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than
    /// this value after something that is not blank has been decoded.
    /// Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule2_min_trailing_silence;

    /// <summary>
    /// An endpoint is detected if the utterance in seconds is larger than
    /// this value.
    /// Used only when enable_endpoint is not 0.
    /// </summary>
    public float rule3_min_utterance_length;
}
/// <summary>
/// Managed view of the native online result that
/// <c>GetOnlineStreamResult</c> points to.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct SherpaOnnxOnlineRecognizerResult
{
    /// <summary>Pointer to the native text buffer.</summary>
    public IntPtr text;

    /// <summary>Length reported by the native side for <see cref="text"/>.</summary>
    public int text_len;

    // TODO: Add more fields
}
/// <summary>
/// Handle to a native online recognizer: a single pointer-sized field.
/// The layout attribute spells out the C# default for structs.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal struct SherpaOnnxOnlineRecognizer
{
    /// <summary>Native pointer; zero means no recognizer is attached.</summary>
    public IntPtr impl;
}
/// <summary>
/// Handle to a native online stream: a single pointer-sized field.
/// Arrays of this struct are marshalled element by element for batch decoding.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1, CharSet = CharSet.Ansi)]
internal struct SherpaOnnxOnlineStream
{
    /// <summary>Native pointer; zero means no stream is attached.</summary>
    public IntPtr impl;
}
/// <summary>Model description for an offline NeMo EncDec CTC model.</summary>
public class OfflineNemoEncDecCtc
{
    /// <summary>Path to the model file; empty by default.</summary>
    public string Model { get; set; } = string.Empty;
}
/// <summary>Model description for an offline paraformer model.</summary>
public class OfflineParaformer
{
    /// <summary>Path to the model file; empty by default.</summary>
    public string Model { get; set; } = string.Empty;
}
/// <summary>Managed copy of an offline recognition result.</summary>
public class OfflineRecognizerResultEntity
{
    /// <summary>Recognized text.</summary>
    public string? text { get; set; }

    /// <summary>Length of the recognized text as reported by the native result.</summary>
    public int text_len { get; set; }

    /// <summary>Decoded tokens.</summary>
    public List<string>? tokens { get; set; }

    /// <summary>Timestamps.</summary>
    public List<float>? timestamps { get; set; }
}
/// <summary>Model description for an offline transducer (encoder, decoder, joiner).</summary>
public class OfflineTransducer
{
    /// <summary>Path to the encoder model file; empty by default.</summary>
    public string EncoderFilename { get; set; } = string.Empty;

    /// <summary>Path to the decoder model file; empty by default.</summary>
    public string DecoderFilename { get; set; } = string.Empty;

    /// <summary>Path to the joiner model file; empty by default.</summary>
    public string JoinerFilename { get; set; } = string.Empty;
}
/// <summary>Endpoint detection settings for online recognition.</summary>
public class OnlineEndpoint
{
    /// <summary>
    /// 0 to disable endpoint detection.
    /// A non-zero value to enable endpoint detection.
    /// </summary>
    public int EnableEndpoint { get; set; }

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than
    /// this value even if nothing has been decoded.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule1MinTrailingSilence { get; set; }

    /// <summary>
    /// An endpoint is detected if trailing silence in seconds is larger than
    /// this value after something that is not blank has been decoded.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule2MinTrailingSilence { get; set; }

    /// <summary>
    /// An endpoint is detected if the utterance in seconds is larger than
    /// this value.
    /// Used only when endpoint detection is enabled.
    /// </summary>
    public float Rule3MinUtteranceLength { get; set; }
}
844 - public class OnlineRecognizerResultEntity  
845 - {  
846 - /// <summary>  
847 - /// recognizer result  
848 - /// </summary>  
849 - public string? text { get; set; }  
850 - /// <summary>  
851 - /// recognizer result length  
852 - /// </summary>  
853 - public int text_len { get; set; }  
854 - /// <summary>  
855 - /// decode tokens  
856 - /// </summary>  
857 - public List<string>? tokens { get; set; }  
858 - /// <summary>  
859 - /// timestamps  
860 - /// </summary>  
861 - public List<float>? timestamps { get; set; }  
862 - }  
863 - public class OnlineTransducer  
864 - {  
865 - private string encoderFilename = string.Empty;  
866 - private string decoderFilename = string.Empty;  
867 - private string joinerFilename = string.Empty;  
868 - public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; }  
869 - public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; }  
870 - public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; }  
871 - }  
872 -}  
1 -// sherpa-onnx/sharp-api/offline-api.cpp  
2 -//  
3 -// Copyright (c) 2023 Manyeyes Corporation  
4 -  
5 -#include "offline-api.h"  
6 -  
7 -#include "sherpa-onnx/csrc/display.h"  
8 -#include "sherpa-onnx/csrc/offline-recognizer.h"  
9 -  
10 -namespace sherpa_onnx  
11 -{  
12 - struct SherpaOnnxOfflineRecognizer {  
13 - sherpa_onnx::OfflineRecognizer* impl;  
14 - };  
15 -  
16 - struct SherpaOnnxOfflineStream {  
17 - std::unique_ptr<sherpa_onnx::OfflineStream> impl;  
18 - explicit SherpaOnnxOfflineStream(std::unique_ptr<sherpa_onnx::OfflineStream> p)  
19 - : impl(std::move(p)) {}  
20 - };  
21 -  
22 - struct SherpaOnnxDisplay {  
23 - std::unique_ptr<sherpa_onnx::Display> impl;  
24 - };  
25 -  
26 - SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer(  
27 - const SherpaOnnxOfflineRecognizerConfig* config) {  
28 - sherpa_onnx::OfflineRecognizerConfig recognizer_config;  
29 -  
30 - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;  
31 - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;  
32 -  
33 - if (strlen(config->model_config.transducer.encoder_filename) > 0) {  
34 - recognizer_config.model_config.transducer.encoder_filename =  
35 - config->model_config.transducer.encoder_filename;  
36 - recognizer_config.model_config.transducer.decoder_filename =  
37 - config->model_config.transducer.decoder_filename;  
38 - recognizer_config.model_config.transducer.joiner_filename =  
39 - config->model_config.transducer.joiner_filename;  
40 - }  
41 - else if (strlen(config->model_config.paraformer.model) > 0) {  
42 - recognizer_config.model_config.paraformer.model =  
43 - config->model_config.paraformer.model;  
44 - }  
45 - else if (strlen(config->model_config.nemo_ctc.model) > 0) {  
46 - recognizer_config.model_config.nemo_ctc.model =  
47 - config->model_config.nemo_ctc.model;  
48 - }  
49 -  
50 - recognizer_config.model_config.tokens =  
51 - config->model_config.tokens;  
52 - recognizer_config.model_config.num_threads =  
53 - config->model_config.num_threads;  
54 - recognizer_config.model_config.debug =  
55 - config->model_config.debug;  
56 -  
57 - recognizer_config.decoding_method = config->decoding_method;  
58 -  
59 - SherpaOnnxOfflineRecognizer* recognizer =  
60 - new SherpaOnnxOfflineRecognizer;  
61 - recognizer->impl =  
62 - new sherpa_onnx::OfflineRecognizer(recognizer_config);  
63 -  
64 - return recognizer;  
65 - }  
66 -  
67 - SherpaOnnxOfflineStream* __stdcall CreateOfflineStream(  
68 - SherpaOnnxOfflineRecognizer* recognizer) {  
69 - SherpaOnnxOfflineStream* stream =  
70 - new SherpaOnnxOfflineStream(recognizer->impl->CreateStream());  
71 - return stream;  
72 - }  
73 -  
74 - /// Feed audio samples to an offline stream (samples points to samples_size floats).
75 - void __stdcall AcceptWaveform(
76 - SherpaOnnxOfflineStream* stream, int32_t sample_rate,
77 - const float* samples, int32_t samples_size) {
78 - // Pass the caller's buffer straight through; the former std::vector copy was redundant.
79 - stream->impl->AcceptWaveform(sample_rate, samples, samples_size);
80 - }
81 -  
82 - void __stdcall DecodeOfflineStream(  
83 - SherpaOnnxOfflineRecognizer* recognizer,  
84 - SherpaOnnxOfflineStream* stream) {  
85 - recognizer->impl->DecodeStream(stream->impl.get());  
86 - }  
87 -  
88 - void __stdcall DecodeMultipleOfflineStreams(  
89 - SherpaOnnxOfflineRecognizer* recognizer,  
90 - SherpaOnnxOfflineStream** streams, int32_t n) {  
91 - std::vector<sherpa_onnx::OfflineStream*> ss(n);  
92 - for (int32_t i = 0; i != n; ++i) {  
93 - ss[i] = streams[i]->impl.get();  
94 - }  
95 - recognizer->impl->DecodeStreams(ss.data(), n);  
96 - }  
97 -  
98 - SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult(  
99 - SherpaOnnxOfflineStream* stream) {  
100 - sherpa_onnx::OfflineRecognitionResult result =  
101 - stream->impl->GetResult();  
102 - const auto& text = result.text;  
103 - auto r = new SherpaOnnxOfflineRecognizerResult;  
104 - r->text = new char[text.size() + 1];  
105 - std::copy(text.begin(), text.end(), const_cast<char*>(r->text));  
106 - const_cast<char*>(r->text)[text.size()] = 0;  
107 - r->text_len = text.size();  
108 - return r;  
109 - }  
110 -  
111 -  
112 - /// Free a pointer returned by CreateOfflineRecognizer()  
113 - ///  
114 - /// @param p A pointer returned by CreateOfflineRecognizer()  
115 - void __stdcall DestroyOfflineRecognizer(  
116 - SherpaOnnxOfflineRecognizer* recognizer) {  
117 - delete recognizer->impl;  
118 - delete recognizer;  
119 - }  
120 -  
121 - /// Destroy an offline stream.
122 - ///  
123 - /// @param stream A pointer returned by CreateOfflineStream()  
124 - void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) {  
125 - delete stream;  
126 - }  
127 -  
128 - /// Destroy the pointer returned by GetOfflineStreamResult().
129 - ///
130 - /// @param r A pointer returned by GetOfflineStreamResult()
131 - void __stdcall DestroyOfflineRecognizerResult(
132 - SherpaOnnxOfflineRecognizerResult* r) {
133 - delete[] r->text;  // text is allocated with new char[]; scalar delete would be undefined behavior
134 - delete r;
135 - }
136 -}// namespace sherpa_onnx  
1 -// sherpa-onnx/sharp-api/offline-api.h  
2 -//  
3 -// Copyright (c) 2023 Manyeyes Corporation  
4 -  
5 -#pragma once  
6 -  
7 -#include <list>  
8 -  
9 -namespace sherpa_onnx  
10 -{  
11 - /// Please refer to  
12 - /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html  
13 - /// to download pre-trained models. That is, you can find encoder-xxx.onnx  
14 - /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct  
15 - /// from there.  
16 - typedef struct SherpaOnnxOfflineTransducer {  
17 - const char* encoder_filename;  
18 - const char* decoder_filename;  
19 - const char* joiner_filename;  
20 - } SherpaOnnxOfflineTransducer;  
21 -  
22 - typedef struct SherpaOnnxOfflineParaformer {  
23 - const char* model;  
24 - }SherpaOnnxOfflineParaformer;  
25 -  
26 - typedef struct SherpaOnnxOfflineNemoEncDecCtc {  
27 - const char* model;  
28 - }SherpaOnnxOfflineNemoEncDecCtc;  
29 -  
30 -  
31 - typedef struct SherpaOnnxOfflineModelConfig {  
32 - SherpaOnnxOfflineTransducer transducer;  
33 - SherpaOnnxOfflineParaformer paraformer;  
34 - SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;  
35 - const char* tokens;  
36 - const int32_t num_threads;  
37 - const bool debug;  
38 - } SherpaOnnxOfflineModelConfig;  
39 -  
40 - /// It expects 16 kHz 16-bit single channel wave format.  
41 - typedef struct SherpaOnnxFeatureConfig {  
42 - /// Sample rate of the input data. MUST match the one expected  
43 - /// by the model. For instance, it should be 16000 for models provided  
44 - /// by us.  
45 - int32_t sample_rate;  
46 -  
47 - /// Feature dimension of the model.  
48 - /// For instance, it should be 80 for models provided by us.  
49 - int32_t feature_dim;  
50 - } SherpaOnnxFeatureConfig;  
51 -  
52 - typedef struct SherpaOnnxOfflineRecognizerConfig {  
53 - SherpaOnnxFeatureConfig feat_config;  
54 - SherpaOnnxOfflineModelConfig model_config;  
55 -  
56 - /// Possible values are: greedy_search, modified_beam_search  
57 - const char* decoding_method;  
58 -  
59 - } SherpaOnnxOfflineRecognizerConfig;  
60 -  
61 - typedef struct SherpaOnnxOfflineRecognizerResult {  
62 - // Recognition results.  
63 - // For English, it consists of space separated words.  
64 - // For Chinese, it consists of Chinese words without spaces.  
65 - char* text;  
66 - int text_len;  
67 -  
68 - // Decoded results at the token level.  
69 - // For instance, for BPE-based models it consists of a list of BPE tokens.  
70 - // std::vector<std::string> tokens;  
71 -  
72 - // timestamps.size() == tokens.size()  
73 - // timestamps[i] records the time in seconds when tokens[i] is decoded.  
74 - // std::vector<float> timestamps;  
75 - } SherpaOnnxOfflineRecognizerResult;  
76 -  
77 - /// Note: OfflineRecognizer here means a non-streaming recognizer.
78 - /// It does not need to access the Internet during recognition.  
79 - /// Everything is run locally.  
80 - typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer;  
81 -  
82 - typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;  
83 -  
84 - extern "C" __declspec(dllexport)  
85 - SherpaOnnxOfflineRecognizer * __stdcall CreateOfflineRecognizer(  
86 - const SherpaOnnxOfflineRecognizerConfig * config);  
87 -  
88 - extern "C" __declspec(dllexport)  
89 - SherpaOnnxOfflineStream * __stdcall CreateOfflineStream(  
90 - SherpaOnnxOfflineRecognizer * sherpaOnnxOfflineRecognizer);  
91 -  
92 - extern "C" __declspec(dllexport)  
93 - void __stdcall AcceptWaveform(  
94 - SherpaOnnxOfflineStream * stream, int32_t sample_rate,  
95 - const float* samples, int32_t samples_size);  
96 -  
97 - extern "C" __declspec(dllexport)  
98 - void __stdcall DecodeOfflineStream(  
99 - SherpaOnnxOfflineRecognizer * recognizer,  
100 - SherpaOnnxOfflineStream * stream);  
101 -  
102 - extern "C" __declspec(dllexport)  
103 - void __stdcall DecodeMultipleOfflineStreams(  
104 - SherpaOnnxOfflineRecognizer * recognizer,  
105 - SherpaOnnxOfflineStream * *streams, int32_t n);  
106 -  
107 - extern "C" __declspec(dllexport)  
108 - SherpaOnnxOfflineRecognizerResult * __stdcall GetOfflineStreamResult(  
109 - SherpaOnnxOfflineStream * stream);  
110 -  
111 - extern "C" __declspec(dllexport)  
112 - void __stdcall DestroyOfflineRecognizer(  
113 - SherpaOnnxOfflineRecognizer * recognizer);  
114 -  
115 - extern "C" __declspec(dllexport)  
116 - void __stdcall DestroyOfflineStream(  
117 - SherpaOnnxOfflineStream * stream);  
118 -  
119 - extern "C" __declspec(dllexport)  
120 - void __stdcall DestroyOfflineRecognizerResult(  
121 - SherpaOnnxOfflineRecognizerResult * r);  
122 -}// namespace sherpa_onnx  
1 -// sherpa-onnx/cpp-api/c-api.cc  
2 -//  
3 -// Copyright (c) 2023 Xiaomi Corporation  
4 -  
5 -#include "online-api.h"  
6 -  
7 -#include <algorithm>  
8 -#include <memory>  
9 -#include <utility>  
10 -#include <vector>  
11 -  
12 -#include "../../sherpa-onnx/csrc/display.h"  
13 -#include "../../sherpa-onnx/csrc/online-recognizer.h"  
14 -namespace sherpa_onnx  
15 -{  
16 - struct SherpaOnnxOnlineRecognizer {  
17 - sherpa_onnx::OnlineRecognizer* impl;  
18 - };  
19 -  
20 - struct SherpaOnnxOnlineStream {  
21 - std::unique_ptr<sherpa_onnx::OnlineStream> impl;  
22 - explicit SherpaOnnxOnlineStream(std::unique_ptr<sherpa_onnx::OnlineStream> p)  
23 - : impl(std::move(p)) {}  
24 - };  
25 -  
26 - struct SherpaOnnxDisplay {  
27 - std::unique_ptr<sherpa_onnx::Display> impl;  
28 - };  
29 -  
30 - SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer(  
31 - const SherpaOnnxOnlineRecognizerConfig* config) {  
32 - sherpa_onnx::OnlineRecognizerConfig recognizer_config;  
33 -  
34 - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;  
35 - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;  
36 -  
37 - recognizer_config.model_config.encoder_filename =  
38 - config->model_config.transducer.encoder;  
39 - recognizer_config.model_config.decoder_filename =  
40 - config->model_config.transducer.decoder;  
41 - recognizer_config.model_config.joiner_filename = config->model_config.transducer.joiner;  
42 - recognizer_config.model_config.tokens = config->model_config.tokens;  
43 - recognizer_config.model_config.num_threads = config->model_config.num_threads;  
44 - recognizer_config.model_config.debug = config->model_config.debug;  
45 -  
46 - recognizer_config.decoding_method = config->decoding_method;  
47 - recognizer_config.max_active_paths = config->max_active_paths;  
48 -  
49 - recognizer_config.enable_endpoint = config->enable_endpoint;  
50 -  
51 - recognizer_config.endpoint_config.rule1.min_trailing_silence =  
52 - config->rule1_min_trailing_silence;  
53 -  
54 - recognizer_config.endpoint_config.rule2.min_trailing_silence =  
55 - config->rule2_min_trailing_silence;  
56 -  
57 - recognizer_config.endpoint_config.rule3.min_utterance_length =  
58 - config->rule3_min_utterance_length;  
59 -  
60 - SherpaOnnxOnlineRecognizer* recognizer = new SherpaOnnxOnlineRecognizer;  
61 - recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config);  
62 -  
63 - return recognizer;  
64 - }  
65 -  
66 - void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer) {  
67 - delete recognizer->impl;  
68 - delete recognizer;  
69 - }  
70 -  
71 - SherpaOnnxOnlineStream* __stdcall CreateOnlineStream(  
72 - const SherpaOnnxOnlineRecognizer* recognizer) {  
73 - SherpaOnnxOnlineStream* stream =  
74 - new SherpaOnnxOnlineStream(recognizer->impl->CreateStream());  
75 - return stream;  
76 - }  
77 -  
78 - void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream) { delete stream; }  
79 -  
80 - void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate,  
81 - const float* samples, int32_t n) {  
82 - stream->impl->AcceptWaveform(sample_rate, samples, n);  
83 - }  
84 -  
85 - int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,  
86 - SherpaOnnxOnlineStream* stream) {  
87 - return recognizer->impl->IsReady(stream->impl.get());  
88 - }  
89 -  
90 - void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,  
91 - SherpaOnnxOnlineStream* stream) {  
92 - recognizer->impl->DecodeStream(stream->impl.get());  
93 - }  
94 -  
95 - void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer,  
96 - SherpaOnnxOnlineStream** streams, int32_t n) {  
97 - std::vector<sherpa_onnx::OnlineStream*> ss(n);  
98 - for (int32_t i = 0; i != n; ++i) {  
99 - ss[i] = streams[i]->impl.get();  
100 - }  
101 - recognizer->impl->DecodeStreams(ss.data(), n);  
102 - }  
103 -  
104 - SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult(  
105 - SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream) {  
106 - sherpa_onnx::OnlineRecognizerResult result =  
107 - recognizer->impl->GetResult(stream->impl.get());  
108 - const auto& text = result.text;  
109 -  
110 - auto r = new SherpaOnnxOnlineRecognizerResult;  
111 - r->text = new char[text.size() + 1];  
112 - std::copy(text.begin(), text.end(), const_cast<char*>(r->text));  
113 - const_cast<char*>(r->text)[text.size()] = 0;  
114 - r->text_len = text.size();  
115 - return r;  
116 - }  
117 -  
118 - void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r) {  
119 - delete[] r->text;  
120 - delete r;  
121 - }  
122 -  
123 - void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer,  
124 - SherpaOnnxOnlineStream* stream) {  
125 - recognizer->impl->Reset(stream->impl.get());  
126 - }  
127 -  
128 - void __stdcall InputFinished(SherpaOnnxOnlineStream* stream) {  
129 - stream->impl->InputFinished();  
130 - }  
131 -  
132 - int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,  
133 - SherpaOnnxOnlineStream* stream) {  
134 - return recognizer->impl->IsEndpoint(stream->impl.get());  
135 - }  
136 -  
137 - SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line) {  
138 - SherpaOnnxDisplay* ans = new SherpaOnnxDisplay;  
139 - ans->impl = std::make_unique<sherpa_onnx::Display>(max_word_per_line);  
140 - return ans;  
141 - }  
142 -  
143 - void __stdcall DestroyDisplay(SherpaOnnxDisplay* display) { delete display; }  
144 -  
145 - void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s) {  
146 - display->impl->Print(idx, s);  
147 - }  
148 -}  
1 -// sherpa-onnx/cpp-api/c-api.h  
2 -//  
3 -// Copyright (c) 2023 Xiaomi Corporation  
4 -  
5 -// C API for sherpa-onnx  
6 -//  
7 -// Please refer to  
8 -// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c  
9 -// for usages.  
10 -//  
11 -  
12 -#ifndef SHERPA_ONNX_CPP_API_C_API_H_  
13 -#define SHERPA_ONNX_CPP_API_C_API_H_  
14 -  
15 -#include <stdint.h>  
16 -  
17 -#ifdef __cplusplus  
18 -extern "C" {  
19 -#endif  
20 - namespace sherpa_onnx  
21 - {  
22 - /// Please refer to  
23 - /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html  
24 - /// to download pre-trained models. That is, you can find encoder-xxx.onnx  
25 - /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct  
26 - /// from there.  
27 - typedef struct SherpaOnnxOnlineTransducer {  
28 - const char* encoder;  
29 - const char* decoder;  
30 - const char* joiner;  
31 - } SherpaOnnxOnlineTransducer;  
32 -  
33 - typedef struct SherpaOnnxOnlineModelConfig  
34 - {  
35 - const SherpaOnnxOnlineTransducer transducer;  
36 - const char* tokens;  
37 - const int32_t num_threads;  
38 - const bool debug; // true to print debug information of the model  
39 - }SherpaOnnxOnlineModelConfig;  
40 -  
41 - /// It expects 16 kHz 16-bit single channel wave format.  
42 - typedef struct SherpaOnnxFeatureConfig {  
43 - /// Sample rate of the input data. MUST match the one expected  
44 - /// by the model. For instance, it should be 16000 for models provided  
45 - /// by us.  
46 - int32_t sample_rate;  
47 -  
48 - /// Feature dimension of the model.  
49 - /// For instance, it should be 80 for models provided by us.  
50 - int32_t feature_dim;  
51 - } SherpaOnnxFeatureConfig;  
52 -  
53 - typedef struct SherpaOnnxOnlineRecognizerConfig {  
54 - SherpaOnnxFeatureConfig feat_config;  
55 - SherpaOnnxOnlineModelConfig model_config;  
56 -  
57 - /// Possible values are: greedy_search, modified_beam_search  
58 - const char* decoding_method;  
59 -  
60 - /// Used only when decoding_method is modified_beam_search  
61 - /// Example value: 4  
62 - int32_t max_active_paths;  
63 -  
64 - /// 0 to disable endpoint detection.  
65 - /// A non-zero value to enable endpoint detection.  
66 - int enable_endpoint;  
67 -  
68 - /// An endpoint is detected if trailing silence in seconds is larger than  
69 - /// this value even if nothing has been decoded.  
70 - /// Used only when enable_endpoint is not 0.  
71 - float rule1_min_trailing_silence;  
72 -  
73 - /// An endpoint is detected if trailing silence in seconds is larger than  
74 - /// this value after something that is not blank has been decoded.  
75 - /// Used only when enable_endpoint is not 0.  
76 - float rule2_min_trailing_silence;  
77 -  
78 - /// An endpoint is detected if the utterance in seconds is larger than  
79 - /// this value.  
80 - /// Used only when enable_endpoint is not 0.  
81 - float rule3_min_utterance_length;  
82 - } SherpaOnnxOnlineRecognizerConfig;  
83 -  
84 - typedef struct SherpaOnnxOnlineRecognizerResult {  
85 - const char* text;  
86 - int text_len;  
87 - // TODO(fangjun): Add more fields  
88 - } SherpaOnnxOnlineRecognizerResult;  
89 -  
90 - /// Note: OnlineRecognizer here means StreamingRecognizer.  
91 - /// It does not need to access the Internet during recognition.  
92 - /// Everything is run locally.  
93 - typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;  
94 - typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;  
95 -  
96 - /// @param config Config for the recognizer.
97 - /// @return Return a pointer to the recognizer. The user has to invoke
98 - /// DestroyOnlineRecognizer() to free it to avoid memory leak.
99 - extern "C" __declspec(dllexport)  
100 - SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer(  
101 - const SherpaOnnxOnlineRecognizerConfig * config);  
102 -  
103 - /// Free a pointer returned by CreateOnlineRecognizer()  
104 - ///  
105 - /// @param p A pointer returned by CreateOnlineRecognizer()  
106 - extern "C" __declspec(dllexport)  
107 - void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer);  
108 -  
109 - /// Create an online stream for accepting wave samples.  
110 - ///  
111 - /// @param recognizer A pointer returned by CreateOnlineRecognizer()  
112 - /// @return Return a pointer to an OnlineStream. The user has to invoke  
113 - /// DestroyOnlineStream() to free it to avoid memory leak.  
114 - extern "C" __declspec(dllexport)  
115 - SherpaOnnxOnlineStream* __stdcall CreateOnlineStream(  
116 - const SherpaOnnxOnlineRecognizer* recognizer);  
117 -  
118 - /// Destroy an online stream.  
119 - ///  
120 - /// @param stream A pointer returned by CreateOnlineStream()  
121 - extern "C" __declspec(dllexport)  
122 - void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream);  
123 -  
124 - /// Accept input audio samples and compute the features.  
125 - /// The user has to invoke DecodeOnlineStream() to run the neural network and  
126 - /// decoding.  
127 - ///  
128 - /// @param stream A pointer returned by CreateOnlineStream().  
129 - /// @param sample_rate Sample rate of the input samples. If it is different  
130 - /// from config.feat_config.sample_rate, we will do  
131 - /// resampling inside sherpa-onnx.  
132 - /// @param samples A pointer to a 1-D array containing audio samples.  
133 - /// The range of samples has to be normalized to [-1, 1].  
134 - /// @param n Number of elements in the samples array.  
135 - extern "C" __declspec(dllexport)  
136 - void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate,  
137 - const float* samples, int32_t n);  
138 -  
139 - /// Return 1 if there are enough number of feature frames for decoding.  
140 - /// Return 0 otherwise.  
141 - ///  
142 - /// @param recognizer A pointer returned by CreateOnlineRecognizer  
143 - /// @param stream A pointer returned by CreateOnlineStream  
144 - extern "C" __declspec(dllexport)  
145 - int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,  
146 - SherpaOnnxOnlineStream* stream);  
147 -  
148 - /// Call this function to run the neural network model and decoding.  
149 - ///
150 - /// Precondition for this function: IsOnlineStreamReady() MUST return 1.  
151 - ///  
152 - /// Usage example:  
153 - ///  
154 - /// while (IsOnlineStreamReady(recognizer, stream)) {  
155 - /// DecodeOnlineStream(recognizer, stream);  
156 - /// }  
157 - ///  
158 - extern "C" __declspec(dllexport)  
159 - void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,  
160 - SherpaOnnxOnlineStream* stream);  
161 -  
162 - /// This function is similar to DecodeOnlineStream(). It decodes multiple  
163 - /// OnlineStream in parallel.  
164 - ///  
165 - /// Caution: The caller has to ensure each OnlineStream is ready, i.e.,  
166 - /// IsOnlineStreamReady() for that stream should return 1.  
167 - ///  
168 - /// @param recognizer A pointer returned by CreateOnlineRecognizer()  
169 - /// @param streams A pointer array containing pointers returned by  
170 - /// CreateOnlineStream()
171 - /// @param n Number of elements in the given streams array.  
172 - extern "C" __declspec(dllexport)  
173 - void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer,  
174 - SherpaOnnxOnlineStream** streams, int32_t n);  
175 -  
176 - /// Get the decoding results so far for an OnlineStream.  
177 - ///  
178 - /// @param recognizer A pointer returned by CreateOnlineRecognizer().  
179 - /// @param stream A pointer returned by CreateOnlineStream().  
180 - /// @return A pointer containing the result. The user has to invoke  
181 - /// DestroyOnlineRecognizerResult() to free the returned pointer to  
182 - /// avoid memory leak.  
183 - extern "C" __declspec(dllexport)  
184 - SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult(  
185 - SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream);  
186 -  
187 - /// Destroy the pointer returned by GetOnlineStreamResult().  
188 - ///  
189 - /// @param r A pointer returned by GetOnlineStreamResult()  
190 - extern "C" __declspec(dllexport)  
191 - void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r);  
192 -  
193 - /// Reset an OnlineStream , which clears the neural network model state  
194 - /// and the state for decoding.  
195 - ///  
196 - /// @param recognizer A pointer returned by CreateOnlineRecognizer().  
197 - /// @param stream A pointer returned by CreateOnlineStream  
198 - extern "C" __declspec(dllexport)  
199 - void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer,  
200 - SherpaOnnxOnlineStream* stream);  
201 -  
202 - /// Signal that no more audio samples would be available.  
203 - /// After this call, you cannot call AcceptOnlineWaveform() any more.
204 - ///  
205 - /// @param stream A pointer returned by CreateOnlineStream()  
206 - extern "C" __declspec(dllexport)  
207 - void __stdcall InputFinished(SherpaOnnxOnlineStream* stream);  
208 -  
209 - /// Return 1 if an endpoint has been detected.  
210 - ///  
211 - /// @param recognizer A pointer returned by CreateOnlineRecognizer()  
212 - /// @param stream A pointer returned by CreateOnlineStream()  
213 - /// @return Return 1 if an endpoint is detected. Return 0 otherwise.  
214 - extern "C" __declspec(dllexport)  
215 - int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,  
216 - SherpaOnnxOnlineStream* stream);  
217 -  
218 - // for displaying results on Linux/macOS.  
219 - typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;  
220 -  
221 - /// Create a display object. Must be freed using DestroyDisplay to avoid  
222 - /// memory leak.  
223 - extern "C" __declspec(dllexport)  
224 - SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line);  
225 -  
226 - extern "C" __declspec(dllexport)  
227 - void __stdcall DestroyDisplay(SherpaOnnxDisplay* display);  
228 -  
229 - /// Print the result.  
230 - extern "C" __declspec(dllexport)  
231 - void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s);  
232 - }  
233 -  
234 -#ifdef __cplusplus  
235 -} /* extern "C" */  
236 -#endif  
237 -  
238 -#endif // SHERPA_ONNX_CPP_API_C_API_H_