c-api.h
16.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
// sherpa-onnx/c-api/c-api.h
//
// Copyright (c) 2023 Xiaomi Corporation
// C API for sherpa-onnx
//
// Please refer to
// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
// for usages.
//
#ifndef SHERPA_ONNX_C_API_C_API_H_
#define SHERPA_ONNX_C_API_C_API_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Symbol import/export decorations, modelled on
// https://github.com/pytorch/pytorch/blob/main/c10/macros/Export.h
//
// SHERPA_ONNX_BUILD_SHARED_LIBS and SHERPA_ONNX_BUILD_MAIN_LIB are set in
// CMakeLists.txt.
//
// Only a shared-library build on Windows needs a decoration; in every other
// configuration both macros expand to nothing.
#if defined(_WIN32) && defined(SHERPA_ONNX_BUILD_SHARED_LIBS)
#define SHERPA_ONNX_EXPORT __declspec(dllexport)
#define SHERPA_ONNX_IMPORT __declspec(dllimport)
#else
#define SHERPA_ONNX_EXPORT
#define SHERPA_ONNX_IMPORT
#endif
// Export when building the main library itself, import otherwise.
#ifdef SHERPA_ONNX_BUILD_MAIN_LIB
#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT
#else
#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT
#endif
/// Model files of a streaming (online) transducer.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models. That is, you can find encoder-xxx.onnx,
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt from there.
/// (tokens.txt is not a member of this struct; the enclosing model config
/// has a `tokens` field.)
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig {
/// The encoder model, e.g., encoder-xxx.onnx
const char *encoder;
/// The decoder model, e.g., decoder-xxx.onnx
const char *decoder;
/// The joiner model, e.g., joiner-xxx.onnx
const char *joiner;
} SherpaOnnxOnlineTransducerModelConfig;
/// Model files of a streaming (online) paraformer.
///
/// Please visit
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
/// to download pre-trained streaming paraformer models.
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineParaformerModelConfig {
/// The encoder model of the streaming paraformer.
const char *encoder;
/// The decoder model of the streaming paraformer.
const char *decoder;
} SherpaOnnxOnlineParaformerModelConfig;
/// Model configuration for the streaming (online) recognizer. It is embedded
/// in SherpaOnnxOnlineRecognizerConfig as `model_config`.
///
/// NOTE(review): the struct tag is `SherpaOnnxModelConfig` while the typedef
/// name is `SherpaOnnxOnlineModelConfig`. The rest of this header uses the
/// typedef name; renaming the tag would break code that spells out
/// `struct SherpaOnnxModelConfig`, so it is left as is.
SHERPA_ONNX_API typedef struct SherpaOnnxModelConfig {
/// Used for transducer models.
/// NOTE(review): presumably only one of transducer/paraformer needs to be
/// filled in — confirm against the implementation.
SherpaOnnxOnlineTransducerModelConfig transducer;
/// Used for streaming paraformer models.
SherpaOnnxOnlineParaformerModelConfig paraformer;
/// The tokens file (tokens.txt) that comes with the pre-trained model.
const char *tokens;
/// NOTE(review): presumably the number of threads used to run the neural
/// network — confirm.
int32_t num_threads;
/// NOTE(review): presumably the name of the execution provider, e.g. "cpu"
/// — confirm the accepted values.
const char *provider;
int32_t debug; // true to print debug information of the model
/// NOTE(review): accepted values are not visible in this header — confirm.
const char *model_type;
} SherpaOnnxOnlineModelConfig;
/// Configuration of the feature extractor. It is shared by the online and
/// the offline recognizer configs in this header.
///
/// It expects 16 kHz 16-bit single channel wave format.
SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
/// Sample rate of the input data. MUST match the one expected
/// by the model. For instance, it should be 16000 for models provided
/// by us.
int32_t sample_rate;
/// Feature dimension of the model.
/// For instance, it should be 80 for models provided by us.
int32_t feature_dim;
} SherpaOnnxFeatureConfig;
/// Top-level configuration passed to CreateOnlineRecognizer().
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
/// Feature extractor settings (sample rate and feature dimension).
SherpaOnnxFeatureConfig feat_config;
/// Model files and runtime settings.
SherpaOnnxOnlineModelConfig model_config;
/// Possible values are: greedy_search, modified_beam_search
const char *decoding_method;
/// Used only when decoding_method is modified_beam_search
/// Example value: 4
int32_t max_active_paths;
/// 0 to disable endpoint detection.
/// A non-zero value to enable endpoint detection.
int32_t enable_endpoint;
/// An endpoint is detected if trailing silence in seconds is larger than
/// this value even if nothing has been decoded.
/// Used only when enable_endpoint is not 0.
float rule1_min_trailing_silence;
/// An endpoint is detected if trailing silence in seconds is larger than
/// this value after something that is not blank has been decoded.
/// Used only when enable_endpoint is not 0.
float rule2_min_trailing_silence;
/// An endpoint is detected if the utterance in seconds is larger than
/// this value.
/// Used only when enable_endpoint is not 0.
float rule3_min_utterance_length;
} SherpaOnnxOnlineRecognizerConfig;
// Decoding result of an online stream. It is returned by
// GetOnlineStreamResult() and must be freed with
// DestroyOnlineRecognizerResult().
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
// Recognized text
const char *text;
// Pointer to continuous memory which holds string based tokens
// which are separated by \0
const char *tokens;
// An array of pointers into tokens.
// NOTE(review): presumably one entry per token, each holding the address of
// the first character of that token — confirm.
const char *const *tokens_arr;
// Pointer to continuous memory which holds timestamps
float *timestamps;
// The number of tokens/timestamps in above pointer
int32_t count;
/** Return a json string.
 *
 * The returned string contains:
 * {
 *   "text": "The recognition result",
 *   "tokens": [x, x, x],
 *   "timestamps": [x, x, x],
 *   "segment": x,
 *   "start_time": x,
 *   "is_final": true|false
 * }
 */
const char *json;
} SherpaOnnxOnlineRecognizerResult;
/// Handle type for a streaming recognizer. It is only forward-declared in
/// this header; use CreateOnlineRecognizer() / DestroyOnlineRecognizer()
/// to manage it.
///
/// Note: OnlineRecognizer here means StreamingRecognizer.
/// It does not need to access the Internet during recognition.
/// Everything is run locally.
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizer
SherpaOnnxOnlineRecognizer;
/// Handle type for an online stream that accepts wave samples. It is only
/// forward-declared in this header; use CreateOnlineStream() /
/// DestroyOnlineStream() to manage it.
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;
/// Create a streaming recognizer.
///
/// @param config  Config for the recognizer.
/// @return Return a pointer to the recognizer. The user has to invoke
///         DestroyOnlineRecognizer() to free it to avoid memory leak.
SHERPA_ONNX_API SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
const SherpaOnnxOnlineRecognizerConfig *config);
/// Free a pointer returned by CreateOnlineRecognizer()
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
SHERPA_ONNX_API void DestroyOnlineRecognizer(
SherpaOnnxOnlineRecognizer *recognizer);
/// Create an online stream for accepting wave samples.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @return Return a pointer to an OnlineStream. The user has to invoke
///         DestroyOnlineStream() to free it to avoid memory leak.
SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineStream(
const SherpaOnnxOnlineRecognizer *recognizer);
/// Destroy an online stream.
///
/// @param stream  A pointer returned by CreateOnlineStream()
SHERPA_ONNX_API void DestroyOnlineStream(SherpaOnnxOnlineStream *stream);
/// Accept input audio samples and compute the features.
/// The user has to invoke DecodeOnlineStream() to run the neural network and
/// decoding.
///
/// @param stream  A pointer returned by CreateOnlineStream().
/// @param sample_rate  Sample rate of the input samples. If it is different
///                     from config.feat_config.sample_rate, we will do
///                     resampling inside sherpa-onnx.
/// @param samples A pointer to a 1-D array containing audio samples.
///                The range of samples has to be normalized to [-1, 1].
/// @param n  Number of elements in the samples array.
SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream,
int32_t sample_rate, const float *samples,
int32_t n);
/// Check whether a stream has enough feature frames for decoding.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @param stream  A pointer returned by CreateOnlineStream()
/// @return Return 1 if there are enough feature frames for decoding.
///         Return 0 otherwise.
SHERPA_ONNX_API int32_t IsOnlineStreamReady(
SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
/// Call this function to run the neural network model and decoding.
///
/// Precondition for this function: IsOnlineStreamReady() MUST return 1.
///
/// Usage example:
///
///  while (IsOnlineStreamReady(recognizer, stream)) {
///     DecodeOnlineStream(recognizer, stream);
///  }
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @param stream  A pointer returned by CreateOnlineStream()
SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
SherpaOnnxOnlineStream *stream);
/// This function is similar to DecodeOnlineStream(). It decodes multiple
/// OnlineStream in parallel.
///
/// Caution: The caller has to ensure each OnlineStream is ready, i.e.,
/// IsOnlineStreamReady() for that stream should return 1.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @param streams  A pointer array containing pointers returned by
///                 CreateOnlineStream()
/// @param n  Number of elements in the given streams array.
SHERPA_ONNX_API void DecodeMultipleOnlineStreams(
SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream **streams,
int32_t n);
/// Get the decoding results so far for an OnlineStream.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
/// @param stream A pointer returned by CreateOnlineStream().
/// @return A pointer containing the result. The user has to invoke
///         DestroyOnlineRecognizerResult() to free the returned pointer to
///         avoid memory leak.
SHERPA_ONNX_API SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
/// Destroy the pointer returned by GetOnlineStreamResult().
///
/// @param r A pointer returned by GetOnlineStreamResult()
SHERPA_ONNX_API void DestroyOnlineRecognizerResult(
const SherpaOnnxOnlineRecognizerResult *r);
/// Reset an OnlineStream, which clears the neural network model state
/// and the state for decoding.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
/// @param stream A pointer returned by CreateOnlineStream().
SHERPA_ONNX_API void Reset(SherpaOnnxOnlineRecognizer *recognizer,
SherpaOnnxOnlineStream *stream);
/// Signal that no more audio samples will be available.
/// After this call, you cannot call AcceptWaveform() any more.
///
/// @param stream A pointer returned by CreateOnlineStream()
SHERPA_ONNX_API void InputFinished(SherpaOnnxOnlineStream *stream);
/// Check whether an endpoint has been detected for the given stream.
/// The endpoint rules are configured in SherpaOnnxOnlineRecognizerConfig.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
/// @param stream A pointer returned by CreateOnlineStream()
/// @return Return 1 if an endpoint is detected. Return 0 otherwise.
SHERPA_ONNX_API int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer,
SherpaOnnxOnlineStream *stream);
// Handle type for displaying results on Linux/macOS. It is only
// forward-declared in this header; use CreateDisplay() / DestroyDisplay()
// to manage it.
SHERPA_ONNX_API typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
/// Create a display object. Must be freed using DestroyDisplay to avoid
/// memory leak.
///
/// @param max_word_per_line  NOTE(review): presumably the maximum number of
///                           words printed on one line — confirm.
/// @return Return a pointer to the display object.
SHERPA_ONNX_API SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line);
/// Free a display object.
///
/// @param display  A pointer returned by CreateDisplay()
SHERPA_ONNX_API void DestroyDisplay(SherpaOnnxDisplay *display);
/// Print the result.
///
/// @param display  A pointer returned by CreateDisplay()
/// @param idx  NOTE(review): presumably the index of the segment/sentence
///             being printed — confirm.
/// @param s  The string to print.
SHERPA_ONNX_API void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx,
const char *s);
// ============================================================
// For offline ASR (i.e., non-streaming ASR)
// ============================================================
/// Model files of an offline (non-streaming) transducer.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models. That is, you can find encoder-xxx.onnx,
/// decoder-xxx.onnx, and joiner-xxx.onnx for this struct
/// from there.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTransducerModelConfig {
/// The encoder model, e.g., encoder-xxx.onnx
const char *encoder;
/// The decoder model, e.g., decoder-xxx.onnx
const char *decoder;
/// The joiner model, e.g., joiner-xxx.onnx
const char *joiner;
} SherpaOnnxOfflineTransducerModelConfig;
/// Model file of an offline (non-streaming) paraformer.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineParaformerModelConfig {
/// The paraformer model.
/// NOTE(review): presumably a path to an ONNX file — confirm.
const char *model;
} SherpaOnnxOfflineParaformerModelConfig;
/// Model file of an offline NeMo EncDecCtc model.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineNemoEncDecCtcModelConfig {
/// The NeMo CTC model.
/// NOTE(review): presumably a path to an ONNX file — confirm.
const char *model;
} SherpaOnnxOfflineNemoEncDecCtcModelConfig;
/// Model files of an offline Whisper model.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig {
/// The Whisper encoder model.
const char *encoder;
/// The Whisper decoder model.
const char *decoder;
} SherpaOnnxOfflineWhisperModelConfig;
/// Model file of an offline TDNN model.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTdnnModelConfig {
/// The TDNN model.
/// NOTE(review): presumably a path to an ONNX file — confirm.
const char *model;
} SherpaOnnxOfflineTdnnModelConfig;
/// Language model settings for the offline recognizer. It is embedded in
/// SherpaOnnxOfflineRecognizerConfig as `lm_config`.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig {
/// The language model.
/// NOTE(review): presumably a path to the LM file — confirm.
const char *model;
/// NOTE(review): presumably the weight applied to LM scores — confirm.
float scale;
} SherpaOnnxOfflineLMConfig;
/// Model configuration for the offline (non-streaming) recognizer. It is
/// embedded in SherpaOnnxOfflineRecognizerConfig as `model_config`.
///
/// NOTE(review): presumably only the sub-config matching the model in use
/// needs to be filled in — confirm against the implementation.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
/// Used for transducer models.
SherpaOnnxOfflineTransducerModelConfig transducer;
/// Used for paraformer models.
SherpaOnnxOfflineParaformerModelConfig paraformer;
/// Used for NeMo EncDecCtc models.
SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc;
/// Used for Whisper models.
SherpaOnnxOfflineWhisperModelConfig whisper;
/// Used for TDNN models.
SherpaOnnxOfflineTdnnModelConfig tdnn;
/// The tokens file that comes with the pre-trained model.
const char *tokens;
/// NOTE(review): presumably the number of threads used to run the neural
/// network — confirm.
int32_t num_threads;
/// NOTE(review): presumably non-zero to print debug information of the
/// model, as for the online model config — confirm.
int32_t debug;
/// NOTE(review): presumably the name of the execution provider, e.g. "cpu"
/// — confirm the accepted values.
const char *provider;
/// NOTE(review): accepted values are not visible in this header — confirm.
const char *model_type;
} SherpaOnnxOfflineModelConfig;
/// Top-level configuration passed to CreateOfflineRecognizer().
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
/// Feature extractor settings (sample rate and feature dimension).
SherpaOnnxFeatureConfig feat_config;
/// Model files and runtime settings.
SherpaOnnxOfflineModelConfig model_config;
/// Language model settings.
SherpaOnnxOfflineLMConfig lm_config;
/// NOTE(review): presumably accepts the same values as the online config
/// (greedy_search, modified_beam_search) — confirm.
const char *decoding_method;
/// NOTE(review): presumably used only when decoding_method is
/// modified_beam_search, as in the online config — confirm.
int32_t max_active_paths;
} SherpaOnnxOfflineRecognizerConfig;
/// Handle type for an offline (non-streaming) recognizer. It is only
/// forward-declared in this header; use CreateOfflineRecognizer() /
/// DestroyOfflineRecognizer() to manage it.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer
SherpaOnnxOfflineRecognizer;
/// Handle type for an offline stream that accepts wave samples. It is only
/// forward-declared in this header; use CreateOfflineStream() /
/// DestroyOfflineStream() to manage it.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;
/// Create an offline (non-streaming) recognizer.
///
/// @param config  Config for the recognizer.
/// @return Return a pointer to the recognizer. The user has to invoke
///         DestroyOfflineRecognizer() to free it to avoid memory leak.
SHERPA_ONNX_API SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
const SherpaOnnxOfflineRecognizerConfig *config);
/// Free a pointer returned by CreateOfflineRecognizer()
///
/// @param recognizer  A pointer returned by CreateOfflineRecognizer()
SHERPA_ONNX_API void DestroyOfflineRecognizer(
SherpaOnnxOfflineRecognizer *recognizer);
/// Create an offline stream for accepting wave samples.
///
/// @param recognizer  A pointer returned by CreateOfflineRecognizer()
/// @return Return a pointer to an OfflineStream. The user has to invoke
///         DestroyOfflineStream() to free it to avoid memory leak.
SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream(
const SherpaOnnxOfflineRecognizer *recognizer);
/// Destroy an offline stream.
///
/// @param stream  A pointer returned by CreateOfflineStream()
SHERPA_ONNX_API void DestroyOfflineStream(SherpaOnnxOfflineStream *stream);
/// Accept input audio samples and compute the features.
/// The user has to invoke DecodeOfflineStream() to run the neural network and
/// decoding.
///
/// @param stream  A pointer returned by CreateOfflineStream().
/// @param sample_rate  Sample rate of the input samples. If it is different
///                     from config.feat_config.sample_rate, we will do
///                     resampling inside sherpa-onnx.
/// @param samples A pointer to a 1-D array containing audio samples.
///                The range of samples has to be normalized to [-1, 1].
/// @param n  Number of elements in the samples array.
///
/// @caution: For each offline stream, please invoke this function only once!
SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream,
int32_t sample_rate,
const float *samples, int32_t n);
/// Decode an offline stream.
///
/// We assume you have invoked AcceptWaveformOffline() for the given stream
/// before calling this function.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param stream A pointer returned by CreateOfflineStream()
SHERPA_ONNX_API void DecodeOfflineStream(
SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream);
/// Decode a list of offline streams in parallel.
///
/// We assume you have invoked AcceptWaveformOffline() for each stream
/// before calling this function.
///
/// @param recognizer A pointer returned by CreateOfflineRecognizer().
/// @param streams A pointer array containing pointers returned
///                by CreateOfflineStream().
/// @param n Number of entries in the given streams.
SHERPA_ONNX_API void DecodeMultipleOfflineStreams(
SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams,
int32_t n);
// Decoding result of an offline stream. It is returned by
// GetOfflineStreamResult() and must be freed with
// DestroyOfflineRecognizerResult().
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult {
// NOTE(review): presumably the recognized text, as in
// SherpaOnnxOnlineRecognizerResult — confirm.
const char *text;
// Pointer to continuous memory which holds timestamps
//
// It is NULL if the model does not support timestamps
float *timestamps;
// number of entries in timestamps
int32_t count;
// TODO(fangjun): Add more fields
} SherpaOnnxOfflineRecognizerResult;
/// Get the result of the offline stream.
///
/// We assume you have called DecodeOfflineStream() or
/// DecodeMultipleOfflineStreams() with the given stream before calling
/// this function.
///
/// @param stream A pointer returned by CreateOfflineStream().
/// @return Return a pointer to the result. The user has to invoke
///         DestroyOfflineRecognizerResult() to free the returned pointer to
///         avoid memory leak.
SHERPA_ONNX_API SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
SherpaOnnxOfflineStream *stream);
/// Destroy the pointer returned by GetOfflineStreamResult().
///
/// @param r A pointer returned by GetOfflineStreamResult()
SHERPA_ONNX_API void DestroyOfflineRecognizerResult(
const SherpaOnnxOfflineRecognizerResult *r);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif // SHERPA_ONNX_C_API_C_API_H_