online-api.h
9.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
// sherpa-onnx/cpp-api/c-api.h
//
// Copyright (c) 2023 Xiaomi Corporation
// C API for sherpa-onnx
//
// Please refer to
// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
// for usages.
//
#ifndef SHERPA_ONNX_CPP_API_C_API_H_
#define SHERPA_ONNX_CPP_API_C_API_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
namespace sherpa_onnx
{
/// Model files that make up a transducer model.
///
/// Pre-trained models can be downloaded from
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// There you can find encoder-xxx.onnx, decoder-xxx.onnx and
/// joiner-xxx.onnx for this struct. The matching tokens.txt is passed
/// separately through SherpaOnnxOnlineModelConfig.tokens.
typedef struct SherpaOnnxOnlineTransducer {
  const char *encoder;  ///< encoder-xxx.onnx
  const char *decoder;  ///< decoder-xxx.onnx
  const char *joiner;   ///< joiner-xxx.onnx
} SherpaOnnxOnlineTransducer;
/// Model files and model-level options.
///
/// The by-value members are intentionally NOT const-qualified. A
/// const-qualified member makes the struct (and every struct embedding it,
/// e.g. SherpaOnnxOnlineRecognizerConfig) impossible to default-construct in
/// C++ and impossible to fill in field by field after declaring it.
/// The qualifier has no effect on the memory layout, so dropping it is
/// ABI-compatible.
typedef struct SherpaOnnxOnlineModelConfig {
  /// Encoder, decoder and joiner model files.
  SherpaOnnxOnlineTransducer transducer;

  /// The tokens.txt that accompanies the model files.
  const char *tokens;

  /// NOTE(review): presumably the number of threads used to run the
  /// model -- confirm against the implementation.
  int32_t num_threads;

  /// true to print debug information of the model.
  /// Note: `bool` requires a C++ compiler, or <stdbool.h> when built as C.
  bool debug;
} SherpaOnnxOnlineModelConfig;
/// Settings for the audio features.
///
/// It expects 16 kHz 16-bit single channel wave format.
typedef struct SherpaOnnxFeatureConfig {
  /// Sample rate of the input data. It MUST match the one expected by the
  /// model, e.g. 16000 for the models provided by us.
  int32_t sample_rate;

  /// Feature dimension of the model, e.g. 80 for the models provided by us.
  int32_t feature_dim;
} SherpaOnnxFeatureConfig;
/// Complete configuration accepted by CreateOnlineRecognizer().
typedef struct SherpaOnnxOnlineRecognizerConfig {
  /// Settings for the audio features.
  SherpaOnnxFeatureConfig feat_config;

  /// Model files and model-level options.
  SherpaOnnxOnlineModelConfig model_config;

  /// Possible values are: greedy_search, modified_beam_search
  const char *decoding_method;

  /// Used only when decoding_method is modified_beam_search
  /// Example value: 4
  int32_t max_active_paths;

  /// 0 to disable endpoint detection.
  /// A non-zero value to enable endpoint detection.
  ///
  /// Declared as int32_t, like every other integer in this API, so that the
  /// field width is fixed for callers in other languages.
  int32_t enable_endpoint;

  /// An endpoint is detected if trailing silence in seconds is larger than
  /// this value even if nothing has been decoded.
  /// Used only when enable_endpoint is not 0.
  float rule1_min_trailing_silence;

  /// An endpoint is detected if trailing silence in seconds is larger than
  /// this value after something that is not blank has been decoded.
  /// Used only when enable_endpoint is not 0.
  float rule2_min_trailing_silence;

  /// An endpoint is detected if the utterance in seconds is larger than
  /// this value.
  /// Used only when enable_endpoint is not 0.
  float rule3_min_utterance_length;
} SherpaOnnxOnlineRecognizerConfig;
/// Decoding result returned by GetOnlineStreamResult().
/// It must be released with DestroyOnlineRecognizerResult().
typedef struct SherpaOnnxOnlineRecognizerResult {
  /// The recognized text.
  const char *text;

  /// NOTE(review): presumably the length of `text` in bytes -- confirm
  /// against the implementation.
  /// Declared as int32_t, like every other integer in this API, so that the
  /// field width is fixed for callers in other languages.
  int32_t text_len;
  // TODO(fangjun): Add more fields
} SherpaOnnxOnlineRecognizerResult;
/// Note: OnlineRecognizer here means StreamingRecognizer.
/// It does not need to access the Internet during recognition.
/// Everything is run locally.
///
/// The struct is only forward-declared here, so callers handle it through
/// pointers: obtain one from CreateOnlineRecognizer() and release it with
/// DestroyOnlineRecognizer().
typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;
/// A stream that accepts wave samples. Only forward-declared here: obtain one
/// from CreateOnlineStream() and release it with DestroyOnlineStream().
typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;
/// Create a recognizer from the given configuration.
///
/// @param config  Config for the recognizer.
/// @return Return a pointer to the recognizer. The user has to invoke
///         DestroyOnlineRecognizer() to free it to avoid memory leak.
extern "C" __declspec(dllexport)
SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer(
const SherpaOnnxOnlineRecognizerConfig * config);
/// Free a pointer returned by CreateOnlineRecognizer()
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
extern "C" __declspec(dllexport)
void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer);
/// Create an online stream for accepting wave samples.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @return Return a pointer to an OnlineStream. The user has to invoke
///         DestroyOnlineStream() to free it to avoid memory leak.
extern "C" __declspec(dllexport)
SherpaOnnxOnlineStream* __stdcall CreateOnlineStream(
const SherpaOnnxOnlineRecognizer* recognizer);
/// Destroy an online stream created by CreateOnlineStream().
///
/// @param stream A pointer returned by CreateOnlineStream()
extern "C" __declspec(dllexport)
void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream);
/// Accept input audio samples and compute the features.
/// This call does not decode anything: the user has to invoke
/// DecodeOnlineStream() to run the neural network and decoding.
///
/// @param stream       A pointer returned by CreateOnlineStream().
/// @param sample_rate  Sample rate of the input samples. If it is different
///                     from config.feat_config.sample_rate, we will do
///                     resampling inside sherpa-onnx.
/// @param samples      A pointer to a 1-D array containing audio samples.
///                     The range of samples has to be normalized to [-1, 1].
/// @param n            Number of elements in the samples array.
extern "C" __declspec(dllexport)
void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate,
const float* samples, int32_t n);
/// Check whether a stream has enough feature frames for decoding.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @param stream      A pointer returned by CreateOnlineStream()
/// @return Return 1 if there are enough feature frames for decoding.
///         Return 0 otherwise.
extern "C" __declspec(dllexport)
int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,
SherpaOnnxOnlineStream* stream);
/// Call this function to run the neural network model and decoding.
///
/// Precondition for this function: IsOnlineStreamReady() MUST return 1.
///
/// Usage example:
///
///   while (IsOnlineStreamReady(recognizer, stream)) {
///     DecodeOnlineStream(recognizer, stream);
///   }
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @param stream      A pointer returned by CreateOnlineStream()
extern "C" __declspec(dllexport)
void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,
SherpaOnnxOnlineStream* stream);
/// This function is similar to DecodeOnlineStream(). It decodes multiple
/// OnlineStream in parallel.
///
/// Caution: The caller has to ensure each OnlineStream is ready, i.e.,
/// IsOnlineStreamReady() for that stream should return 1.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @param streams     A pointer array containing pointers returned by
///                    CreateOnlineStream()
/// @param n           Number of elements in the given streams array.
extern "C" __declspec(dllexport)
void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer,
SherpaOnnxOnlineStream** streams, int32_t n);
/// Get the decoding results so far for an OnlineStream.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer().
/// @param stream      A pointer returned by CreateOnlineStream().
/// @return A pointer containing the result. The user has to invoke
///         DestroyOnlineRecognizerResult() to free the returned pointer to
///         avoid memory leak.
extern "C" __declspec(dllexport)
SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult(
SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream);
/// Destroy the pointer returned by GetOnlineStreamResult().
/// Every result obtained from GetOnlineStreamResult() has to be passed to
/// this function to avoid memory leak.
///
/// @param r A pointer returned by GetOnlineStreamResult()
extern "C" __declspec(dllexport)
void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r);
/// Reset an OnlineStream, which clears the neural network model state
/// and the state for decoding.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer().
/// @param stream      A pointer returned by CreateOnlineStream().
extern "C" __declspec(dllexport)
void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer,
SherpaOnnxOnlineStream* stream);
/// Signal that no more audio samples would be available.
/// After this call, you cannot call AcceptOnlineWaveform() any more.
///
/// @param stream A pointer returned by CreateOnlineStream()
extern "C" __declspec(dllexport)
void __stdcall InputFinished(SherpaOnnxOnlineStream* stream);
/// Return 1 if an endpoint has been detected.
///
/// The endpoint rules are configured through enable_endpoint and the
/// rule1/rule2/rule3 fields of SherpaOnnxOnlineRecognizerConfig.
///
/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
/// @param stream      A pointer returned by CreateOnlineStream()
/// @return Return 1 if an endpoint is detected. Return 0 otherwise.
extern "C" __declspec(dllexport)
int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,
SherpaOnnxOnlineStream* stream);
/// Display object for displaying results on Linux/macOS.
/// The struct is only forward-declared here: obtain one from CreateDisplay()
/// and release it with DestroyDisplay().
typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
/// Create a display object. Must be freed using DestroyDisplay to avoid
/// memory leak.
///
/// @param max_word_per_line  NOTE(review): presumably the maximum number of
///                           words printed on one line -- confirm against the
///                           implementation.
/// @return A pointer to the newly created display object.
extern "C" __declspec(dllexport)
SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line);
/// Free a display object created by CreateDisplay().
///
/// @param display A pointer returned by CreateDisplay()
extern "C" __declspec(dllexport)
void __stdcall DestroyDisplay(SherpaOnnxDisplay* display);
/// Print the result.
///
/// @param display  A pointer returned by CreateDisplay()
/// @param idx      NOTE(review): presumably the index of the segment being
///                 printed -- confirm against the implementation.
/// @param s        The text to print.
extern "C" __declspec(dllexport)
void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s);
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif // SHERPA_ONNX_CPP_API_C_API_H_