Committed by
GitHub
add online-api for csharp-api (#139)
Co-authored-by: zx <12345678>
正在显示
2 个修改的文件
包含
386 行增加
和
0 行删除
sherpa-onnx/csharp-api/online-api.cc
0 → 100644
| 1 | +// sherpa-onnx/csharp-api/online-api.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#include "online-api.h" | ||
| 6 | + | ||
| 7 | +#include <algorithm> | ||
| 8 | +#include <memory> | ||
| 9 | +#include <utility> | ||
| 10 | +#include <vector> | ||
| 11 | + | ||
| 12 | +#include "../../sherpa-onnx/csrc/display.h" | ||
| 13 | +#include "../../sherpa-onnx/csrc/online-recognizer.h" | ||
| 14 | +namespace sherpa_onnx | ||
| 15 | +{ | ||
| 16 | + struct SherpaOnnxOnlineRecognizer { | ||
| 17 | + sherpa_onnx::OnlineRecognizer* impl; | ||
| 18 | + }; | ||
| 19 | + | ||
| 20 | + struct SherpaOnnxOnlineStream { | ||
| 21 | + std::unique_ptr<sherpa_onnx::OnlineStream> impl; | ||
| 22 | + explicit SherpaOnnxOnlineStream(std::unique_ptr<sherpa_onnx::OnlineStream> p) | ||
| 23 | + : impl(std::move(p)) {} | ||
| 24 | + }; | ||
| 25 | + | ||
| 26 | + struct SherpaOnnxDisplay { | ||
| 27 | + std::unique_ptr<sherpa_onnx::Display> impl; | ||
| 28 | + }; | ||
| 29 | + | ||
| 30 | + SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer( | ||
| 31 | + const SherpaOnnxOnlineRecognizerConfig* config) { | ||
| 32 | + sherpa_onnx::OnlineRecognizerConfig recognizer_config; | ||
| 33 | + | ||
| 34 | + recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; | ||
| 35 | + recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; | ||
| 36 | + | ||
| 37 | + recognizer_config.model_config.encoder_filename = | ||
| 38 | + config->model_config.transducer.encoder; | ||
| 39 | + recognizer_config.model_config.decoder_filename = | ||
| 40 | + config->model_config.transducer.decoder; | ||
| 41 | + recognizer_config.model_config.joiner_filename = config->model_config.transducer.joiner; | ||
| 42 | + recognizer_config.model_config.tokens = config->model_config.tokens; | ||
| 43 | + recognizer_config.model_config.num_threads = config->model_config.num_threads; | ||
| 44 | + recognizer_config.model_config.debug = config->model_config.debug; | ||
| 45 | + | ||
| 46 | + recognizer_config.decoding_method = config->decoding_method; | ||
| 47 | + recognizer_config.max_active_paths = config->max_active_paths; | ||
| 48 | + | ||
| 49 | + recognizer_config.enable_endpoint = config->enable_endpoint; | ||
| 50 | + | ||
| 51 | + recognizer_config.endpoint_config.rule1.min_trailing_silence = | ||
| 52 | + config->rule1_min_trailing_silence; | ||
| 53 | + | ||
| 54 | + recognizer_config.endpoint_config.rule2.min_trailing_silence = | ||
| 55 | + config->rule2_min_trailing_silence; | ||
| 56 | + | ||
| 57 | + recognizer_config.endpoint_config.rule3.min_utterance_length = | ||
| 58 | + config->rule3_min_utterance_length; | ||
| 59 | + | ||
| 60 | + SherpaOnnxOnlineRecognizer* recognizer = new SherpaOnnxOnlineRecognizer; | ||
| 61 | + recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config); | ||
| 62 | + | ||
| 63 | + return recognizer; | ||
| 64 | + } | ||
| 65 | + | ||
| 66 | + void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer) { | ||
| 67 | + delete recognizer->impl; | ||
| 68 | + delete recognizer; | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + SherpaOnnxOnlineStream* __stdcall CreateOnlineStream( | ||
| 72 | + const SherpaOnnxOnlineRecognizer* recognizer) { | ||
| 73 | + SherpaOnnxOnlineStream* stream = | ||
| 74 | + new SherpaOnnxOnlineStream(recognizer->impl->CreateStream()); | ||
| 75 | + return stream; | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream) { delete stream; } | ||
| 79 | + | ||
| 80 | + void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate, | ||
| 81 | + const float* samples, int32_t n) { | ||
| 82 | + stream->impl->AcceptWaveform(sample_rate, samples, n); | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 86 | + SherpaOnnxOnlineStream* stream) { | ||
| 87 | + return recognizer->impl->IsReady(stream->impl.get()); | ||
| 88 | + } | ||
| 89 | + | ||
| 90 | + void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 91 | + SherpaOnnxOnlineStream* stream) { | ||
| 92 | + recognizer->impl->DecodeStream(stream->impl.get()); | ||
| 93 | + } | ||
| 94 | + | ||
| 95 | + void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 96 | + SherpaOnnxOnlineStream** streams, int32_t n) { | ||
| 97 | + std::vector<sherpa_onnx::OnlineStream*> ss(n); | ||
| 98 | + for (int32_t i = 0; i != n; ++i) { | ||
| 99 | + ss[i] = streams[i]->impl.get(); | ||
| 100 | + } | ||
| 101 | + recognizer->impl->DecodeStreams(ss.data(), n); | ||
| 102 | + } | ||
| 103 | + | ||
| 104 | + SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult( | ||
| 105 | + SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream) { | ||
| 106 | + sherpa_onnx::OnlineRecognizerResult result = | ||
| 107 | + recognizer->impl->GetResult(stream->impl.get()); | ||
| 108 | + const auto& text = result.text; | ||
| 109 | + | ||
| 110 | + auto r = new SherpaOnnxOnlineRecognizerResult; | ||
| 111 | + r->text = new char[text.size() + 1]; | ||
| 112 | + std::copy(text.begin(), text.end(), const_cast<char*>(r->text)); | ||
| 113 | + const_cast<char*>(r->text)[text.size()] = 0; | ||
| 114 | + r->text_len = text.size(); | ||
| 115 | + return r; | ||
| 116 | + } | ||
| 117 | + | ||
| 118 | + void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r) { | ||
| 119 | + delete[] r->text; | ||
| 120 | + delete r; | ||
| 121 | + } | ||
| 122 | + | ||
| 123 | + void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 124 | + SherpaOnnxOnlineStream* stream) { | ||
| 125 | + recognizer->impl->Reset(stream->impl.get()); | ||
| 126 | + } | ||
| 127 | + | ||
| 128 | + void __stdcall InputFinished(SherpaOnnxOnlineStream* stream) { | ||
| 129 | + stream->impl->InputFinished(); | ||
| 130 | + } | ||
| 131 | + | ||
| 132 | + int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 133 | + SherpaOnnxOnlineStream* stream) { | ||
| 134 | + return recognizer->impl->IsEndpoint(stream->impl.get()); | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | + SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line) { | ||
| 138 | + SherpaOnnxDisplay* ans = new SherpaOnnxDisplay; | ||
| 139 | + ans->impl = std::make_unique<sherpa_onnx::Display>(max_word_per_line); | ||
| 140 | + return ans; | ||
| 141 | + } | ||
| 142 | + | ||
| 143 | + void __stdcall DestroyDisplay(SherpaOnnxDisplay* display) { delete display; } | ||
| 144 | + | ||
| 145 | + void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s) { | ||
| 146 | + display->impl->Print(idx, s); | ||
| 147 | + } | ||
| 148 | +} |
sherpa-onnx/csharp-api/online-api.h
0 → 100644
| 1 | +// sherpa-onnx/csharp-api/online-api.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// C API for sherpa-onnx | ||
| 6 | +// | ||
| 7 | +// Please refer to | ||
| 8 | +// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c | ||
| 9 | +// for usages. | ||
| 10 | +// | ||
| 11 | + | ||
| 12 | +#ifndef SHERPA_ONNX_CPP_API_C_API_H_ | ||
| 13 | +#define SHERPA_ONNX_CPP_API_C_API_H_ | ||
| 14 | + | ||
| 15 | +#include <stdint.h> | ||
| 16 | + | ||
| 17 | +#ifdef __cplusplus | ||
| 18 | +extern "C" { | ||
| 19 | +#endif | ||
| 20 | + namespace sherpa_onnx | ||
| 21 | + { | ||
/// Paths to the three ONNX files of a streaming transducer model.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models. That is, you can find encoder-xxx.onnx
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
/// from there.
typedef struct SherpaOnnxOnlineTransducer {
  const char* encoder;
  const char* decoder;
  const char* joiner;
} SherpaOnnxOnlineTransducer;

/// Model files plus runtime options.
///
/// The value members are intentionally not `const`: const members would make
/// the struct impossible to default-construct and fill in field by field, or
/// to assign. The other config structs in this header have plain members too.
/// Dropping `const` does not change the memory layout.
typedef struct SherpaOnnxOnlineModelConfig {
  SherpaOnnxOnlineTransducer transducer;
  const char* tokens;
  int32_t num_threads;
  // NOTE(review): C++ `bool` is one byte; the C# side has to marshal this
  // field accordingly -- verify against the managed struct definition.
  bool debug;  // true to print debug information of the model
} SherpaOnnxOnlineModelConfig;
| 40 | + | ||
/// Feature-extraction settings.
/// The expected input is 16 kHz, 16-bit, single-channel wave data.
typedef struct SherpaOnnxFeatureConfig {
  /// Sample rate of the input data. It MUST be the rate the model expects,
  /// e.g. 16000 for the models provided by us.
  int32_t sample_rate;

  /// Feature dimension of the model, e.g. 80 for the models provided by us.
  int32_t feature_dim;
} SherpaOnnxFeatureConfig;
| 52 | + | ||
| 53 | + typedef struct SherpaOnnxOnlineRecognizerConfig { | ||
| 54 | + SherpaOnnxFeatureConfig feat_config; | ||
| 55 | + SherpaOnnxOnlineModelConfig model_config; | ||
| 56 | + | ||
| 57 | + /// Possible values are: greedy_search, modified_beam_search | ||
| 58 | + const char* decoding_method; | ||
| 59 | + | ||
| 60 | + /// Used only when decoding_method is modified_beam_search | ||
| 61 | + /// Example value: 4 | ||
| 62 | + int32_t max_active_paths; | ||
| 63 | + | ||
| 64 | + /// 0 to disable endpoint detection. | ||
| 65 | + /// A non-zero value to enable endpoint detection. | ||
| 66 | + int enable_endpoint; | ||
| 67 | + | ||
| 68 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 69 | + /// this value even if nothing has been decoded. | ||
| 70 | + /// Used only when enable_endpoint is not 0. | ||
| 71 | + float rule1_min_trailing_silence; | ||
| 72 | + | ||
| 73 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 74 | + /// this value after something that is not blank has been decoded. | ||
| 75 | + /// Used only when enable_endpoint is not 0. | ||
| 76 | + float rule2_min_trailing_silence; | ||
| 77 | + | ||
| 78 | + /// An endpoint is detected if the utterance in seconds is larger than | ||
| 79 | + /// this value. | ||
| 80 | + /// Used only when enable_endpoint is not 0. | ||
| 81 | + float rule3_min_utterance_length; | ||
| 82 | + } SherpaOnnxOnlineRecognizerConfig; | ||
| 83 | + | ||
/// Result returned by GetOnlineStreamResult(). It must be released with
/// DestroyOnlineRecognizerResult(), which also frees `text`.
typedef struct SherpaOnnxOnlineRecognizerResult {
  /// NUL-terminated recognized text.
  const char* text;
  /// Length of `text`, not counting the terminating NUL. Declared as int32_t,
  /// like the other integer fields of this API, for a fixed interop layout.
  int32_t text_len;
  // TODO(fangjun): Add more fields
} SherpaOnnxOnlineRecognizerResult;
| 89 | + | ||
| 90 | + /// Note: OnlineRecognizer here means StreamingRecognizer. | ||
| 91 | + /// It does not need to access the Internet during recognition. | ||
| 92 | + /// Everything is run locally. | ||
| 93 | + typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer; | ||
| 94 | + typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; | ||
| 95 | + | ||
| 96 | + /// @param config Config for the recognizer. | ||
| 97 | + /// @return Return a pointer to the recognizer. The user has to invoke | ||
| 98 | + /// DestroyOnlineRecognizer() to free it to avoid memory leak. | ||
| 99 | + extern "C" __declspec(dllexport) | ||
| 100 | + SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer( | ||
| 101 | + const SherpaOnnxOnlineRecognizerConfig * config); | ||
| 102 | + | ||
| 103 | + /// Free a pointer returned by CreateOnlineRecognizer() | ||
| 104 | + /// | ||
| 105 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 106 | + extern "C" __declspec(dllexport) | ||
| 107 | + void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer); | ||
| 108 | + | ||
| 109 | + /// Create an online stream for accepting wave samples. | ||
| 110 | + /// | ||
| 111 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 112 | + /// @return Return a pointer to an OnlineStream. The user has to invoke | ||
| 113 | + /// DestroyOnlineStream() to free it to avoid memory leak. | ||
| 114 | + extern "C" __declspec(dllexport) | ||
| 115 | + SherpaOnnxOnlineStream* __stdcall CreateOnlineStream( | ||
| 116 | + const SherpaOnnxOnlineRecognizer* recognizer); | ||
| 117 | + | ||
| 118 | + /// Destroy an online stream. | ||
| 119 | + /// | ||
| 120 | + /// @param stream A pointer returned by CreateOnlineStream() | ||
| 121 | + extern "C" __declspec(dllexport) | ||
| 122 | + void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream); | ||
| 123 | + | ||
| 124 | + /// Accept input audio samples and compute the features. | ||
| 125 | + /// The user has to invoke DecodeOnlineStream() to run the neural network and | ||
| 126 | + /// decoding. | ||
| 127 | + /// | ||
| 128 | + /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 129 | + /// @param sample_rate Sample rate of the input samples. If it is different | ||
| 130 | + /// from config.feat_config.sample_rate, we will do | ||
| 131 | + /// resampling inside sherpa-onnx. | ||
| 132 | + /// @param samples A pointer to a 1-D array containing audio samples. | ||
| 133 | + /// The range of samples has to be normalized to [-1, 1]. | ||
| 134 | + /// @param n Number of elements in the samples array. | ||
| 135 | + extern "C" __declspec(dllexport) | ||
| 136 | + void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate, | ||
| 137 | + const float* samples, int32_t n); | ||
| 138 | + | ||
| 139 | + /// Return 1 if there are enough number of feature frames for decoding. | ||
| 140 | + /// Return 0 otherwise. | ||
| 141 | + /// | ||
| 142 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer | ||
| 143 | + /// @param stream A pointer returned by CreateOnlineStream | ||
| 144 | + extern "C" __declspec(dllexport) | ||
| 145 | + int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 146 | + SherpaOnnxOnlineStream* stream); | ||
| 147 | + | ||
| 148 | + /// Call this function to run the neural network model and decoding. | ||
| 149 | + /// | ||
| 150 | + /// Precondition for this function: IsOnlineStreamReady() MUST return 1. | ||
| 151 | + /// | ||
| 152 | + /// Usage example: | ||
| 153 | + /// | ||
| 154 | + /// while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 155 | + /// DecodeOnlineStream(recognizer, stream); | ||
| 156 | + /// } | ||
| 157 | + /// | ||
| 158 | + extern "C" __declspec(dllexport) | ||
| 159 | + void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 160 | + SherpaOnnxOnlineStream* stream); | ||
| 161 | + | ||
| 162 | + /// This function is similar to DecodeOnlineStream(). It decodes multiple | ||
| 163 | + /// OnlineStream in parallel. | ||
| 164 | + /// | ||
| 165 | + /// Caution: The caller has to ensure each OnlineStream is ready, i.e., | ||
| 166 | + /// IsOnlineStreamReady() for that stream should return 1. | ||
| 167 | + /// | ||
| 168 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 169 | + /// @param streams A pointer array containing pointers returned by | ||
| 170 | + /// CreateOnlineStream() | ||
| 171 | + /// @param n Number of elements in the given streams array. | ||
| 172 | + extern "C" __declspec(dllexport) | ||
| 173 | + void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 174 | + SherpaOnnxOnlineStream** streams, int32_t n); | ||
| 175 | + | ||
| 176 | + /// Get the decoding results so far for an OnlineStream. | ||
| 177 | + /// | ||
| 178 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 179 | + /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 180 | + /// @return A pointer containing the result. The user has to invoke | ||
| 181 | + /// DestroyOnlineRecognizerResult() to free the returned pointer to | ||
| 182 | + /// avoid memory leak. | ||
| 183 | + extern "C" __declspec(dllexport) | ||
| 184 | + SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult( | ||
| 185 | + SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream); | ||
| 186 | + | ||
| 187 | + /// Destroy the pointer returned by GetOnlineStreamResult(). | ||
| 188 | + /// | ||
| 189 | + /// @param r A pointer returned by GetOnlineStreamResult() | ||
| 190 | + extern "C" __declspec(dllexport) | ||
| 191 | + void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r); | ||
| 192 | + | ||
| 193 | + /// Reset an OnlineStream , which clears the neural network model state | ||
| 194 | + /// and the state for decoding. | ||
| 195 | + /// | ||
| 196 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 197 | + /// @param stream A pointer returned by CreateOnlineStream | ||
| 198 | + extern "C" __declspec(dllexport) | ||
| 199 | + void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 200 | + SherpaOnnxOnlineStream* stream); | ||
| 201 | + | ||
| 202 | + /// Signal that no more audio samples would be available. | ||
| 203 | + /// After this call, you cannot call AcceptOnlineWaveform() any more. | ||
| 204 | + /// | ||
| 205 | + /// @param stream A pointer returned by CreateOnlineStream() | ||
| 206 | + extern "C" __declspec(dllexport) | ||
| 207 | + void __stdcall InputFinished(SherpaOnnxOnlineStream* stream); | ||
| 208 | + | ||
| 209 | + /// Return 1 if an endpoint has been detected. | ||
| 210 | + /// | ||
| 211 | + /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 212 | + /// @param stream A pointer returned by CreateOnlineStream() | ||
| 213 | + /// @return Return 1 if an endpoint is detected. Return 0 otherwise. | ||
| 214 | + extern "C" __declspec(dllexport) | ||
| 215 | + int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 216 | + SherpaOnnxOnlineStream* stream); | ||
| 217 | + | ||
| 218 | + // for displaying results on Linux/macOS. | ||
| 219 | + typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; | ||
| 220 | + | ||
| 221 | + /// Create a display object. Must be freed using DestroyDisplay to avoid | ||
| 222 | + /// memory leak. | ||
| 223 | + extern "C" __declspec(dllexport) | ||
| 224 | + SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line); | ||
| 225 | + | ||
| 226 | + extern "C" __declspec(dllexport) | ||
| 227 | + void __stdcall DestroyDisplay(SherpaOnnxDisplay* display); | ||
| 228 | + | ||
| 229 | + /// Print the result. | ||
| 230 | + extern "C" __declspec(dllexport) | ||
| 231 | + void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s); | ||
| 232 | + } | ||
| 233 | + | ||
| 234 | +#ifdef __cplusplus | ||
| 235 | +} /* extern "C" */ | ||
| 236 | +#endif | ||
| 237 | + | ||
| 238 | +#endif // SHERPA_ONNX_CPP_API_C_API_H_ |
-
请 注册 或 登录 后发表评论