manyeyes
Committed by GitHub

add online-api for csharp-api (#139)

Co-authored-by: zx <12345678>
  1 +// sherpa-onnx/cpp-api/c-api.cc
  2 +//
  3 +// Copyright (c) 2023 Xiaomi Corporation
  4 +
  5 +#include "online-api.h"
  6 +
  7 +#include <algorithm>
  8 +#include <memory>
  9 +#include <utility>
  10 +#include <vector>
  11 +
  12 +#include "../../sherpa-onnx/csrc/display.h"
  13 +#include "../../sherpa-onnx/csrc/online-recognizer.h"
  14 +namespace sherpa_onnx
  15 +{
  16 + struct SherpaOnnxOnlineRecognizer {
  17 + sherpa_onnx::OnlineRecognizer* impl;
  18 + };
  19 +
  20 + struct SherpaOnnxOnlineStream {
  21 + std::unique_ptr<sherpa_onnx::OnlineStream> impl;
  22 + explicit SherpaOnnxOnlineStream(std::unique_ptr<sherpa_onnx::OnlineStream> p)
  23 + : impl(std::move(p)) {}
  24 + };
  25 +
  26 + struct SherpaOnnxDisplay {
  27 + std::unique_ptr<sherpa_onnx::Display> impl;
  28 + };
  29 +
  30 + SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer(
  31 + const SherpaOnnxOnlineRecognizerConfig* config) {
  32 + sherpa_onnx::OnlineRecognizerConfig recognizer_config;
  33 +
  34 + recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
  35 + recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
  36 +
  37 + recognizer_config.model_config.encoder_filename =
  38 + config->model_config.transducer.encoder;
  39 + recognizer_config.model_config.decoder_filename =
  40 + config->model_config.transducer.decoder;
  41 + recognizer_config.model_config.joiner_filename = config->model_config.transducer.joiner;
  42 + recognizer_config.model_config.tokens = config->model_config.tokens;
  43 + recognizer_config.model_config.num_threads = config->model_config.num_threads;
  44 + recognizer_config.model_config.debug = config->model_config.debug;
  45 +
  46 + recognizer_config.decoding_method = config->decoding_method;
  47 + recognizer_config.max_active_paths = config->max_active_paths;
  48 +
  49 + recognizer_config.enable_endpoint = config->enable_endpoint;
  50 +
  51 + recognizer_config.endpoint_config.rule1.min_trailing_silence =
  52 + config->rule1_min_trailing_silence;
  53 +
  54 + recognizer_config.endpoint_config.rule2.min_trailing_silence =
  55 + config->rule2_min_trailing_silence;
  56 +
  57 + recognizer_config.endpoint_config.rule3.min_utterance_length =
  58 + config->rule3_min_utterance_length;
  59 +
  60 + SherpaOnnxOnlineRecognizer* recognizer = new SherpaOnnxOnlineRecognizer;
  61 + recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config);
  62 +
  63 + return recognizer;
  64 + }
  65 +
  66 + void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer) {
  67 + delete recognizer->impl;
  68 + delete recognizer;
  69 + }
  70 +
  71 + SherpaOnnxOnlineStream* __stdcall CreateOnlineStream(
  72 + const SherpaOnnxOnlineRecognizer* recognizer) {
  73 + SherpaOnnxOnlineStream* stream =
  74 + new SherpaOnnxOnlineStream(recognizer->impl->CreateStream());
  75 + return stream;
  76 + }
  77 +
  78 + void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream) { delete stream; }
  79 +
  80 + void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate,
  81 + const float* samples, int32_t n) {
  82 + stream->impl->AcceptWaveform(sample_rate, samples, n);
  83 + }
  84 +
  85 + int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,
  86 + SherpaOnnxOnlineStream* stream) {
  87 + return recognizer->impl->IsReady(stream->impl.get());
  88 + }
  89 +
  90 + void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,
  91 + SherpaOnnxOnlineStream* stream) {
  92 + recognizer->impl->DecodeStream(stream->impl.get());
  93 + }
  94 +
  95 + void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer,
  96 + SherpaOnnxOnlineStream** streams, int32_t n) {
  97 + std::vector<sherpa_onnx::OnlineStream*> ss(n);
  98 + for (int32_t i = 0; i != n; ++i) {
  99 + ss[i] = streams[i]->impl.get();
  100 + }
  101 + recognizer->impl->DecodeStreams(ss.data(), n);
  102 + }
  103 +
  104 + SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult(
  105 + SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream) {
  106 + sherpa_onnx::OnlineRecognizerResult result =
  107 + recognizer->impl->GetResult(stream->impl.get());
  108 + const auto& text = result.text;
  109 +
  110 + auto r = new SherpaOnnxOnlineRecognizerResult;
  111 + r->text = new char[text.size() + 1];
  112 + std::copy(text.begin(), text.end(), const_cast<char*>(r->text));
  113 + const_cast<char*>(r->text)[text.size()] = 0;
  114 + r->text_len = text.size();
  115 + return r;
  116 + }
  117 +
  118 + void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r) {
  119 + delete[] r->text;
  120 + delete r;
  121 + }
  122 +
  123 + void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer,
  124 + SherpaOnnxOnlineStream* stream) {
  125 + recognizer->impl->Reset(stream->impl.get());
  126 + }
  127 +
  128 + void __stdcall InputFinished(SherpaOnnxOnlineStream* stream) {
  129 + stream->impl->InputFinished();
  130 + }
  131 +
  132 + int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,
  133 + SherpaOnnxOnlineStream* stream) {
  134 + return recognizer->impl->IsEndpoint(stream->impl.get());
  135 + }
  136 +
  137 + SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line) {
  138 + SherpaOnnxDisplay* ans = new SherpaOnnxDisplay;
  139 + ans->impl = std::make_unique<sherpa_onnx::Display>(max_word_per_line);
  140 + return ans;
  141 + }
  142 +
  143 + void __stdcall DestroyDisplay(SherpaOnnxDisplay* display) { delete display; }
  144 +
  145 + void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s) {
  146 + display->impl->Print(idx, s);
  147 + }
  148 +}
  1 +// sherpa-onnx/cpp-api/c-api.h
  2 +//
  3 +// Copyright (c) 2023 Xiaomi Corporation
  4 +
  5 +// C API for sherpa-onnx
  6 +//
  7 +// Please refer to
  8 +// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
  9 +// for usages.
  10 +//
  11 +
  12 +#ifndef SHERPA_ONNX_CPP_API_C_API_H_
  13 +#define SHERPA_ONNX_CPP_API_C_API_H_
  14 +
  15 +#include <stdint.h>
  16 +
  17 +#ifdef __cplusplus
  18 +extern "C" {
  19 +#endif
  20 + namespace sherpa_onnx
  21 + {
  22 + /// Please refer to
  23 + /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  24 + /// to download pre-trained models. That is, you can find encoder-xxx.onnx
  25 + /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
  26 + /// from there.
  27 + typedef struct SherpaOnnxOnlineTransducer {
  28 + const char* encoder;
  29 + const char* decoder;
  30 + const char* joiner;
  31 + } SherpaOnnxOnlineTransducer;
  32 +
  33 + typedef struct SherpaOnnxOnlineModelConfig
  34 + {
  35 + const SherpaOnnxOnlineTransducer transducer;
  36 + const char* tokens;
  37 + const int32_t num_threads;
  38 + const bool debug; // true to print debug information of the model
  39 + }SherpaOnnxOnlineModelConfig;
  40 +
  41 + /// It expects 16 kHz 16-bit single channel wave format.
  42 + typedef struct SherpaOnnxFeatureConfig {
  43 + /// Sample rate of the input data. MUST match the one expected
  44 + /// by the model. For instance, it should be 16000 for models provided
  45 + /// by us.
  46 + int32_t sample_rate;
  47 +
  48 + /// Feature dimension of the model.
  49 + /// For instance, it should be 80 for models provided by us.
  50 + int32_t feature_dim;
  51 + } SherpaOnnxFeatureConfig;
  52 +
  53 + typedef struct SherpaOnnxOnlineRecognizerConfig {
  54 + SherpaOnnxFeatureConfig feat_config;
  55 + SherpaOnnxOnlineModelConfig model_config;
  56 +
  57 + /// Possible values are: greedy_search, modified_beam_search
  58 + const char* decoding_method;
  59 +
  60 + /// Used only when decoding_method is modified_beam_search
  61 + /// Example value: 4
  62 + int32_t max_active_paths;
  63 +
  64 + /// 0 to disable endpoint detection.
  65 + /// A non-zero value to enable endpoint detection.
  66 + int enable_endpoint;
  67 +
  68 + /// An endpoint is detected if trailing silence in seconds is larger than
  69 + /// this value even if nothing has been decoded.
  70 + /// Used only when enable_endpoint is not 0.
  71 + float rule1_min_trailing_silence;
  72 +
  73 + /// An endpoint is detected if trailing silence in seconds is larger than
  74 + /// this value after something that is not blank has been decoded.
  75 + /// Used only when enable_endpoint is not 0.
  76 + float rule2_min_trailing_silence;
  77 +
  78 + /// An endpoint is detected if the utterance in seconds is larger than
  79 + /// this value.
  80 + /// Used only when enable_endpoint is not 0.
  81 + float rule3_min_utterance_length;
  82 + } SherpaOnnxOnlineRecognizerConfig;
  83 +
  84 + typedef struct SherpaOnnxOnlineRecognizerResult {
  85 + const char* text;
  86 + int text_len;
  87 + // TODO(fangjun): Add more fields
  88 + } SherpaOnnxOnlineRecognizerResult;
  89 +
  90 + /// Note: OnlineRecognizer here means StreamingRecognizer.
  91 + /// It does not need to access the Internet during recognition.
  92 + /// Everything is run locally.
  93 + typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;
  94 + typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;
  95 +
  96 + /// @param config Config for the recognizer.
  97 + /// @return Return a pointer to the recognizer. The user has to invoke
  98 + /// DestroyOnlineRecognizer() to free it to avoid memory leak.
  99 + extern "C" __declspec(dllexport)
  100 + SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer(
  101 + const SherpaOnnxOnlineRecognizerConfig * config);
  102 +
  103 + /// Free a pointer returned by CreateOnlineRecognizer()
  104 + ///
  105 + /// @param recognizer A pointer returned by CreateOnlineRecognizer()
  106 + extern "C" __declspec(dllexport)
  107 + void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer);
  108 +
  109 + /// Create an online stream for accepting wave samples.
  110 + ///
  111 + /// @param recognizer A pointer returned by CreateOnlineRecognizer()
  112 + /// @return Return a pointer to an OnlineStream. The user has to invoke
  113 + /// DestroyOnlineStream() to free it to avoid memory leak.
  114 + extern "C" __declspec(dllexport)
  115 + SherpaOnnxOnlineStream* __stdcall CreateOnlineStream(
  116 + const SherpaOnnxOnlineRecognizer* recognizer);
  117 +
  118 + /// Destroy an online stream.
  119 + ///
  120 + /// @param stream A pointer returned by CreateOnlineStream()
  121 + extern "C" __declspec(dllexport)
  122 + void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream);
  123 +
  124 + /// Accept input audio samples and compute the features.
  125 + /// The user has to invoke DecodeOnlineStream() to run the neural network and
  126 + /// decoding.
  127 + ///
  128 + /// @param stream A pointer returned by CreateOnlineStream().
  129 + /// @param sample_rate Sample rate of the input samples. If it is different
  130 + /// from config.feat_config.sample_rate, we will do
  131 + /// resampling inside sherpa-onnx.
  132 + /// @param samples A pointer to a 1-D array containing audio samples.
  133 + /// The range of samples has to be normalized to [-1, 1].
  134 + /// @param n Number of elements in the samples array.
  135 + extern "C" __declspec(dllexport)
  136 + void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate,
  137 + const float* samples, int32_t n);
  138 +
  139 + /// Return 1 if there are enough number of feature frames for decoding.
  140 + /// Return 0 otherwise.
  141 + ///
  142 + /// @param recognizer A pointer returned by CreateOnlineRecognizer
  143 + /// @param stream A pointer returned by CreateOnlineStream
  144 + extern "C" __declspec(dllexport)
  145 + int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,
  146 + SherpaOnnxOnlineStream* stream);
  147 +
  148 + /// Call this function to run the neural network model and decoding.
  149 + ///
  150 + /// Precondition for this function: IsOnlineStreamReady() MUST return 1.
  151 + ///
  152 + /// Usage example:
  153 + ///
  154 + /// while (IsOnlineStreamReady(recognizer, stream)) {
  155 + /// DecodeOnlineStream(recognizer, stream);
  156 + /// }
  157 + ///
  158 + extern "C" __declspec(dllexport)
  159 + void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,
  160 + SherpaOnnxOnlineStream* stream);
  161 +
  162 + /// This function is similar to DecodeOnlineStream(). It decodes multiple
  163 + /// OnlineStream in parallel.
  164 + ///
  165 + /// Caution: The caller has to ensure each OnlineStream is ready, i.e.,
  166 + /// IsOnlineStreamReady() for that stream should return 1.
  167 + ///
  168 + /// @param recognizer A pointer returned by CreateOnlineRecognizer()
  169 + /// @param streams A pointer array containing pointers returned by
  170 + /// CreateOnlineStream()
  171 + /// @param n Number of elements in the given streams array.
  172 + extern "C" __declspec(dllexport)
  173 + void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer,
  174 + SherpaOnnxOnlineStream** streams, int32_t n);
  175 +
  176 + /// Get the decoding results so far for an OnlineStream.
  177 + ///
  178 + /// @param recognizer A pointer returned by CreateOnlineRecognizer().
  179 + /// @param stream A pointer returned by CreateOnlineStream().
  180 + /// @return A pointer containing the result. The user has to invoke
  181 + /// DestroyOnlineRecognizerResult() to free the returned pointer to
  182 + /// avoid memory leak.
  183 + extern "C" __declspec(dllexport)
  184 + SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult(
  185 + SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream);
  186 +
  187 + /// Destroy the pointer returned by GetOnlineStreamResult().
  188 + ///
  189 + /// @param r A pointer returned by GetOnlineStreamResult()
  190 + extern "C" __declspec(dllexport)
  191 + void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r);
  192 +
  193 + /// Reset an OnlineStream , which clears the neural network model state
  194 + /// and the state for decoding.
  195 + ///
  196 + /// @param recognizer A pointer returned by CreateOnlineRecognizer().
  197 + /// @param stream A pointer returned by CreateOnlineStream
  198 + extern "C" __declspec(dllexport)
  199 + void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer,
  200 + SherpaOnnxOnlineStream* stream);
  201 +
  202 + /// Signal that no more audio samples would be available.
  203 + /// After this call, you cannot call AcceptOnlineWaveform() any more.
  204 + ///
  205 + /// @param stream A pointer returned by CreateOnlineStream()
  206 + extern "C" __declspec(dllexport)
  207 + void __stdcall InputFinished(SherpaOnnxOnlineStream* stream);
  208 +
  209 + /// Return 1 if an endpoint has been detected.
  210 + ///
  211 + /// @param recognizer A pointer returned by CreateOnlineRecognizer()
  212 + /// @param stream A pointer returned by CreateOnlineStream()
  213 + /// @return Return 1 if an endpoint is detected. Return 0 otherwise.
  214 + extern "C" __declspec(dllexport)
  215 + int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,
  216 + SherpaOnnxOnlineStream* stream);
  217 +
  218 + // for displaying results on Linux/macOS.
  219 + typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
  220 +
  221 + /// Create a display object. Must be freed using DestroyDisplay to avoid
  222 + /// memory leak.
  223 + extern "C" __declspec(dllexport)
  224 + SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line);
  225 +
  226 + extern "C" __declspec(dllexport)
  227 + void __stdcall DestroyDisplay(SherpaOnnxDisplay* display);
  228 +
  229 + /// Print the result.
  230 + extern "C" __declspec(dllexport)
  231 + void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s);
  232 + }
  233 +
  234 +#ifdef __cplusplus
  235 +} /* extern "C" */
  236 +#endif
  237 +
  238 +#endif // SHERPA_ONNX_CPP_API_C_API_H_