// online-api.h 9.0 KB
// sherpa-onnx/cpp-api/c-api.h
//
// Copyright (c)  2023  Xiaomi Corporation

// C API for sherpa-onnx
//
// Please refer to
// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
// for usages.
//

#ifndef SHERPA_ONNX_CPP_API_C_API_H_
#define SHERPA_ONNX_CPP_API_C_API_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif
	namespace sherpa_onnx
	{
		/// The encoder, decoder and joiner model files of an online transducer.
		///
		/// Pre-trained models can be downloaded from
		/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
		/// That page provides encoder-xxx.onnx, decoder-xxx.onnx and
		/// joiner-xxx.onnx for this struct (and the tokens.txt that goes into
		/// SherpaOnnxOnlineModelConfig::tokens).
		typedef struct SherpaOnnxOnlineTransducer {
			/// The encoder model, e.g. encoder-xxx.onnx
			const char* encoder;

			/// The decoder model, e.g. decoder-xxx.onnx
			const char* decoder;

			/// The joiner model, e.g. joiner-xxx.onnx
			const char* joiner;
		} SherpaOnnxOnlineTransducer;

		/// Model-related part of the recognizer configuration.
		///
		/// The members are intentionally NOT const-qualified. A const data member
		/// deletes the implicit default constructor and the copy assignment
		/// operator in C++, which would make it impossible to declare a config
		/// object and fill it in field by field. Dropping the qualifier does not
		/// change the layout of the struct.
		typedef struct SherpaOnnxOnlineModelConfig {
			/// The encoder/decoder/joiner models to use.
			SherpaOnnxOnlineTransducer transducer;

			/// The tokens file that belongs to the model, e.g. tokens.txt
			const char* tokens;

			/// NOTE(review): presumably the number of threads used to run the
			/// model -- confirm against the implementation.
			int32_t num_threads;

			/// true to print debug information of the model
			bool debug;
		} SherpaOnnxOnlineModelConfig;

		/// Feature extraction settings.
		///
		/// The input is expected to be in 16 kHz, 16-bit, single channel wave
		/// format.
		typedef struct SherpaOnnxFeatureConfig {
			/// Sample rate of the input data. It MUST be the rate the model
			/// expects, e.g. 16000 for the models provided by us.
			int32_t sample_rate;

			/// Feature dimension of the model, e.g. 80 for the models provided
			/// by us.
			int32_t feature_dim;
		} SherpaOnnxFeatureConfig;

		/// Complete configuration consumed by CreateOnlineRecognizer().
		typedef struct SherpaOnnxOnlineRecognizerConfig {
			/// Settings of the feature extractor.
			SherpaOnnxFeatureConfig feat_config;

			/// Settings of the model.
			SherpaOnnxOnlineModelConfig model_config;

			/// Name of the decoding method.
			/// Possible values are: greedy_search, modified_beam_search
			const char* decoding_method;

			/// Only used when decoding_method is modified_beam_search.
			/// Example value: 4
			int32_t max_active_paths;

			/// Endpoint detection switch: 0 disables it, any non-zero value
			/// enables it. The three rule* fields below are used only when this
			/// field is not 0.
			int enable_endpoint;

			/// Rule 1: an endpoint is detected if the trailing silence (in
			/// seconds) is larger than this value, even if nothing has been
			/// decoded.
			float rule1_min_trailing_silence;

			/// Rule 2: an endpoint is detected if the trailing silence (in
			/// seconds) is larger than this value after something that is not
			/// blank has been decoded.
			float rule2_min_trailing_silence;

			/// Rule 3: an endpoint is detected if the utterance (in seconds) is
			/// larger than this value.
			float rule3_min_utterance_length;
		} SherpaOnnxOnlineRecognizerConfig;

		/// Recognition result as returned by GetOnlineStreamResult().
		/// It has to be freed with DestroyOnlineRecognizerResult().
		typedef struct SherpaOnnxOnlineRecognizerResult {
			/// The recognized text.
			const char* text;

			/// NOTE(review): presumably the length of `text` -- confirm the unit
			/// and whether a terminating NUL is counted.
			int text_len;

			// TODO(fangjun): Add more fields
		} SherpaOnnxOnlineRecognizerResult;

		/// Opaque handle of a recognizer; see CreateOnlineRecognizer().
		///
		/// Note: "Online" here means "streaming". The recognizer does not need
		/// to access the Internet during recognition; everything is run locally.
		typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer;

		/// Opaque handle of a stream; see CreateOnlineStream().
		typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream;

		/// Create an online (i.e., streaming) recognizer.
		///
		/// @param config  Config for the recognizer.
		/// @return Return a pointer to the recognizer. The user has to invoke
		///         DestroyOnlineRecognizer() to free it to avoid memory leak.
		extern "C" __declspec(dllexport) SherpaOnnxOnlineRecognizer* __stdcall
		CreateOnlineRecognizer(const SherpaOnnxOnlineRecognizerConfig* config);

		/// Release a recognizer.
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
		extern "C" __declspec(dllexport) void __stdcall
		DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer);

		/// Create an online stream that accepts wave samples.
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
		/// @return Return a pointer to an OnlineStream. To avoid a memory leak,
		///         the user has to free it with DestroyOnlineStream().
		extern "C" __declspec(dllexport) SherpaOnnxOnlineStream* __stdcall
		CreateOnlineStream(const SherpaOnnxOnlineRecognizer* recognizer);

		/// Release an online stream.
		///
		/// @param stream  A pointer returned by CreateOnlineStream()
		extern "C" __declspec(dllexport) void __stdcall
		DestroyOnlineStream(SherpaOnnxOnlineStream* stream);

		/// Feed audio samples to a stream and compute the features.
		///
		/// This call does not decode anything: the user has to invoke
		/// DecodeOnlineStream() to run the neural network and decoding.
		///
		/// @param stream  A pointer returned by CreateOnlineStream().
		/// @param sample_rate  Sample rate of the input samples. If it differs
		///                     from config.feat_config.sample_rate, resampling
		///                     is done inside sherpa-onnx.
		/// @param samples  A pointer to a 1-D array containing audio samples.
		///                 The samples have to be normalized to the range
		///                 [-1, 1].
		/// @param n  Number of elements in the samples array.
		extern "C" __declspec(dllexport) void __stdcall
		AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream,
			int32_t sample_rate,
			const float* samples,
			int32_t n);

		/// Check whether a stream has enough feature frames for decoding.
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
		/// @param stream  A pointer returned by CreateOnlineStream()
		/// @return Return 1 if there are enough feature frames for decoding.
		///         Return 0 otherwise.
		extern "C" __declspec(dllexport) int32_t __stdcall
		IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer,
			SherpaOnnxOnlineStream* stream);

		/// Run the neural network model and decoding on a stream.
		///
		/// Precondition: IsOnlineStreamReady() MUST return 1 for the stream.
		///
		/// Usage example:
		///
		///  while (IsOnlineStreamReady(recognizer, stream)) {
		///     DecodeOnlineStream(recognizer, stream);
		///  }
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
		/// @param stream  A pointer returned by CreateOnlineStream()
		extern "C" __declspec(dllexport) void __stdcall
		DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer,
			SherpaOnnxOnlineStream* stream);

		/// Same as DecodeOnlineStream(), except that it decodes multiple
		/// OnlineStream objects in parallel.
		///
		/// Caution: The caller has to ensure that every stream is ready, i.e.,
		/// IsOnlineStreamReady() should return 1 for each of them.
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
		/// @param streams  An array of pointers, each of them returned by
		///                 CreateOnlineStream()
		/// @param n  Number of elements in the given streams array.
		extern "C" __declspec(dllexport) void __stdcall
		DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer,
			SherpaOnnxOnlineStream** streams,
			int32_t n);

		/// Fetch what has been decoded so far for an OnlineStream.
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer().
		/// @param stream  A pointer returned by CreateOnlineStream().
		/// @return A pointer containing the result. To avoid a memory leak, the
		///         user has to free the returned pointer with
		///         DestroyOnlineRecognizerResult().
		extern "C" __declspec(dllexport) SherpaOnnxOnlineRecognizerResult* __stdcall
		GetOnlineStreamResult(SherpaOnnxOnlineRecognizer* recognizer,
			SherpaOnnxOnlineStream* stream);

		/// Release a result obtained from GetOnlineStreamResult().
		///
		/// @param r  A pointer returned by GetOnlineStreamResult()
		extern "C" __declspec(dllexport) void __stdcall
		DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r);

		/// Reset an OnlineStream. This clears the neural network model state and
		/// the state for decoding.
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer().
		/// @param stream  A pointer returned by CreateOnlineStream().
		extern "C" __declspec(dllexport) void __stdcall
		Reset(SherpaOnnxOnlineRecognizer* recognizer,
			SherpaOnnxOnlineStream* stream);

		/// Tell the stream that no more audio samples will be available.
		/// After this call, AcceptOnlineWaveform() must not be called on the
		/// stream any more.
		///
		/// @param stream  A pointer returned by CreateOnlineStream()
		extern "C" __declspec(dllexport) void __stdcall
		InputFinished(SherpaOnnxOnlineStream* stream);

		/// Check whether an endpoint has been detected for a stream.
		///
		/// @param recognizer  A pointer returned by CreateOnlineRecognizer()
		/// @param stream  A pointer returned by CreateOnlineStream()
		/// @return Return 1 if an endpoint is detected. Return 0 otherwise.
		extern "C" __declspec(dllexport) int32_t __stdcall
		IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer,
			SherpaOnnxOnlineStream* stream);

		/// Opaque handle of a display object, which is used for displaying
		/// results on Linux/macOS. See CreateDisplay().
		typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;

		/// Create a display object.
		///
		/// @param max_word_per_line  NOTE(review): presumably the maximum number
		///                           of words shown on one line -- confirm
		///                           against the implementation.
		/// @return A pointer to the display object. It must be freed using
		///         DestroyDisplay() to avoid memory leak.
		extern "C" __declspec(dllexport) SherpaOnnxDisplay* __stdcall
		CreateDisplay(int32_t max_word_per_line);

		/// Release a display object.
		///
		/// @param display  A pointer returned by CreateDisplay()
		extern "C" __declspec(dllexport) void __stdcall
		DestroyDisplay(SherpaOnnxDisplay* display);

		/// Print the result.
		///
		/// @param display  A pointer returned by CreateDisplay()
		/// @param idx  NOTE(review): not documented here; presumably the index
		///             of the segment/line being printed -- confirm.
		/// @param s  NOTE(review): presumably the text to print -- confirm.
		extern "C" __declspec(dllexport) void __stdcall
		SherpaOnnxPrint(SherpaOnnxDisplay* display,
			int32_t idx,
			const char* s);
	}

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif  // SHERPA_ONNX_CPP_API_C_API_H_