sherpa-onnx-wasm-nodejs.cc 6.8 KB
// wasm/sherpa-onnx-wasm-main-nodejs.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <stdio.h>

#include <algorithm>
#include <memory>

#include "sherpa-onnx/c-api/c-api.h"

extern "C" {

static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");

static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");

static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
                  sizeof(SherpaOnnxOfflineTransducerModelConfig) +
                      sizeof(SherpaOnnxOfflineParaformerModelConfig) +
                      sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
                      sizeof(SherpaOnnxOfflineWhisperModelConfig) +
                      sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
                      sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
                      sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
                      sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
                      sizeof(SherpaOnnxOfflineDolphinModelConfig),

              "");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
                  sizeof(SherpaOnnxFeatureConfig) +
                      sizeof(SherpaOnnxOfflineLMConfig) +
                      sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4,
              "");

void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
  auto tts_model_config = &tts_config->model;
  auto vits_model_config = &tts_model_config->vits;
  fprintf(stdout, "----------vits model config----------\n");
  fprintf(stdout, "model: %s\n", vits_model_config->model);
  fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
  fprintf(stdout, "tokens: %s\n", vits_model_config->tokens);
  fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir);
  fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
  fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
  fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
  fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir);

  fprintf(stdout, "----------tts model config----------\n");
  fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
  fprintf(stdout, "debug: %d\n", tts_model_config->debug);
  fprintf(stdout, "provider: %s\n", tts_model_config->provider);

  fprintf(stdout, "----------tts config----------\n");
  fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
  fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
}

void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
  auto model_config = &config->model_config;
  auto feat = &config->feat_config;
  auto transducer = &model_config->transducer;
  auto paraformer = &model_config->paraformer;
  auto nemo_ctc = &model_config->nemo_ctc;
  auto whisper = &model_config->whisper;
  auto tdnn = &model_config->tdnn;
  auto sense_voice = &model_config->sense_voice;
  auto moonshine = &model_config->moonshine;
  auto fire_red_asr = &model_config->fire_red_asr;
  auto dolphin = &model_config->dolphin;

  fprintf(stdout, "----------offline transducer model config----------\n");
  fprintf(stdout, "encoder: %s\n", transducer->encoder);
  fprintf(stdout, "decoder: %s\n", transducer->decoder);
  fprintf(stdout, "joiner: %s\n", transducer->joiner);

  fprintf(stdout, "----------offline paraformer model config----------\n");
  fprintf(stdout, "model: %s\n", paraformer->model);

  fprintf(stdout, "----------offline nemo_ctc model config----------\n");
  fprintf(stdout, "model: %s\n", nemo_ctc->model);

  fprintf(stdout, "----------offline whisper model config----------\n");
  fprintf(stdout, "encoder: %s\n", whisper->encoder);
  fprintf(stdout, "decoder: %s\n", whisper->decoder);
  fprintf(stdout, "language: %s\n", whisper->language);
  fprintf(stdout, "task: %s\n", whisper->task);
  fprintf(stdout, "tail_paddings: %d\n", whisper->tail_paddings);

  fprintf(stdout, "----------offline tdnn model config----------\n");
  fprintf(stdout, "model: %s\n", tdnn->model);

  fprintf(stdout, "----------offline sense_voice model config----------\n");
  fprintf(stdout, "model: %s\n", sense_voice->model);
  fprintf(stdout, "language: %s\n", sense_voice->language);
  fprintf(stdout, "use_itn: %d\n", sense_voice->use_itn);

  fprintf(stdout, "----------offline moonshine model config----------\n");
  fprintf(stdout, "preprocessor: %s\n", moonshine->preprocessor);
  fprintf(stdout, "encoder: %s\n", moonshine->encoder);
  fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
  fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);

  fprintf(stdout, "----------offline FireRedAsr model config----------\n");
  fprintf(stdout, "encoder: %s\n", fire_red_asr->encoder);
  fprintf(stdout, "decoder: %s\n", fire_red_asr->decoder);

  fprintf(stdout, "----------offline Dolphin model config----------\n");
  fprintf(stdout, "model: %s\n", dolphin->model);

  fprintf(stdout, "tokens: %s\n", model_config->tokens);
  fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
  fprintf(stdout, "provider: %s\n", model_config->provider);
  fprintf(stdout, "debug: %d\n", model_config->debug);
  fprintf(stdout, "model type: %s\n", model_config->model_type);
  fprintf(stdout, "modeling unit: %s\n", model_config->modeling_unit);
  fprintf(stdout, "bpe vocab: %s\n", model_config->bpe_vocab);
  fprintf(stdout, "telespeech_ctc: %s\n", model_config->telespeech_ctc);

  fprintf(stdout, "----------feat config----------\n");
  fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
  fprintf(stdout, "feat dim: %d\n", feat->feature_dim);

  fprintf(stdout, "----------recognizer config----------\n");
  fprintf(stdout, "decoding method: %s\n", config->decoding_method);
  fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
  fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
  fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
  fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
  fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
  fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy(src, src + num_bytes, dst);
}
}