Fangjun Kuang
Committed by GitHub

Add C++ and Python API for Dolphin CTC models (#2085)

... ... @@ -15,6 +15,39 @@ echo "PATH: $PATH"
which $EXE
# Exercise both Dolphin CTC model sizes, each in int8 and float32 variants.
# Requires: $EXE (path to sherpa-onnx-offline) and the log() helper.
for type in base small; do
  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type int8)"
  log "------------------------------------------------------------"

  # int8-quantized variant; hoist the long release name so the URL,
  # tarball, and directory cannot drift apart.
  repo=sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$repo.tar.bz2"
  tar xvf "$repo.tar.bz2"
  rm "$repo.tar.bz2"

  $EXE \
    --dolphin-model="./$repo/model.int8.onnx" \
    --tokens="./$repo/tokens.txt" \
    --debug=1 \
    "./$repo/test_wavs/0.wav"

  rm -rf -- "$repo"

  log "------------------------------------------------------------"
  log "Run Dolphin CTC models ($type)"
  log "------------------------------------------------------------"

  # float32 variant
  repo=sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$repo.tar.bz2"
  tar xvf "$repo.tar.bz2"
  rm "$repo.tar.bz2"

  $EXE \
    --dolphin-model="./$repo/model.onnx" \
    --tokens="./$repo/tokens.txt" \
    --debug=1 \
    "./$repo/test_wavs/0.wav"

  rm -rf -- "$repo"
done
log "------------------------------------------------------------"
log "Run NeMo GigaAM Russian models"
log "------------------------------------------------------------"
... ...
... ... @@ -8,6 +8,15 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
log "test offline dolphin ctc"

# Model directory/tarball name on the asr-models release page; keep it in
# one variable so the download, extraction, and cleanup stay in sync.
repo=sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$repo.tar.bz2"
tar xvf "$repo.tar.bz2"
rm "$repo.tar.bz2"

# The example script expects the model directory in the current directory.
python3 ./python-api-examples/offline-dolphin-ctc-decode-files.py

rm -rf -- "$repo"
log "test offline speech enhancement (GTCRN)"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
... ...
# Manually-triggered workflow that repackages the Dolphin CTC models from
# Hugging Face and uploads them to the asr-models GitHub release.
name: export-dolphin-ctc-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-dolphin-ctc-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-dolphin-ctc-to-onnx:
    # Only run in the upstream repo or the maintainer's fork; forks lack the
    # upload token used below.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.model_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        model_type: [small, base]

    steps:
      - uses: actions/checkout@v4

      - name: Download ${{ matrix.model_type }}
        shell: bash
        run: |
          git lfs install

          type=${{ matrix.model_type }}

          git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
          git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

          # Drop git metadata so the tarballs contain only model files.
          rm -rf sherpa-onnx-dolphin-*/.git*

          ls -lha sherpa-onnx-dolphin-*/

          tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
          tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
... ...
... ... @@ -205,6 +205,16 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2
- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
du -h -d1 .
- name: Test offline speech denoiser
shell: bash
run: |
... ... @@ -249,16 +259,6 @@ jobs:
.github/scripts/test-offline-moonshine.sh
du -h -d1 .
- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
du -h -d1 .
- name: Test C++ API
shell: bash
run: |
... ...
... ... @@ -162,6 +162,14 @@ jobs:
overwrite: true
file: sherpa-onnx-*osx-universal2*.tar.bz2
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test offline speech denoiser
shell: bash
run: |
... ... @@ -226,14 +234,6 @@ jobs:
.github/scripts/test-online-punctuation.sh
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test online CTC
shell: bash
run: |
... ...
# CMake 4.0 removed compatibility with policies older than 3.5.  Raising the
# policy floor keeps configuration working under CMake >= 4 — presumably for
# dependencies that declare an older cmake_minimum_required; verify.
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
endif()
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
# Must be set before project(); used only for macOS builds.
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum OS X deployment version. Used only for macOS")
... ...
#!/usr/bin/env python3
"""
This file shows how to use a non-streaming CTC model from Dolphin
to decode files.
Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""
from pathlib import Path
import time
import sherpa_onnx
import soundfile as sf
def create_recognizer():
    """Create an offline recognizer for the Dolphin base int8 CTC model.

    Returns:
      A tuple ``(recognizer, test_wav)`` where ``test_wav`` is a sample
      wave file shipped with the downloaded model directory.

    Raises:
      ValueError: if the model files have not been downloaded yet.
    """
    model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"
    tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"
    test_wav = (
        "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
    )

    files_present = Path(model).is_file() and Path(test_wav).is_file()
    if not files_present:
        raise ValueError(
            """Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""
        )

    recognizer = sherpa_onnx.OfflineRecognizer.from_dolphin_ctc(
        model=model,
        tokens=tokens,
        debug=True,
    )
    return recognizer, test_wav
def main():
    """Decode the bundled test wave file and report timing statistics."""
    recognizer, wave_filename = create_recognizer()

    samples, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
    samples = samples[:, 0]  # keep only the first channel

    # samples is a 1-D float32 numpy array normalized to [-1, 1];
    # sample_rate does not need to be 16000 Hz.

    begin = time.time()
    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, samples)
    recognizer.decode_stream(stream)
    finish = time.time()

    print(wave_filename)
    print(stream.result)

    elapsed_seconds = finish - begin
    audio_duration = len(samples) / sample_rate
    real_time_factor = elapsed_seconds / audio_duration
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
    main()
... ...
... ... @@ -27,6 +27,8 @@ set(sources
offline-ctc-fst-decoder.cc
offline-ctc-greedy-search-decoder.cc
offline-ctc-model.cc
offline-dolphin-model-config.cc
offline-dolphin-model.cc
offline-fire-red-asr-greedy-search-decoder.cc
offline-fire-red-asr-model-config.cc
offline-fire-red-asr-model.cc
... ...
... ... @@ -20,6 +20,7 @@
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
... ... @@ -110,6 +111,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(config);
}
// TODO(fangjun): Refactor it. We don't need to use model_type here
ModelType model_type = ModelType::kUnknown;
... ... @@ -160,6 +165,10 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
template <typename Manager>
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
Manager *mgr, const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(mgr, config);
}
// TODO(fangjun): Refactor it. We don't need to use model_type here
ModelType model_type = ModelType::kUnknown;
... ...
... ... @@ -64,6 +64,10 @@ class OfflineCtcModel {
// return true for models from https://github.com/salute-developers/GigaAM
// return false otherwise
virtual bool IsGigaAM() const { return false; }
// For Dolphin models, they use global CMVN
virtual void NormalizeFeatures(float *features, int32_t num_frames,
int32_t feat_dim) const {}
};
} // namespace sherpa_onnx
... ...
// sherpa-onnx/csrc/offline-dolphin-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
namespace sherpa_onnx {
// Add the Dolphin-specific command-line option to the option parser.
void OfflineDolphinModelConfig::Register(ParseOptions *po) {
  po->Register("dolphin-model", &model,
               "Path to model.onnx of Dolphin CTC branch.");
}
// Check that the configured model file is usable.
//
// Returns true if `model` names an existing file; otherwise logs a
// diagnostic and returns false.  An empty path gets its own message so the
// user is not shown a confusing "'' does not exist".
bool OfflineDolphinModelConfig::Validate() const {
  if (model.empty()) {
    SHERPA_ONNX_LOGE("Please provide --dolphin-model");
    return false;
  }

  if (!FileExists(model)) {
    SHERPA_ONNX_LOGE("Dolphin model '%s' does not exist", model.c_str());
    return false;
  }

  return true;
}
// Render the config for logging; mirrors the format used by the other
// model-config ToString() methods.
std::string OfflineDolphinModelConfig::ToString() const {
  std::ostringstream os;
  os << "OfflineDolphinModelConfig("
     << "model=\"" << model << "\")";
  return os.str();
}
} // namespace sherpa_onnx
... ...
// sherpa-onnx/csrc/offline-dolphin-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#include <string>
#include "sherpa-onnx/csrc/parse-options.h"
namespace sherpa_onnx {
// Command-line/config options for Dolphin CTC models.
struct OfflineDolphinModelConfig {
  // Path to the Dolphin CTC onnx model (model.onnx or model.int8.onnx).
  std::string model;

  OfflineDolphinModelConfig() = default;
  explicit OfflineDolphinModelConfig(const std::string &model) : model(model) {}

  // Registers --dolphin-model with the option parser.
  void Register(ParseOptions *po);

  // Returns true if `model` points to an existing file.
  bool Validate() const;

  // Human-readable representation for logging.
  std::string ToString() const;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
... ...
// sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
//
// Copyright (c) 2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
#include <string>
#include <vector>
namespace sherpa_onnx {
// Metadata read from a Dolphin CTC onnx model at load time.
struct OfflineDolphinModelMetaData {
  // Size of the output vocabulary, read from the model's "vocab_size"
  // metadata entry.  Initialized to 0 so a default-constructed instance
  // does not hold an indeterminate value.
  int32_t vocab_size = 0;

  // Ratio of input frames to output frames.
  int32_t subsampling_factor = 4;

  // Per-dimension global CMVN statistics ("mean" / "invstd" metadata
  // entries) applied to input features before inference.
  std::vector<float> mean;
  std::vector<float> inv_stddev;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
... ...
// sherpa-onnx/csrc/offline-dolphin-model.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
#include <algorithm>
#include <string>
#include <utility>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
namespace sherpa_onnx {
// Pimpl holding the onnxruntime session and model metadata.
class OfflineDolphinModel::Impl {
 public:
  // Load the model from the file path in config.dolphin.model.
  explicit Impl(const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  // Load the model through a platform asset manager (Android / OHOS).
  template <typename Manager>
  Impl(Manager *mgr, const OfflineModelConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto buf = ReadFile(mgr, config_.dolphin.model);
    Init(buf.data(), buf.size());
  }

  // Run the onnx session and return its raw outputs.
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) {
    std::array<Ort::Value, 2> inputs = {
        std::move(features),
        std::move(features_length),
    };

    return sess_->Run({}, input_names_ptr_.data(), inputs.data(), inputs.size(),
                      output_names_ptr_.data(), output_names_ptr_.size());
  }

  int32_t VocabSize() const { return meta_data_.vocab_size; }

  int32_t SubsamplingFactor() const { return meta_data_.subsampling_factor; }

  // Apply global CMVN in place: (x - mean) * inv_stddev for each of the
  // num_frames rows of feat_dim values.
  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const {
    auto p = features;
    const auto &mean = meta_data_.mean;
    const auto &invstd = meta_data_.inv_stddev;

    for (int32_t f = 0; f < num_frames; ++f) {
      for (int32_t d = 0; d < feat_dim; ++d) {
        p[d] = (p[d] - mean[d]) * invstd[d];
      }
      p += feat_dim;
    }
  }

  OrtAllocator *Allocator() { return allocator_; }

 private:
  // Create the session from an in-memory model and read its metadata
  // (vocab size and CMVN statistics).
  void Init(void *model_data, size_t model_data_length) {
    sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
                                           sess_opts_);

    GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
    GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_);

    // get meta data
    Ort::ModelMetadata meta_data = sess_->GetModelMetadata();
    if (config_.debug) {
      std::ostringstream os;
      PrintModelMetadata(os, meta_data);
#if __OHOS__
      SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
#else
      SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
#endif
    }

    Ort::AllocatorWithDefaultOptions allocator;  // used in the macro below
    SHERPA_ONNX_READ_META_DATA(meta_data_.vocab_size, "vocab_size");
    // Global CMVN statistics stored in the model's metadata.
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.mean, "mean");
    SHERPA_ONNX_READ_META_DATA_VEC_FLOAT(meta_data_.inv_stddev, "invstd");
  }

 private:
  OfflineModelConfig config_;
  Ort::Env env_;
  Ort::SessionOptions sess_opts_;
  Ort::AllocatorWithDefaultOptions allocator_;

  std::unique_ptr<Ort::Session> sess_;

  // Owned name strings plus the char* views onnxruntime's Run() expects.
  std::vector<std::string> input_names_;
  std::vector<const char *> input_names_ptr_;

  std::vector<std::string> output_names_;
  std::vector<const char *> output_names_ptr_;

  OfflineDolphinModelMetaData meta_data_;
};
OfflineDolphinModel::OfflineDolphinModel(const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(config)) {}

template <typename Manager>
OfflineDolphinModel::OfflineDolphinModel(Manager *mgr,
                                         const OfflineModelConfig &config)
    : impl_(std::make_unique<Impl>(mgr, config)) {}

OfflineDolphinModel::~OfflineDolphinModel() = default;

// All public methods simply forward to the pimpl.

std::vector<Ort::Value> OfflineDolphinModel::Forward(
    Ort::Value features, Ort::Value features_length) {
  return impl_->Forward(std::move(features), std::move(features_length));
}

int32_t OfflineDolphinModel::VocabSize() const { return impl_->VocabSize(); }

int32_t OfflineDolphinModel::SubsamplingFactor() const {
  return impl_->SubsamplingFactor();
}

void OfflineDolphinModel::NormalizeFeatures(float *features, int32_t num_frames,
                                            int32_t feat_dim) const {
  return impl_->NormalizeFeatures(features, num_frames, feat_dim);
}

OrtAllocator *OfflineDolphinModel::Allocator() const {
  return impl_->Allocator();
}

// Explicit instantiations of the asset-manager constructor for the
// platforms that need it.
#if __ANDROID_API__ >= 9
template OfflineDolphinModel::OfflineDolphinModel(
    AAssetManager *mgr, const OfflineModelConfig &config);
#endif

#if __OHOS__
template OfflineDolphinModel::OfflineDolphinModel(
    NativeResourceManager *mgr, const OfflineModelConfig &config);
#endif

}  // namespace sherpa_onnx
... ...
// sherpa-onnx/csrc/offline-dolphin-model.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
#include <memory>
#include <vector>
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-dolphin-model-meta-data.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
namespace sherpa_onnx {
// Offline CTC model wrapper for Dolphin models.
class OfflineDolphinModel : public OfflineCtcModel {
 public:
  explicit OfflineDolphinModel(const OfflineModelConfig &config);

  // Construct from a platform asset manager (Android / OHOS).
  template <typename Manager>
  OfflineDolphinModel(Manager *mgr, const OfflineModelConfig &config);

  ~OfflineDolphinModel() override;

  /** Run the forward method of the model.
   *
   * @param features A tensor of shape (N, T, C).
   * @param features_length A 1-D tensor of shape (N,) containing number of
   *                        valid frames in `features` before padding.
   *                        Its dtype is int64_t.
   *
   * @return Return a vector containing:
   *  - log_probs: A 3-D tensor of shape (N, T', vocab_size).
   *  - log_probs_length A 1-D tensor of shape (N,). Its dtype is int64_t
   */
  std::vector<Ort::Value> Forward(Ort::Value features,
                                  Ort::Value features_length) override;

  /** Return the vocabulary size of the model
   */
  int32_t VocabSize() const override;

  /** SubsamplingFactor of the model
   *
   * For Dolphin CTC models it defaults to 4
   * (see OfflineDolphinModelMetaData).
   */
  int32_t SubsamplingFactor() const override;

  /** Return an allocator for allocating memory
   */
  OrtAllocator *Allocator() const override;

  // Dolphin models can decode a batch of streams in one Forward() call.
  bool SupportBatchProcessing() const override { return true; }

  // Apply global CMVN to `features` (num_frames x feat_dim) in place.
  void NormalizeFeatures(float *features, int32_t num_frames,
                         int32_t feat_dim) const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_H_
... ...
... ... @@ -21,6 +21,7 @@ void OfflineModelConfig::Register(ParseOptions *po) {
wenet_ctc.Register(po);
sense_voice.Register(po);
moonshine.Register(po);
dolphin.Register(po);
po->Register("telespeech-ctc", &telespeech_ctc,
"Path to model.onnx for telespeech ctc");
... ... @@ -109,6 +110,10 @@ bool OfflineModelConfig::Validate() const {
return moonshine.Validate();
}
if (!dolphin.model.empty()) {
return dolphin.Validate();
}
if (!telespeech_ctc.empty() && !FileExists(telespeech_ctc)) {
SHERPA_ONNX_LOGE("telespeech_ctc: '%s' does not exist",
telespeech_ctc.c_str());
... ... @@ -136,6 +141,7 @@ std::string OfflineModelConfig::ToString() const {
os << "wenet_ctc=" << wenet_ctc.ToString() << ", ";
os << "sense_voice=" << sense_voice.ToString() << ", ";
os << "moonshine=" << moonshine.ToString() << ", ";
os << "dolphin=" << dolphin.ToString() << ", ";
os << "telespeech_ctc=\"" << telespeech_ctc << "\", ";
os << "tokens=\"" << tokens << "\", ";
os << "num_threads=" << num_threads << ", ";
... ...
... ... @@ -6,6 +6,7 @@
#include <string>
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
#include "sherpa-onnx/csrc/offline-fire-red-asr-model-config.h"
#include "sherpa-onnx/csrc/offline-moonshine-model-config.h"
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model-config.h"
... ... @@ -30,6 +31,7 @@ struct OfflineModelConfig {
OfflineWenetCtcModelConfig wenet_ctc;
OfflineSenseVoiceModelConfig sense_voice;
OfflineMoonshineModelConfig moonshine;
OfflineDolphinModelConfig dolphin;
std::string telespeech_ctc;
std::string tokens;
... ... @@ -62,6 +64,7 @@ struct OfflineModelConfig {
const OfflineWenetCtcModelConfig &wenet_ctc,
const OfflineSenseVoiceModelConfig &sense_voice,
const OfflineMoonshineModelConfig &moonshine,
const OfflineDolphinModelConfig &dolphin,
const std::string &telespeech_ctc,
const std::string &tokens, int32_t num_threads, bool debug,
const std::string &provider, const std::string &model_type,
... ... @@ -77,6 +80,7 @@ struct OfflineModelConfig {
wenet_ctc(wenet_ctc),
sense_voice(sense_voice),
moonshine(moonshine),
dolphin(dolphin),
telespeech_ctc(telespeech_ctc),
tokens(tokens),
num_threads(num_threads),
... ...
... ... @@ -118,6 +118,19 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
}
}
if (!config_.model_config.dolphin.model.empty()) {
config_.feat_config.low_freq = 0;
config_.feat_config.high_freq = 8000;
config_.feat_config.remove_dc_offset = false;
config_.feat_config.dither = 0;
config_.feat_config.preemph_coeff = 0;
config_.feat_config.window_type = "hann";
config_.feat_config.feature_dim = 80;
config_.feat_config.is_librosa = true;
config_.feat_config.frame_length_ms = 31.25; // 16000/512 = 31.25
config_.feat_config.snip_edges = false;
}
if (!config_.model_config.wenet_ctc.model.empty()) {
// WeNet CTC models assume input samples are in the range
// [-32768, 32767], so we set normalize_samples to false
... ... @@ -157,7 +170,7 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
} else {
SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
config_.decoding_method.c_str());
exit(-1);
SHERPA_ONNX_EXIT(-1);
}
}
... ... @@ -166,7 +179,7 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
}
void DecodeStreams(OfflineStream **ss, int32_t n) const override {
if (!model_->SupportBatchProcessing()) {
if (!model_->SupportBatchProcessing() || (n == 1)) {
// If the model does not support batch process,
// we process each stream independently.
for (int32_t i = 0; i != n; ++i) {
... ... @@ -190,6 +203,9 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
std::vector<float> f = ss[i]->GetFrames();
int32_t num_frames = f.size() / feat_dim;
model_->NormalizeFeatures(f.data(), num_frames, feat_dim);
features_vec[i] = std::move(f);
features_length_vec[i] = num_frames;
... ... @@ -241,6 +257,8 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
int32_t num_frames = f.size() / feat_dim;
model_->NormalizeFeatures(f.data(), num_frames, feat_dim);
std::array<int64_t, 3> shape = {1, num_frames, feat_dim};
Ort::Value x = Ort::Value::CreateTensor(memory_info, f.data(), f.size(),
... ...
... ... @@ -49,7 +49,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
if (!config.model_config.nemo_ctc.model.empty() ||
!config.model_config.zipformer_ctc.model.empty() ||
!config.model_config.tdnn.model.empty() ||
!config.model_config.wenet_ctc.model.empty()) {
!config.model_config.wenet_ctc.model.empty() ||
!config.model_config.dolphin.model.empty()) {
return std::make_unique<OfflineRecognizerCtcImpl>(config);
}
... ... @@ -234,7 +235,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
if (!config.model_config.nemo_ctc.model.empty() ||
!config.model_config.zipformer_ctc.model.empty() ||
!config.model_config.tdnn.model.empty() ||
!config.model_config.wenet_ctc.model.empty()) {
!config.model_config.wenet_ctc.model.empty() ||
!config.model_config.dolphin.model.empty()) {
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
}
... ...
... ... @@ -23,9 +23,8 @@ struct OfflineSenseVoiceModelConfig {
bool use_itn = false;
OfflineSenseVoiceModelConfig() = default;
explicit OfflineSenseVoiceModelConfig(const std::string &model,
const std::string &language,
bool use_itn)
OfflineSenseVoiceModelConfig(const std::string &model,
const std::string &language, bool use_itn)
: model(model), language(language), use_itn(use_itn) {}
void Register(ParseOptions *po);
... ...
... ... @@ -41,6 +41,9 @@ OnlineRecognizerResult Convert(const OnlineTransducerDecoderResult &src,
std::string text;
for (auto i : src.tokens) {
auto sym = sym_table[i];
if (sym == "<unk>") {
continue;
}
text.append(sym);
... ...
... ... @@ -4,6 +4,8 @@
#ifndef SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
#define SHERPA_ONNX_CSRC_RKNN_SILERO_VAD_MODEL_RKNN_H_
#include <memory>
#include "rknn_api.h" // NOLINT
#include "sherpa-onnx/csrc/online-model-config.h"
#include "sherpa-onnx/csrc/vad-model.h"
... ...
... ... @@ -9,6 +9,7 @@ set(srcs
features.cc
keyword-spotter.cc
offline-ctc-fst-decoder-config.cc
offline-dolphin-model-config.cc
offline-fire-red-asr-model-config.cc
offline-lm-config.cc
offline-model-config.cc
... ...
// sherpa-onnx/python/csrc/offline-dolphin-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"
#include <string>
#include <vector>
#include "sherpa-onnx/python/csrc/offline-dolphin-model-config.h"
namespace sherpa_onnx {
// Bind OfflineDolphinModelConfig into the given Python module with a
// read/write `model` attribute and a __str__ that mirrors ToString().
void PybindOfflineDolphinModelConfig(py::module *m) {
  using PyClass = OfflineDolphinModelConfig;
  py::class_<PyClass>(*m, "OfflineDolphinModelConfig")
      .def(py::init<>())
      .def(py::init<const std::string &>(), py::arg("model"))
      .def_readwrite("model", &PyClass::model)
      .def("__str__", &PyClass::ToString);
}
} // namespace sherpa_onnx
... ...
// sherpa-onnx/python/csrc/offline-dolphin-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace sherpa_onnx {
void PybindOfflineDolphinModelConfig(py::module *m);
}
#endif // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
... ...
... ... @@ -8,6 +8,7 @@
#include <vector>
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/python/csrc/offline-dolphin-model-config.h"
#include "sherpa-onnx/python/csrc/offline-fire-red-asr-model-config.h"
#include "sherpa-onnx/python/csrc/offline-moonshine-model-config.h"
#include "sherpa-onnx/python/csrc/offline-nemo-enc-dec-ctc-model-config.h"
... ... @@ -32,6 +33,7 @@ void PybindOfflineModelConfig(py::module *m) {
PybindOfflineWenetCtcModelConfig(m);
PybindOfflineSenseVoiceModelConfig(m);
PybindOfflineMoonshineModelConfig(m);
PybindOfflineDolphinModelConfig(m);
using PyClass = OfflineModelConfig;
py::class_<PyClass>(*m, "OfflineModelConfig")
... ... @@ -44,7 +46,8 @@ void PybindOfflineModelConfig(py::module *m) {
const OfflineZipformerCtcModelConfig &,
const OfflineWenetCtcModelConfig &,
const OfflineSenseVoiceModelConfig &,
const OfflineMoonshineModelConfig &, const std::string &,
const OfflineMoonshineModelConfig &,
const OfflineDolphinModelConfig &, const std::string &,
const std::string &, int32_t, bool, const std::string &,
const std::string &, const std::string &,
const std::string &>(),
... ... @@ -58,6 +61,7 @@ void PybindOfflineModelConfig(py::module *m) {
py::arg("wenet_ctc") = OfflineWenetCtcModelConfig(),
py::arg("sense_voice") = OfflineSenseVoiceModelConfig(),
py::arg("moonshine") = OfflineMoonshineModelConfig(),
py::arg("dolphin") = OfflineDolphinModelConfig(),
py::arg("telespeech_ctc") = "", py::arg("tokens"),
py::arg("num_threads"), py::arg("debug") = false,
py::arg("provider") = "cpu", py::arg("model_type") = "",
... ... @@ -72,6 +76,7 @@ void PybindOfflineModelConfig(py::module *m) {
.def_readwrite("wenet_ctc", &PyClass::wenet_ctc)
.def_readwrite("sense_voice", &PyClass::sense_voice)
.def_readwrite("moonshine", &PyClass::moonshine)
.def_readwrite("dolphin", &PyClass::dolphin)
.def_readwrite("telespeech_ctc", &PyClass::telespeech_ctc)
.def_readwrite("tokens", &PyClass::tokens)
.def_readwrite("num_threads", &PyClass::num_threads)
... ...
... ... @@ -6,6 +6,7 @@ from typing import List, Optional
from _sherpa_onnx import (
FeatureExtractorConfig,
OfflineCtcFstDecoderConfig,
OfflineDolphinModelConfig,
OfflineFireRedAsrModelConfig,
OfflineLMConfig,
OfflineModelConfig,
... ... @@ -409,6 +410,78 @@ class OfflineRecognizer(object):
return self
@classmethod
def from_dolphin_ctc(
    cls,
    model: str,
    tokens: str,
    num_threads: int = 1,
    sample_rate: int = 16000,
    feature_dim: int = 80,
    decoding_method: str = "greedy_search",
    debug: bool = False,
    provider: str = "cpu",
    rule_fsts: str = "",
    rule_fars: str = "",
):
    """Create an offline recognizer from a Dolphin CTC model.

    Please refer to
    `<https://k2-fsa.github.io/sherpa/onnx/dolphin/index.html>`_
    to download pre-trained models.

    Args:
      model:
        Path to ``model.onnx`` or ``model.int8.onnx``.
      tokens:
        Path to ``tokens.txt``. Each line in ``tokens.txt`` contains two
        columns::

            symbol integer_id

      num_threads:
        Number of threads for neural network computation.
      sample_rate:
        Sample rate of the training data used to train the model.
      feature_dim:
        Dimension of the feature used to train the model.
      decoding_method:
        Valid values are greedy_search.
      debug:
        True to show debug messages.
      provider:
        onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
      rule_fsts:
        If not empty, it specifies fsts for inverse text normalization.
        If there are multiple fsts, they are separated by a comma.
      rule_fars:
        If not empty, it specifies fst archives for inverse text
        normalization. If there are multiple archives, they are separated
        by a comma.
    """
    # Bypass __init__ (as the other from_* constructors do) and assemble
    # the full recognizer config in one nested expression.
    self = cls.__new__(cls)

    recognizer_config = OfflineRecognizerConfig(
        feat_config=FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
        ),
        model_config=OfflineModelConfig(
            dolphin=OfflineDolphinModelConfig(model=model),
            tokens=tokens,
            num_threads=num_threads,
            debug=debug,
            provider=provider,
        ),
        decoding_method=decoding_method,
        rule_fsts=rule_fsts,
        rule_fars=rule_fars,
    )

    self.recognizer = _Recognizer(recognizer_config)
    self.config = recognizer_config
    return self
@classmethod
def from_nemo_ctc(
cls,
model: str,
... ...