Fangjun Kuang
Committed by GitHub

Add Pascal API for Dolphin CTC models (#2096)

@@ -149,6 +149,11 @@ jobs: @@ -149,6 +149,11 @@ jobs:
149 cd ./pascal-api-examples 149 cd ./pascal-api-examples
150 150
151 pushd non-streaming-asr 151 pushd non-streaming-asr
  152 +
  153 + ./run-dolphin-ctc.sh
  154 + rm -rf sherpa-onnx-*
  155 + echo "---"
  156 +
152 ./run-zipformer-transducer.sh 157 ./run-zipformer-transducer.sh
153 rm -rf sherpa-onnx-* 158 rm -rf sherpa-onnx-*
154 echo "---" 159 echo "---"
@@ -253,7 +258,13 @@ jobs: @@ -253,7 +258,13 @@ jobs:
253 258
254 cd ./pascal-api-examples 259 cd ./pascal-api-examples
255 260
  261 +
256 pushd vad-with-non-streaming-asr 262 pushd vad-with-non-streaming-asr
  263 +
  264 + time ./run-vad-with-dolphin-ctc.sh
  265 + rm -rf sherpa-onnx-*
  266 + echo "---"
  267 +
257 time ./run-vad-with-moonshine.sh 268 time ./run-vad-with-moonshine.sh
258 rm -rf sherpa-onnx-* 269 rm -rf sherpa-onnx-*
259 echo "---" 270 echo "---"
@@ -60,7 +60,7 @@ This repository supports running the following functions **locally** @@ -60,7 +60,7 @@ This repository supports running the following functions **locally**
60 60
61 on the following platforms and operating systems: 61 on the following platforms and operating systems:
62 62
63 - - x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64) 63 + - x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64), **RK NPU**
64 - Linux, macOS, Windows, openKylin 64 - Linux, macOS, Windows, openKylin
65 - Android, WearOS 65 - Android, WearOS
66 - iOS 66 - iOS
@@ -5,6 +5,7 @@ APIs with non-streaming models for speech recognition. @@ -5,6 +5,7 @@ APIs with non-streaming models for speech recognition.
5 5
6 |File|Description| 6 |File|Description|
7 |----|-----------| 7 |----|-----------|
  8 +|[run-dolphin-ctc.sh](./run-dolphin-ctc.sh)|Use a non-streaming [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model for speech recognition|
8 |[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition| 9 |[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition|
9 |[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition| 10 |[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition|
10 |[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers| 11 |[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers|
{ Copyright (c) 2025 Xiaomi Corporation }

{
This file shows how to use a non-streaming Dolphin CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program dolphin_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Audio: TSherpaOnnxWave;
  AudioPath: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  StartTime: TDateTime;
  StopTime: TDateTime;

  ElapsedSeconds: Single;
  AudioSeconds: Single;
  Rtf: Single;
begin
  Initialize(Config);

  { Point the recognizer at the downloaded Dolphin CTC model files. }
  Config.ModelConfig.Dolphin.Model := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  AudioPath := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav';

  Audio := SherpaOnnxReadWave(AudioPath);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();

  { Time only the decoding work, not model loading or file reading. }
  StartTime := Now;

  Stream.AcceptWaveform(Audio.Samples, Audio.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  StopTime := Now;

  ElapsedSeconds := MilliSecondsBetween(StopTime, StartTime) / 1000;
  AudioSeconds := Length(Audio.Samples) / Audio.SampleRate;
  Rtf := ElapsedSeconds / AudioSeconds;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [ElapsedSeconds]));
  WriteLn(Format('Wave duration %.3f s', [AudioSeconds]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [ElapsedSeconds, AudioSeconds, Rtf]));

  {Free resources to avoid memory leak.

   Note: You don't need to invoke them for this simple script.
   However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
#!/usr/bin/env bash
# Build the sherpa-onnx shared C API library (if needed), download the
# Dolphin CTC model (if needed), compile the Pascal example, and run it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR"/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Build the shared library only when none of the platform-specific
# artifacts (macOS .dylib, Linux .so, Windows .dll) already exists.
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# Download and unpack the Dolphin CTC model if it is not present yet.
if [[ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
fi

# Quote the -Fu/-Fl arguments so a checkout path containing spaces works.
fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$SHERPA_ONNX_DIR/build/install/lib" \
  ./dolphin_ctc.pas

# Make the freshly built shared library discoverable at run time
# (LD_LIBRARY_PATH for Linux, DYLD_LIBRARY_PATH for macOS).
export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH"

./dolphin_ctc
@@ -6,7 +6,10 @@ with non-streaming speech recognition models. @@ -6,7 +6,10 @@ with non-streaming speech recognition models.
6 6
7 |Directory| Description| 7 |Directory| Description|
8 |---------|------------| 8 |---------|------------|
9 -|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + Whisper for speech recognition.|  
10 -|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + SenseVoice for speech recognition.| 9 +|[run-vad-with-dolphin-ctc.sh](./run-vad-with-dolphin-ctc.sh)|It shows how to use the VAD + [Dolphin](https://github.com/DataoceanAI/Dolphin) for speech recognition.|
  10 +|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + [Whisper](https://github.com/openai/whisper) for speech recognition.|
  11 +|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) for speech recognition.|
  12 +|[run-vad-with-moonshine.sh](./run-vad-with-moonshine.sh)|It shows how to use the VAD + [Moonshine](https://github.com/usefulsensors/moonshine) for speech recognition.|
  13 +
11 14
12 Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models. 15 Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models.
#!/usr/bin/env bash
# Build the sherpa-onnx shared C API library (if needed), download the
# silero VAD model, a test wave, and the Dolphin CTC model (if needed),
# then compile and run the Pascal VAD + Dolphin example.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR"/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Build the shared library only when none of the platform-specific
# artifacts (macOS .dylib, Linux .so, Windows .dll) already exists.
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# Download the silero VAD model if needed.
if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

# Download the test wave file if needed.
if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# Download and unpack the Dolphin CTC model if it is not present yet.
if [[ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
fi

# Quote the -Fu/-Fl arguments so a checkout path containing spaces works.
fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$SHERPA_ONNX_DIR/build/install/lib" \
  ./vad_with_dolphin.pas

# Make the freshly built shared library discoverable at run time
# (LD_LIBRARY_PATH for Linux, DYLD_LIBRARY_PATH for macOS).
export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH"

./vad_with_dolphin
{ Copyright (c) 2025 Xiaomi Corporation }

{
This file shows how to use a non-streaming Dolphin model
with silero VAD to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program vad_with_dolphin;

{$mode objfpc}

uses
  sherpa_onnx,
  SysUtils;

{ Create a silero VAD configured for 16 kHz input. }
function CreateVad(): TSherpaOnnxVoiceActivityDetector;
var
  Config: TSherpaOnnxVadModelConfig;

  SampleRate: Integer;
  WindowSize: Integer;
begin
  Initialize(Config);

  SampleRate := 16000; {Please don't change it unless you know the details}
  WindowSize := 512; {Please don't change it unless you know the details}

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.5;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads := 1;
  Config.Debug := True;
  Config.Provider := 'cpu';
  Config.SampleRate := SampleRate;

  Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;

{ Create an offline recognizer using the Dolphin CTC model. }
function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
var
  Config: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(Config);

  Config.ModelConfig.Dolphin.Model := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

{ Drain every pending speech segment from the VAD, decode it with the
  recognizer, and print "start -- end text" for each one.
  Shared by the main feeding loop and the post-Flush drain, which were
  previously duplicated verbatim. }
procedure DecodeSegments(Vad: TSherpaOnnxVoiceActivityDetector;
                         Recognizer: TSherpaOnnxOfflineRecognizer;
                         SampleRate: Integer);
var
  SpeechSegment: TSherpaOnnxSpeechSegment;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  Start: Single;
  Duration: Single;
begin
  while not Vad.IsEmpty do
  begin
    SpeechSegment := Vad.Front();
    Vad.Pop();
    Stream := Recognizer.CreateStream();

    Stream.AcceptWaveform(SpeechSegment.Samples, SampleRate);
    Recognizer.Decode(Stream);
    RecognitionResult := Recognizer.GetResult(Stream);

    Start := SpeechSegment.Start / SampleRate;
    Duration := Length(SpeechSegment.Samples) / SampleRate;
    WriteLn(Format('%.3f -- %.3f %s',
      [Start, Start + Duration, RecognitionResult.Text]));

    FreeAndNil(Stream);
  end;
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
begin
  Vad := CreateVad();
  Recognizer := CreateOfflineRecognizer();

  Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
  { The VAD model is trained for a fixed sample rate; refuse other inputs. }
  if Wave.SampleRate <> Vad.Config.SampleRate then
  begin
    WriteLn(Format('Expected sample rate: %d. Given: %d',
      [Vad.Config.SampleRate, Wave.SampleRate]));

    Exit;
  end;

  { Feed the wave to the VAD in fixed-size windows and decode any
    speech segments that become available along the way. }
  WindowSize := Vad.Config.SileroVad.WindowSize;
  Offset := 0;
  while Offset + WindowSize <= Length(Wave.Samples) do
  begin
    Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
    Offset += WindowSize;

    DecodeSegments(Vad, Recognizer, Wave.SampleRate);
  end;

  { Flush the VAD so a trailing speech segment is emitted, then drain it. }
  Vad.Flush;
  DecodeSegments(Vad, Recognizer, Wave.SampleRate);

  FreeAndNil(Recognizer);
  FreeAndNil(Vad);
end.
@@ -8,7 +8,7 @@ You can download the model files from @@ -8,7 +8,7 @@ You can download the model files from
8 https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models 8 https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
9 } 9 }
10 10
11 -program vad_with_whisper; 11 +program vad_with_sense_voice;
12 12
13 {$mode objfpc} 13 {$mode objfpc}
14 14
@@ -1969,7 +1969,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( @@ -1969,7 +1969,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
1969 return p->impl->GetOutputSamplingRate(); 1969 return p->impl->GetOutputSamplingRate();
1970 } 1970 }
1971 1971
1972 -void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) { 1972 +void SherpaOnnxLinearResamplerReset(const SherpaOnnxLinearResampler *p) {
1973 p->impl->Reset(); 1973 p->impl->Reset();
1974 } 1974 }
1975 1975
@@ -270,6 +270,11 @@ type @@ -270,6 +270,11 @@ type
270 function ToString: AnsiString; 270 function ToString: AnsiString;
271 end; 271 end;
272 272
  273 + TSherpaOnnxOfflineDolphinModelConfig = record
  274 + Model: AnsiString;
  275 + function ToString: AnsiString;
  276 + end;
  277 +
273 TSherpaOnnxOfflineWhisperModelConfig = record 278 TSherpaOnnxOfflineWhisperModelConfig = record
274 Encoder: AnsiString; 279 Encoder: AnsiString;
275 Decoder: AnsiString; 280 Decoder: AnsiString;
@@ -331,6 +336,7 @@ type @@ -331,6 +336,7 @@ type
331 SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; 336 SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
332 Moonshine: TSherpaOnnxOfflineMoonshineModelConfig; 337 Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
333 FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig; 338 FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
  339 + Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
334 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); 340 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
335 function ToString: AnsiString; 341 function ToString: AnsiString;
336 end; 342 end;
@@ -694,6 +700,9 @@ type @@ -694,6 +700,9 @@ type
694 SherpaOnnxOfflineNemoEncDecCtcModelConfig = record 700 SherpaOnnxOfflineNemoEncDecCtcModelConfig = record
695 Model: PAnsiChar; 701 Model: PAnsiChar;
696 end; 702 end;
  703 + SherpaOnnxOfflineDolphinModelConfig = record
  704 + Model: PAnsiChar;
  705 + end;
697 SherpaOnnxOfflineWhisperModelConfig = record 706 SherpaOnnxOfflineWhisperModelConfig = record
698 Encoder: PAnsiChar; 707 Encoder: PAnsiChar;
699 Decoder: PAnsiChar; 708 Decoder: PAnsiChar;
@@ -740,6 +749,7 @@ type @@ -740,6 +749,7 @@ type
740 SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig; 749 SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
741 Moonshine: SherpaOnnxOfflineMoonshineModelConfig; 750 Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
742 FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig; 751 FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
  752 + Dolphin: SherpaOnnxOfflineDolphinModelConfig;
743 end; 753 end;
744 754
745 SherpaOnnxOfflineRecognizerConfig = record 755 SherpaOnnxOfflineRecognizerConfig = record
@@ -1461,6 +1471,12 @@ begin @@ -1461,6 +1471,12 @@ begin
1461 [Self.Model]); 1471 [Self.Model]);
1462 end; 1472 end;
1463 1473
  1474 +function TSherpaOnnxOfflineDolphinModelConfig.ToString: AnsiString;
  1475 +begin
  1476 + Result := Format('TSherpaOnnxOfflineDolphinModelConfig(Model := %s)',
  1477 + [Self.Model]);
  1478 +end;
  1479 +
1464 function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString; 1480 function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
1465 begin 1481 begin
1466 Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' + 1482 Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
@@ -1534,14 +1550,15 @@ begin @@ -1534,14 +1550,15 @@ begin
1534 'TeleSpeechCtc := %s, ' + 1550 'TeleSpeechCtc := %s, ' +
1535 'SenseVoice := %s, ' + 1551 'SenseVoice := %s, ' +
1536 'Moonshine := %s, ' + 1552 'Moonshine := %s, ' +
1537 - 'FireRedAsr := %s' + 1553 + 'FireRedAsr := %s, ' +
  1554 + 'Dolphin := %s' +
1538 ')', 1555 ')',
1539 [Self.Transducer.ToString, Self.Paraformer.ToString, 1556 [Self.Transducer.ToString, Self.Paraformer.ToString,
1540 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, 1557 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
1541 Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, 1558 Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
1542 Self.ModelType, Self.ModelingUnit, Self.BpeVocab, 1559 Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
1543 Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, 1560 Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
1544 - Self.FireRedAsr.ToString 1561 + Self.FireRedAsr.ToString, Self.Dolphin.ToString
1545 ]); 1562 ]);
1546 end; 1563 end;
1547 1564
@@ -1610,6 +1627,8 @@ begin @@ -1610,6 +1627,8 @@ begin
1610 C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder); 1627 C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
1611 C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder); 1628 C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
1612 1629
  1630 + C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
  1631 +
1613 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); 1632 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
1614 C.LMConfig.Scale := Config.LMConfig.Scale; 1633 C.LMConfig.Scale := Config.LMConfig.Scale;
1615 1634