Committed by
GitHub
Add Pascal API for Dolphin CTC models (#2096)
正在显示
11 个修改的文件
包含
343 行增加
和
7 行删除
| @@ -149,6 +149,11 @@ jobs: | @@ -149,6 +149,11 @@ jobs: | ||
| 149 | cd ./pascal-api-examples | 149 | cd ./pascal-api-examples |
| 150 | 150 | ||
| 151 | pushd non-streaming-asr | 151 | pushd non-streaming-asr |
| 152 | + | ||
| 153 | + ./run-dolphin-ctc.sh | ||
| 154 | + rm -rf sherpa-onnx-* | ||
| 155 | + echo "---" | ||
| 156 | + | ||
| 152 | ./run-zipformer-transducer.sh | 157 | ./run-zipformer-transducer.sh |
| 153 | rm -rf sherpa-onnx-* | 158 | rm -rf sherpa-onnx-* |
| 154 | echo "---" | 159 | echo "---" |
| @@ -253,7 +258,13 @@ jobs: | @@ -253,7 +258,13 @@ jobs: | ||
| 253 | 258 | ||
| 254 | cd ./pascal-api-examples | 259 | cd ./pascal-api-examples |
| 255 | 260 | ||
| 261 | + | ||
| 256 | pushd vad-with-non-streaming-asr | 262 | pushd vad-with-non-streaming-asr |
| 263 | + | ||
| 264 | + time ./run-vad-with-dolphin-ctc.sh | ||
| 265 | + rm -rf sherpa-onnx-* | ||
| 266 | + echo "---" | ||
| 267 | + | ||
| 257 | time ./run-vad-with-moonshine.sh | 268 | time ./run-vad-with-moonshine.sh |
| 258 | rm -rf sherpa-onnx-* | 269 | rm -rf sherpa-onnx-* |
| 259 | echo "---" | 270 | echo "---" |
| @@ -60,7 +60,7 @@ This repository supports running the following functions **locally** | @@ -60,7 +60,7 @@ This repository supports running the following functions **locally** | ||
| 60 | 60 | ||
| 61 | on the following platforms and operating systems: | 61 | on the following platforms and operating systems: |
| 62 | 62 | ||
| 63 | - - x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64) | 63 | + - x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64), **RK NPU** |
| 64 | - Linux, macOS, Windows, openKylin | 64 | - Linux, macOS, Windows, openKylin |
| 65 | - Android, WearOS | 65 | - Android, WearOS |
| 66 | - iOS | 66 | - iOS |
| @@ -5,6 +5,7 @@ APIs with non-streaming models for speech recognition. | @@ -5,6 +5,7 @@ APIs with non-streaming models for speech recognition. | ||
| 5 | 5 | ||
| 6 | |File|Description| | 6 | |File|Description| |
| 7 | |----|-----------| | 7 | |----|-----------| |
| 8 | +|[run-dolphin-ctc.sh](./run-dolphin-ctc.sh)|Use a non-streaming [Dolphin](https://github.com/DataoceanAI/Dolphin) CTC model for speech recognition| | ||
| 8 | |[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition| | 9 | |[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition| |
| 9 | |[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition| | 10 | |[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition| |
| 10 | |[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers| | 11 | |[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers| |
{ Copyright (c) 2025 Xiaomi Corporation }

{
This file shows how to use a non-streaming Dolphin CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program dolphin_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

const
  {Directory created by extracting the model archive; every model file and
   the test wave used below live underneath it.}
  ModelDir = './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02';

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.Dolphin.Model := ModelDir + '/model.int8.onnx';
  Config.ModelConfig.Tokens := ModelDir + '/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := ModelDir + '/test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  {Stop early when there is nothing to decode. Without this guard Duration
   below would be 0 (or undefined for a zero sample rate) and the real-time
   factor computation would divide by zero.
   NOTE(review): presumably SherpaOnnxReadWave returns no samples when the
   file cannot be read -- confirm against the sherpa_onnx unit.}
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read samples from %s', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  try
    Stream := Recognizer.CreateStream();
    try
      {Only feeding the samples, decoding and fetching the result are timed.}
      Start := Now;

      Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
      Recognizer.Decode(Stream);

      RecognitionResult := Recognizer.GetResult(Stream);

      Stop := Now;

      Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
      Duration := Length(Wave.Samples) / Wave.SampleRate;
      {Real-time factor: processing time divided by audio duration.}
      RealTimeFactor := Elapsed / Duration;

      WriteLn(RecognitionResult.ToString);
      WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
      WriteLn(Format('Elapsed %.3f s', [Elapsed]));
      WriteLn(Format('Wave duration %.3f s', [Duration]));
      WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
    finally
      {Free resources to avoid memory leaks, even when decoding raises.

       Note: You don't need to invoke them for this simple script.
       However, you have to invoke them in your own large/complex project.
      }
      FreeAndNil(Stream);
    end;
  finally
    FreeAndNil(Recognizer);
  end;
end.
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library (only if it is not installed
# yet), downloads the Dolphin CTC model (only if it is missing), compiles
# dolphin_ctc.pas with fpc and runs the resulting binary.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"
MODEL_NAME=sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Every relative path below (model directory, ./dolphin_ctc.pas, the compiled
# binary) is relative to this script, so run from its directory regardless of
# where the script was invoked from.
cd "$SCRIPT_DIR"

# Build only when none of the platform-specific shared libraries exists.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f "./$MODEL_NAME/model.int8.onnx" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$MODEL_NAME.tar.bz2"
  tar xvf "$MODEL_NAME.tar.bz2"
  rm "$MODEL_NAME.tar.bz2"
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./dolphin_ctc.pas

# Append the previous value only when it is non-empty; a trailing ':' would
# add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./dolphin_ctc
| @@ -6,7 +6,10 @@ with non-streaming speech recognition models. | @@ -6,7 +6,10 @@ with non-streaming speech recognition models. | ||
| 6 | 6 | ||
| 7 | |File|Description| | 7 | |File|Description| |
| 8 | |---------|------------| | 8 | |---------|------------| |
| 9 | -|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + Whisper for speech recognition.| | ||
| 10 | -|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + SenseVoice for speech recognition.| | 9 | +|[run-vad-with-dolphin-ctc.sh](./run-vad-with-dolphin-ctc.sh)|It shows how to use the VAD + [Dolphin](https://github.com/DataoceanAI/Dolphin) for speech recognition.| |
| 10 | +|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + [Whisper](https://github.com/openai/whisper) for speech recognition.| | ||
| 11 | +|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + [SenseVoice](https://github.com/FunAudioLLM/SenseVoice) for speech recognition.| | ||
| 12 | +|[run-vad-with-moonshine.sh](./run-vad-with-moonshine.sh)|It shows how to use the VAD + [Moonshine](https://github.com/usefulsensors/moonshine) for speech recognition.| | ||
| 13 | + | ||
| 11 | 14 | ||
| 12 | Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models. | 15 | Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models. |
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library (only if it is not installed
# yet), downloads the silero VAD model, the test wave and the Dolphin CTC
# model (each only if missing), compiles vad_with_dolphin.pas with fpc and
# runs the resulting binary.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"
ASSET_URL=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models
MODEL_NAME=sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Every relative path below (downloaded assets, ./vad_with_dolphin.pas, the
# compiled binary) is relative to this script, so run from its directory
# regardless of where the script was invoked from.
cd "$SCRIPT_DIR"

# Build only when none of the platform-specific shared libraries exists.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

if [[ ! -f ./silero_vad.onnx ]]; then
  curl -SL -O "$ASSET_URL/silero_vad.onnx"
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O "$ASSET_URL/lei-jun-test.wav"
fi

if [ ! -f "./$MODEL_NAME/model.int8.onnx" ]; then
  curl -SL -O "$ASSET_URL/$MODEL_NAME.tar.bz2"
  tar xvf "$MODEL_NAME.tar.bz2"
  rm "$MODEL_NAME.tar.bz2"
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./vad_with_dolphin.pas

# Append the previous value only when it is non-empty; a trailing ':' would
# add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./vad_with_dolphin
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming Dolphin model | ||
| 5 | +with silero VAD to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program vad_with_dolphin; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + SysUtils; | ||
| 18 | + | ||
{ Builds a voice activity detector backed by the silero VAD model that is
  expected at ./silero_vad.onnx. The caller owns the returned object and is
  responsible for freeing it. }
function CreateVad(): TSherpaOnnxVoiceActivityDetector;
const
  {Please don't change these two values unless you know the details}
  ExpectedSampleRate = 16000;
  SamplesPerWindow = 512;
var
  VadConfig: TSherpaOnnxVadModelConfig;
begin
  Initialize(VadConfig);

  {Settings shared by the whole detector}
  VadConfig.SampleRate := ExpectedSampleRate;
  VadConfig.NumThreads := 1;
  VadConfig.Provider := 'cpu';
  VadConfig.Debug := True;

  {Settings specific to the silero VAD model}
  VadConfig.SileroVad.Model := './silero_vad.onnx';
  VadConfig.SileroVad.Threshold := 0.5;
  VadConfig.SileroVad.MinSilenceDuration := 0.5;
  VadConfig.SileroVad.MinSpeechDuration := 0.5;
  VadConfig.SileroVad.WindowSize := SamplesPerWindow;

  {NOTE(review): the second argument is presumably the detector's internal
   buffer size in seconds -- confirm against the sherpa_onnx unit.}
  Result := TSherpaOnnxVoiceActivityDetector.Create(VadConfig, 30);
end;
| 43 | + | ||
{ Builds an offline (non-streaming) recognizer configured for the int8
  Dolphin CTC model, running on the CPU with a single thread. The caller
  owns the returned object and is responsible for freeing it. }
function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer;
const
  {Directory created by extracting the model archive}
  ModelDir = './sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02';
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  RecognizerConfig.ModelConfig.Debug := False;
  RecognizerConfig.ModelConfig.NumThreads := 1;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.Tokens := ModelDir + '/tokens.txt';
  RecognizerConfig.ModelConfig.Dolphin.Model := ModelDir + '/model.int8.onnx';

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
| 58 | + | ||
{ Decodes and prints every speech segment currently queued in Vad.

  Each segment is removed from the detector, decoded with a fresh stream
  created from Recognizer, and printed as "<start> -- <end> <text>", where
  the times are in seconds, derived from the segment's sample offset and
  length divided by SampleRate.

  Vad:        detector whose queued segments are drained.
  Recognizer: recognizer used to decode each segment.
  SampleRate: sample rate of the audio that was fed to Vad. }
procedure DecodePendingSegments(Vad: TSherpaOnnxVoiceActivityDetector;
  Recognizer: TSherpaOnnxOfflineRecognizer; SampleRate: Integer);
var
  SpeechSegment: TSherpaOnnxSpeechSegment;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  Start: Single;
  Duration: Single;
begin
  while not Vad.IsEmpty do
  begin
    SpeechSegment := Vad.Front();
    Vad.Pop();

    Stream := Recognizer.CreateStream();
    try
      Stream.AcceptWaveform(SpeechSegment.Samples, SampleRate);
      Recognizer.Decode(Stream);
      RecognitionResult := Recognizer.GetResult(Stream);

      Start := SpeechSegment.Start / SampleRate;
      Duration := Length(SpeechSegment.Samples) / SampleRate;
      WriteLn(Format('%.3f -- %.3f %s',
        [Start, Start + Duration, RecognitionResult.Text]));
    finally
      {One stream per segment; release it even when decoding raises.}
      FreeAndNil(Stream);
    end;
  end;
end;

var
  Wave: TSherpaOnnxWave;

  Recognizer: TSherpaOnnxOfflineRecognizer;
  Vad: TSherpaOnnxVoiceActivityDetector;

  Offset: Integer;
  WindowSize: Integer;
begin
  {Start from nil so the finally block is safe even if a constructor raises.}
  Vad := nil;
  Recognizer := nil;
  try
    Vad := CreateVad();
    Recognizer := CreateOfflineRecognizer();

    Wave := SherpaOnnxReadWave('./lei-jun-test.wav');
    if Wave.SampleRate <> Vad.Config.SampleRate then
    begin
      WriteLn(Format('Expected sample rate: %d. Given: %d',
        [Vad.Config.SampleRate, Wave.SampleRate]));
    end
    else
    begin
      {Feed the wave to the detector one full window at a time; a trailing
       partial window (fewer than WindowSize samples) is not fed.}
      WindowSize := Vad.Config.SileroVad.WindowSize;
      Offset := 0;
      while Offset + WindowSize <= Length(Wave.Samples) do
      begin
        Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
        Offset += WindowSize;

        DecodePendingSegments(Vad, Recognizer, Wave.SampleRate);
      end;

      {Flush the detector, then decode whatever segments it queued.}
      Vad.Flush;
      DecodePendingSegments(Vad, Recognizer, Wave.SampleRate);
    end;
  finally
    {Runs on every path, including the sample-rate mismatch above.}
    FreeAndNil(Recognizer);
    FreeAndNil(Vad);
  end;
end.
| @@ -8,7 +8,7 @@ You can download the model files from | @@ -8,7 +8,7 @@ You can download the model files from | ||
| 8 | https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | 8 | https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models |
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | -program vad_with_whisper; | 11 | +program vad_with_sense_voice; |
| 12 | 12 | ||
| 13 | {$mode objfpc} | 13 | {$mode objfpc} |
| 14 | 14 |
| @@ -1969,7 +1969,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( | @@ -1969,7 +1969,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( | ||
| 1969 | return p->impl->GetOutputSamplingRate(); | 1969 | return p->impl->GetOutputSamplingRate(); |
| 1970 | } | 1970 | } |
| 1971 | 1971 | ||
| 1972 | -void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) { | 1972 | +void SherpaOnnxLinearResamplerReset(const SherpaOnnxLinearResampler *p) { |
| 1973 | p->impl->Reset(); | 1973 | p->impl->Reset(); |
| 1974 | } | 1974 | } |
| 1975 | 1975 |
| @@ -270,6 +270,11 @@ type | @@ -270,6 +270,11 @@ type | ||
| 270 | function ToString: AnsiString; | 270 | function ToString: AnsiString; |
| 271 | end; | 271 | end; |
| 272 | 272 | ||
| 273 | + TSherpaOnnxOfflineDolphinModelConfig = record | ||
| 274 | + Model: AnsiString; | ||
| 275 | + function ToString: AnsiString; | ||
| 276 | + end; | ||
| 277 | + | ||
| 273 | TSherpaOnnxOfflineWhisperModelConfig = record | 278 | TSherpaOnnxOfflineWhisperModelConfig = record |
| 274 | Encoder: AnsiString; | 279 | Encoder: AnsiString; |
| 275 | Decoder: AnsiString; | 280 | Decoder: AnsiString; |
| @@ -331,6 +336,7 @@ type | @@ -331,6 +336,7 @@ type | ||
| 331 | SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; | 336 | SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; |
| 332 | Moonshine: TSherpaOnnxOfflineMoonshineModelConfig; | 337 | Moonshine: TSherpaOnnxOfflineMoonshineModelConfig; |
| 333 | FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig; | 338 | FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig; |
| 339 | + Dolphin: TSherpaOnnxOfflineDolphinModelConfig; | ||
| 334 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); | 340 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); |
| 335 | function ToString: AnsiString; | 341 | function ToString: AnsiString; |
| 336 | end; | 342 | end; |
| @@ -694,6 +700,9 @@ type | @@ -694,6 +700,9 @@ type | ||
| 694 | SherpaOnnxOfflineNemoEncDecCtcModelConfig = record | 700 | SherpaOnnxOfflineNemoEncDecCtcModelConfig = record |
| 695 | Model: PAnsiChar; | 701 | Model: PAnsiChar; |
| 696 | end; | 702 | end; |
| 703 | + SherpaOnnxOfflineDolphinModelConfig = record | ||
| 704 | + Model: PAnsiChar; | ||
| 705 | + end; | ||
| 697 | SherpaOnnxOfflineWhisperModelConfig = record | 706 | SherpaOnnxOfflineWhisperModelConfig = record |
| 698 | Encoder: PAnsiChar; | 707 | Encoder: PAnsiChar; |
| 699 | Decoder: PAnsiChar; | 708 | Decoder: PAnsiChar; |
| @@ -740,6 +749,7 @@ type | @@ -740,6 +749,7 @@ type | ||
| 740 | SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig; | 749 | SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig; |
| 741 | Moonshine: SherpaOnnxOfflineMoonshineModelConfig; | 750 | Moonshine: SherpaOnnxOfflineMoonshineModelConfig; |
| 742 | FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig; | 751 | FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig; |
| 752 | + Dolphin: SherpaOnnxOfflineDolphinModelConfig; | ||
| 743 | end; | 753 | end; |
| 744 | 754 | ||
| 745 | SherpaOnnxOfflineRecognizerConfig = record | 755 | SherpaOnnxOfflineRecognizerConfig = record |
| @@ -1461,6 +1471,12 @@ begin | @@ -1461,6 +1471,12 @@ begin | ||
| 1461 | [Self.Model]); | 1471 | [Self.Model]); |
| 1462 | end; | 1472 | end; |
| 1463 | 1473 | ||
| 1474 | +function TSherpaOnnxOfflineDolphinModelConfig.ToString: AnsiString; | ||
| 1475 | +begin | ||
| 1476 | + Result := Format('TSherpaOnnxOfflineDolphinModelConfig(Model := %s)', | ||
| 1477 | + [Self.Model]); | ||
| 1478 | +end; | ||
| 1479 | + | ||
| 1464 | function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString; | 1480 | function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString; |
| 1465 | begin | 1481 | begin |
| 1466 | Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' + | 1482 | Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' + |
| @@ -1534,14 +1550,15 @@ begin | @@ -1534,14 +1550,15 @@ begin | ||
| 1534 | 'TeleSpeechCtc := %s, ' + | 1550 | 'TeleSpeechCtc := %s, ' + |
| 1535 | 'SenseVoice := %s, ' + | 1551 | 'SenseVoice := %s, ' + |
| 1536 | 'Moonshine := %s, ' + | 1552 | 'Moonshine := %s, ' + |
| 1537 | - 'FireRedAsr := %s' + | 1553 | + 'FireRedAsr := %s, ' + |
| 1554 | + 'Dolphin := %s' + | ||
| 1538 | ')', | 1555 | ')', |
| 1539 | [Self.Transducer.ToString, Self.Paraformer.ToString, | 1556 | [Self.Transducer.ToString, Self.Paraformer.ToString, |
| 1540 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, | 1557 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, |
| 1541 | Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, | 1558 | Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, |
| 1542 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, | 1559 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, |
| 1543 | Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, | 1560 | Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, |
| 1544 | - Self.FireRedAsr.ToString | 1561 | + Self.FireRedAsr.ToString, Self.Dolphin.ToString |
| 1545 | ]); | 1562 | ]); |
| 1546 | end; | 1563 | end; |
| 1547 | 1564 | ||
| @@ -1610,6 +1627,8 @@ begin | @@ -1610,6 +1627,8 @@ begin | ||
| 1610 | C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder); | 1627 | C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder); |
| 1611 | C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder); | 1628 | C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder); |
| 1612 | 1629 | ||
| 1630 | + C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model); | ||
| 1631 | + | ||
| 1613 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); | 1632 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); |
| 1614 | C.LMConfig.Scale := Config.LMConfig.Scale; | 1633 | C.LMConfig.Scale := Config.LMConfig.Scale; |
| 1615 | 1634 |
-
请 注册 或 登录 后发表评论