正在显示
24 个修改的文件
包含
1199 行增加
和
14 行删除
| @@ -116,12 +116,54 @@ jobs: | @@ -116,12 +116,54 @@ jobs: | ||
| 116 | cp -v install/lib/*.dll ../pascal-api-examples/read-wav | 116 | cp -v install/lib/*.dll ../pascal-api-examples/read-wav |
| 117 | cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr | 117 | cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr |
| 118 | cp -v install/lib/*.dll ../pascal-api-examples/non-streaming-asr | 118 | cp -v install/lib/*.dll ../pascal-api-examples/non-streaming-asr |
| 119 | + cp -v install/lib/*.dll ../pascal-api-examples/vad | ||
| 120 | + cp -v install/lib/*.dll ../pascal-api-examples/vad-with-non-streaming-asr | ||
| 119 | 121 | ||
| 120 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav | 122 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav |
| 121 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr | 123 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr |
| 122 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/non-streaming-asr | 124 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/non-streaming-asr |
| 125 | + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/vad | ||
| 126 | + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/vad-with-non-streaming-asr | ||
| 123 | fi | 127 | fi |
| 124 | 128 | ||
| 129 | + - name: Run Pascal test (VAD + non-streaming ASR) | ||
| 130 | + shell: bash | ||
| 131 | + run: | | ||
| 132 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 133 | + | ||
| 134 | + cd ./pascal-api-examples | ||
| 135 | + | ||
| 136 | + pushd vad-with-non-streaming-asr | ||
| 137 | + time ./run-vad-with-whisper.sh | ||
| 138 | + rm -rf sherpa-onnx-* | ||
| 139 | + echo "---" | ||
| 140 | + | ||
| 141 | + time ./run-vad-with-sense-voice.sh | ||
| 142 | + rm -rf sherpa-onnx-* | ||
| 143 | + echo "---" | ||
| 144 | + | ||
| 145 | + ls -lh | ||
| 146 | + | ||
| 147 | + popd | ||
| 148 | + | ||
| 149 | + - name: Run Pascal test (VAD test) | ||
| 150 | + shell: bash | ||
| 151 | + run: | | ||
| 152 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 153 | + | ||
| 154 | + cd ./pascal-api-examples | ||
| 155 | + | ||
| 156 | + pushd vad | ||
| 157 | + ./run-circular-buffer.sh | ||
| 158 | + echo "---" | ||
| 159 | + | ||
| 160 | + time ./run-remove-silence.sh | ||
| 161 | + echo "---" | ||
| 162 | + | ||
| 163 | + ls -lh | ||
| 164 | + | ||
| 165 | + popd | ||
| 166 | + | ||
| 125 | - name: Run Pascal test (Read wav test) | 167 | - name: Run Pascal test (Read wav test) |
| 126 | shell: bash | 168 | shell: bash |
| 127 | run: | | 169 | run: | |
| @@ -8,3 +8,5 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). | @@ -8,3 +8,5 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). | ||
| 8 | |[read-wav](./read-wav)|It shows how to read a wave file.| | 8 | |[read-wav](./read-wav)|It shows how to read a wave file.| |
| 9 | |[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| | 9 | |[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| |
| 10 | |[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.| | 10 | |[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.| |
| 11 | +|[vad](./vad)| It shows how to use the voice activity detection API.| | ||
| 12 | +|[vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| It shows how to use the voice activity detection API with non-streaming models for speech recognition.| |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.NeMoCtC.Model := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx'; | 38 | Config.ModelConfig.NeMoCtC.Model := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx'; |
| 37 | Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt'; | 39 | Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt'; |
| 38 | Config.ModelConfig.Provider := 'cpu'; | 40 | Config.ModelConfig.Provider := 'cpu'; |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx'; | 38 | Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx'; |
| 37 | Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx'; | 39 | Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx'; |
| 38 | Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx'; | 40 | Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx'; |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'; | 38 | Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'; |
| 37 | Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'; | 39 | Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'; |
| 38 | Config.ModelConfig.Provider := 'cpu'; | 40 | Config.ModelConfig.Provider := 'cpu'; |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'; | 38 | Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'; |
| 37 | Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'; | 39 | Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'; |
| 38 | Config.ModelConfig.Provider := 'cpu'; | 40 | Config.ModelConfig.Provider := 'cpu'; |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx'; | 38 | Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx'; |
| 37 | Config.ModelConfig.SenseVoice.Language := 'auto'; | 39 | Config.ModelConfig.SenseVoice.Language := 'auto'; |
| 38 | Config.ModelConfig.SenseVoice.UseItn := False; | 40 | Config.ModelConfig.SenseVoice.UseItn := False; |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.TeleSpeechCtc := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx'; | 38 | Config.ModelConfig.TeleSpeechCtc := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx'; |
| 37 | Config.ModelConfig.Tokens := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt'; | 39 | Config.ModelConfig.Tokens := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt'; |
| 38 | Config.ModelConfig.Provider := 'cpu'; | 40 | Config.ModelConfig.Provider := 'cpu'; |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; | 38 | Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; |
| 37 | Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; | 39 | Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; |
| 38 | Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; | 40 | Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; |
| @@ -33,6 +33,8 @@ var | @@ -33,6 +33,8 @@ var | ||
| 33 | Duration: Single; | 33 | Duration: Single; |
| 34 | RealTimeFactor: Single; | 34 | RealTimeFactor: Single; |
| 35 | begin | 35 | begin |
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 36 | Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx'; | 38 | Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx'; |
| 37 | Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx'; | 39 | Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx'; |
| 38 | Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.onnx'; | 40 | Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.onnx'; |
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | + | ||
| 4 | +This directory contains examples of how to use VAD (voice activity detection) | ||
| 5 | +with non-streaming speech recognition models. | ||
| 6 | + | ||
| 7 | +|Script| Description| | ||
| 8 | +|---------|------------| | ||
| 9 | +|[run-vad-with-whisper.sh](./run-vad-with-whisper.sh)|It shows how to use the VAD + Whisper for speech recognition.| | ||
| 10 | +|[run-vad-with-sense-voice.sh](./run-vad-with-sense-voice.sh)|It shows how to use the VAD + SenseVoice for speech recognition.| | ||
| 11 | + | ||
| 12 | +Please refer to [non-streaming-asr](../non-streaming-asr) for more kinds of non-streaming models. |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + popd | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +if [[ ! -f ./silero_vad.onnx ]]; then | ||
| 27 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 28 | +fi | ||
| 29 | + | ||
| 30 | +if [ ! -f ./lei-jun-test.wav ]; then | ||
| 31 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then | ||
| 35 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 36 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 37 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 38 | +fi | ||
| 39 | + | ||
| 40 | +fpc \ | ||
| 41 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 42 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 43 | + ./vad_with_sense_voice.pas | ||
| 44 | + | ||
| 45 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 46 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 47 | + | ||
| 48 | +./vad_with_sense_voice |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + popd | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +if [[ ! -f ./silero_vad.onnx ]]; then | ||
| 27 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
| 28 | +fi | ||
| 29 | + | ||
| 30 | +if [ ! -f ./Obama.wav ]; then | ||
| 31 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then | ||
| 35 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 36 | + | ||
| 37 | + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 38 | + rm sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 39 | +fi | ||
| 40 | + | ||
| 41 | +fpc \ | ||
| 42 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 43 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 44 | + ./vad_with_whisper.pas | ||
| 45 | + | ||
| 46 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 47 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 48 | + | ||
| 49 | +./vad_with_whisper |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming SenseVoice model | ||
| 5 | +with silero VAD to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program vad_with_whisper; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + SysUtils; | ||
| 18 | + | ||
| 19 | +function CreateVad(): TSherpaOnnxVoiceActivityDetector; | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxVadModelConfig; | ||
| 22 | + | ||
| 23 | + SampleRate: Integer; | ||
| 24 | + WindowSize: Integer; | ||
| 25 | +begin | ||
| 26 | + Initialize(Config); | ||
| 27 | + | ||
| 28 | + SampleRate := 16000; {Please don't change it unless you know the details} | ||
| 29 | + WindowSize := 512; {Please don't change it unless you know the details} | ||
| 30 | + | ||
| 31 | + Config.SileroVad.Model := './silero_vad.onnx'; | ||
| 32 | + Config.SileroVad.MinSpeechDuration := 0.5; | ||
| 33 | + Config.SileroVad.MinSilenceDuration := 0.5; | ||
| 34 | + Config.SileroVad.Threshold := 0.5; | ||
| 35 | + Config.SileroVad.WindowSize := WindowSize; | ||
| 36 | + Config.NumThreads:= 1; | ||
| 37 | + Config.Debug:= True; | ||
| 38 | + Config.Provider:= 'cpu'; | ||
| 39 | + Config.SampleRate := SampleRate; | ||
| 40 | + | ||
| 41 | + Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30); | ||
| 42 | +end; | ||
| 43 | + | ||
| 44 | +function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer; | ||
| 45 | +var | ||
| 46 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 47 | +begin | ||
| 48 | + Initialize(Config); | ||
| 49 | + | ||
| 50 | + Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx'; | ||
| 51 | + Config.ModelConfig.SenseVoice.Language := 'auto'; | ||
| 52 | + Config.ModelConfig.SenseVoice.UseItn := False; | ||
| 53 | + Config.ModelConfig.Tokens := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt'; | ||
| 54 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 55 | + Config.ModelConfig.NumThreads := 1; | ||
| 56 | + Config.ModelConfig.Debug := False; | ||
| 57 | + | ||
| 58 | + Result := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 59 | +end; | ||
| 60 | + | ||
| 61 | +var | ||
| 62 | + Wave: TSherpaOnnxWave; | ||
| 63 | + | ||
| 64 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 65 | + Vad: TSherpaOnnxVoiceActivityDetector; | ||
| 66 | + | ||
| 67 | + Offset: Integer; | ||
| 68 | + WindowSize: Integer; | ||
| 69 | + SpeechSegment: TSherpaOnnxSpeechSegment; | ||
| 70 | + | ||
| 71 | + Start: Single; | ||
| 72 | + Duration: Single; | ||
| 73 | + | ||
| 74 | + Stream: TSherpaOnnxOfflineStream; | ||
| 75 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 76 | +begin | ||
| 77 | + Vad := CreateVad(); | ||
| 78 | + Recognizer := CreateOfflineRecognizer(); | ||
| 79 | + | ||
| 80 | + Wave := SherpaOnnxReadWave('./lei-jun-test.wav'); | ||
| 81 | + if Wave.SampleRate <> Vad.Config.SampleRate then | ||
| 82 | + begin | ||
| 83 | + WriteLn(Format('Expected sample rate: %d. Given: %d', | ||
| 84 | + [Vad.Config.SampleRate, Wave.SampleRate])); | ||
| 85 | + | ||
| 86 | + Exit; | ||
| 87 | + end; | ||
| 88 | + | ||
| 89 | + WindowSize := Vad.Config.SileroVad.WindowSize; | ||
| 90 | + Offset := 0; | ||
| 91 | + while Offset + WindowSize <= Length(Wave.Samples) do | ||
| 92 | + begin | ||
| 93 | + Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize); | ||
| 94 | + Offset += WindowSize; | ||
| 95 | + | ||
| 96 | + while not Vad.IsEmpty do | ||
| 97 | + begin | ||
| 98 | + SpeechSegment := Vad.Front(); | ||
| 99 | + Vad.Pop(); | ||
| 100 | + Stream := Recognizer.CreateStream(); | ||
| 101 | + | ||
| 102 | + Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate); | ||
| 103 | + Recognizer.Decode(Stream); | ||
| 104 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 105 | + | ||
| 106 | + Start := SpeechSegment.Start / Wave.SampleRate; | ||
| 107 | + Duration := Length(SpeechSegment.Samples) / Wave.SampleRate; | ||
| 108 | + WriteLn(Format('%.3f -- %.3f %s', | ||
| 109 | + [Start, Start + Duration, RecognitionResult.Text])); | ||
| 110 | + | ||
| 111 | + FreeAndNil(Stream); | ||
| 112 | + end; | ||
| 113 | + end; | ||
| 114 | + | ||
| 115 | + Vad.Flush; | ||
| 116 | + | ||
| 117 | + while not Vad.IsEmpty do | ||
| 118 | + begin | ||
| 119 | + SpeechSegment := Vad.Front(); | ||
| 120 | + Vad.Pop(); | ||
| 121 | + Stream := Recognizer.CreateStream(); | ||
| 122 | + | ||
| 123 | + Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate); | ||
| 124 | + Recognizer.Decode(Stream); | ||
| 125 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 126 | + | ||
| 127 | + Start := SpeechSegment.Start / Wave.SampleRate; | ||
| 128 | + Duration := Length(SpeechSegment.Samples) / Wave.SampleRate; | ||
| 129 | + WriteLn(Format('%.3f -- %.3f %s', | ||
| 130 | + [Start, Start + Duration, RecognitionResult.Text])); | ||
| 131 | + | ||
| 132 | + FreeAndNil(Stream); | ||
| 133 | + end; | ||
| 134 | + | ||
| 135 | + FreeAndNil(Recognizer); | ||
| 136 | + FreeAndNil(Vad); | ||
| 137 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming Whisper model | ||
| 5 | +with silero VAD to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program vad_with_whisper; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + SysUtils; | ||
| 18 | + | ||
| 19 | +function CreateVad(): TSherpaOnnxVoiceActivityDetector; | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxVadModelConfig; | ||
| 22 | + | ||
| 23 | + SampleRate: Integer; | ||
| 24 | + WindowSize: Integer; | ||
| 25 | +begin | ||
| 26 | + Initialize(Config); | ||
| 27 | + | ||
| 28 | + SampleRate := 16000; {Please don't change it unless you know the details} | ||
| 29 | + WindowSize := 512; {Please don't change it unless you know the details} | ||
| 30 | + | ||
| 31 | + Config.SileroVad.Model := './silero_vad.onnx'; | ||
| 32 | + Config.SileroVad.MinSpeechDuration := 0.5; | ||
| 33 | + Config.SileroVad.MinSilenceDuration := 0.5; | ||
| 34 | + Config.SileroVad.Threshold := 0.5; | ||
| 35 | + Config.SileroVad.WindowSize := WindowSize; | ||
| 36 | + Config.NumThreads:= 1; | ||
| 37 | + Config.Debug:= True; | ||
| 38 | + Config.Provider:= 'cpu'; | ||
| 39 | + Config.SampleRate := SampleRate; | ||
| 40 | + | ||
| 41 | + Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30); | ||
| 42 | +end; | ||
| 43 | + | ||
| 44 | +function CreateOfflineRecognizer(): TSherpaOnnxOfflineRecognizer; | ||
| 45 | +var | ||
| 46 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 47 | +begin | ||
| 48 | + Initialize(Config); | ||
| 49 | + | ||
| 50 | + Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; | ||
| 51 | + Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; | ||
| 52 | + Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; | ||
| 53 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 54 | + Config.ModelConfig.NumThreads := 1; | ||
| 55 | + Config.ModelConfig.Debug := False; | ||
| 56 | + | ||
| 57 | + Result := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 58 | +end; | ||
| 59 | + | ||
| 60 | +var | ||
| 61 | + Wave: TSherpaOnnxWave; | ||
| 62 | + | ||
| 63 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 64 | + Vad: TSherpaOnnxVoiceActivityDetector; | ||
| 65 | + | ||
| 66 | + Offset: Integer; | ||
| 67 | + WindowSize: Integer; | ||
| 68 | + SpeechSegment: TSherpaOnnxSpeechSegment; | ||
| 69 | + | ||
| 70 | + Start: Single; | ||
| 71 | + Duration: Single; | ||
| 72 | + | ||
| 73 | + Stream: TSherpaOnnxOfflineStream; | ||
| 74 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 75 | +begin | ||
| 76 | + Vad := CreateVad(); | ||
| 77 | + Recognizer := CreateOfflineRecognizer(); | ||
| 78 | + | ||
| 79 | + Wave := SherpaOnnxReadWave('./Obama.wav'); | ||
| 80 | + if Wave.SampleRate <> Vad.Config.SampleRate then | ||
| 81 | + begin | ||
| 82 | + WriteLn(Format('Expected sample rate: %d. Given: %d', | ||
| 83 | + [Vad.Config.SampleRate, Wave.SampleRate])); | ||
| 84 | + | ||
| 85 | + Exit; | ||
| 86 | + end; | ||
| 87 | + | ||
| 88 | + WindowSize := Vad.Config.SileroVad.WindowSize; | ||
| 89 | + Offset := 0; | ||
| 90 | + while Offset + WindowSize <= Length(Wave.Samples) do | ||
| 91 | + begin | ||
| 92 | + Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize); | ||
| 93 | + Offset += WindowSize; | ||
| 94 | + | ||
| 95 | + while not Vad.IsEmpty do | ||
| 96 | + begin | ||
| 97 | + SpeechSegment := Vad.Front(); | ||
| 98 | + Vad.Pop(); | ||
| 99 | + Stream := Recognizer.CreateStream(); | ||
| 100 | + | ||
| 101 | + Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate); | ||
| 102 | + Recognizer.Decode(Stream); | ||
| 103 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 104 | + | ||
| 105 | + Start := SpeechSegment.Start / Wave.SampleRate; | ||
| 106 | + Duration := Length(SpeechSegment.Samples) / Wave.SampleRate; | ||
| 107 | + WriteLn(Format('%.3f -- %.3f %s', | ||
| 108 | + [Start, Start + Duration, RecognitionResult.Text])); | ||
| 109 | + | ||
| 110 | + FreeAndNil(Stream); | ||
| 111 | + end; | ||
| 112 | + end; | ||
| 113 | + | ||
| 114 | + Vad.Flush; | ||
| 115 | + | ||
| 116 | + while not Vad.IsEmpty do | ||
| 117 | + begin | ||
| 118 | + SpeechSegment := Vad.Front(); | ||
| 119 | + Vad.Pop(); | ||
| 120 | + Stream := Recognizer.CreateStream(); | ||
| 121 | + | ||
| 122 | + Stream.AcceptWaveform(SpeechSegment.Samples, Wave.SampleRate); | ||
| 123 | + Recognizer.Decode(Stream); | ||
| 124 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 125 | + | ||
| 126 | + Start := SpeechSegment.Start / Wave.SampleRate; | ||
| 127 | + Duration := Length(SpeechSegment.Samples) / Wave.SampleRate; | ||
| 128 | + WriteLn(Format('%.3f -- %.3f %s', | ||
| 129 | + [Start, Start + Duration, RecognitionResult.Text])); | ||
| 130 | + | ||
| 131 | + FreeAndNil(Stream); | ||
| 132 | + end; | ||
| 133 | + | ||
| 134 | + FreeAndNil(Recognizer); | ||
| 135 | + FreeAndNil(Vad); | ||
| 136 | +end. |
pascal-api-examples/vad/.gitignore
0 → 100644
pascal-api-examples/vad/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | + | ||
| 4 | +This directory contains examples of how to use the VAD (voice activity detection) | ||
| 5 | +APIs. | ||
| 6 | + | ||
| 7 | +|Script| Description| | ||
| 8 | +|---------|------------| | ||
| 9 | +|[run-circular-buffer.sh](./run-circular-buffer.sh)|It shows how to use the circular buffer API.| | ||
| 10 | +|[run-remove-silence.sh](./run-remove-silence.sh)|It shows how to use the VAD API to remove silences from a wave file.| | ||
| 11 | + |
pascal-api-examples/vad/circular_buffer.pas
0 → 100644
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | +program circular_buffer; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the CircularBuffer API of sherpa-onnx | ||
| 5 | +} | ||
| 6 | + | ||
| 7 | +{$mode objfpc} | ||
| 8 | +{$ASSERTIONS ON} | ||
| 9 | + | ||
| 10 | +uses | ||
| 11 | + sherpa_onnx; | ||
| 12 | + | ||
| 13 | +var | ||
| 14 | + Buffer: TSherpaOnnxCircularBuffer; | ||
| 15 | + Samples: TSherpaOnnxSamplesArray; | ||
| 16 | +begin | ||
| 17 | + {The initial capacity is 5. It will be resized automatically if needed.} | ||
| 18 | + Buffer := TSherpaOnnxCircularBuffer.Create(5); | ||
| 19 | + Assert(Buffer.Size = 0); | ||
| 20 | + Assert(Buffer.Head = 0); | ||
| 21 | + Buffer.Push([0, 10, 20]); | ||
| 22 | + | ||
| 23 | + {Push() changes Size. Head is not changed.} | ||
| 24 | + Assert(Buffer.Size = 3); | ||
| 25 | + Assert(Buffer.Head = 0); | ||
| 26 | + | ||
| 27 | + Samples := Buffer.Get(0, 1); | ||
| 28 | + Assert(Length(Samples) = 1); | ||
| 29 | + Assert(Samples[0] = 0); | ||
| 30 | + | ||
| 31 | + { Get() does not change Size or Head} | ||
| 32 | + Assert(Buffer.Size = 3); | ||
| 33 | + Assert(Buffer.Head = 0); | ||
| 34 | + | ||
| 35 | + Samples := Buffer.Get(0, 2); | ||
| 36 | + Assert(Length(Samples) = 2); | ||
| 37 | + Assert(Samples[0] = 0); | ||
| 38 | + Assert(Samples[1] = 10); | ||
| 39 | + | ||
| 40 | + { The buffer will be resized since its initial capacity is 5 but we have | ||
| 41 | + pushed 7 elements into it. | ||
| 42 | + | ||
| 43 | + No data is lost during the resize. | ||
| 44 | + } | ||
| 45 | + Buffer.Push([30, 40, 50, 60]); | ||
| 46 | + | ||
| 47 | + Assert(Buffer.Size = 7); {There are now 7 elements} | ||
| 48 | + Assert(Buffer.Head = 0); | ||
| 49 | + | ||
| 50 | + {Remove the first 4 elements} | ||
| 51 | + Buffer.Pop(4); | ||
| 52 | + | ||
| 53 | + Assert(Buffer.Size = 3); {There are only 3 elements left} | ||
| 54 | + Assert(Buffer.Head = 4); | ||
| 55 | + | ||
| 56 | + Samples := Buffer.Get(Buffer.Head, 2); | ||
| 57 | + Assert(Length(Samples) = 2); | ||
| 58 | + Assert(Samples[0] = 40); | ||
| 59 | + Assert(Samples[1] = 50); | ||
| 60 | + | ||
| 61 | + Buffer.Pop(1); | ||
| 62 | + | ||
| 63 | + Assert(Buffer.Size = 2); {There are only 2 elements left} | ||
| 64 | + Assert(Buffer.Head = 5); | ||
| 65 | + | ||
| 66 | + Samples := Buffer.Get(Buffer.Head, 2); | ||
| 67 | + Assert(Length(Samples) = 2); | ||
| 68 | + Assert(Samples[0] = 50); | ||
| 69 | + Assert(Samples[1] = 60); | ||
| 70 | + | ||
| 71 | + Buffer.Pop(2); | ||
| 72 | + Assert(Buffer.Size = 0); {There are no elements left} | ||
| 73 | + Assert(Buffer.Head = 7); | ||
| 74 | + | ||
| 75 | + Buffer.Push([100, 200, 300, 400, 500]); | ||
| 76 | + Assert(Buffer.Size = 5); | ||
| 77 | + Assert(Buffer.Head = 7); | ||
| 78 | + | ||
| 79 | + Buffer.Pop(4); | ||
| 80 | + Assert(Buffer.Size = 1); | ||
| 81 | + | ||
| 82 | + {Head can be larger than the Capacity! | ||
| 83 | + This is what circular means. It points to Buffer.Head mod Capacity. | ||
| 84 | + } | ||
| 85 | + Assert(Buffer.Head = 11); | ||
| 86 | + Buffer.Push([600, 700]); | ||
| 87 | + | ||
| 88 | + Assert(Buffer.Size = 3); | ||
| 89 | + Assert(Buffer.Head = 11); | ||
| 90 | + | ||
| 91 | + Samples := Buffer.Get(Buffer.Head, 3); | ||
| 92 | + Assert(Length(Samples) = 3); | ||
| 93 | + Assert(Samples[0] = 500); | ||
| 94 | + Assert(Samples[1] = 600); | ||
| 95 | + Assert(Samples[2] = 700); | ||
| 96 | + | ||
| 97 | + Buffer.Pop(3); | ||
| 98 | + Assert(Buffer.Size = 0); | ||
| 99 | + Assert(Buffer.Head = 14); | ||
| 100 | + | ||
| 101 | + Buffer.Reset(); | ||
| 102 | + | ||
| 103 | + Assert(Buffer.Size = 0); | ||
| 104 | + Assert(Buffer.Head = 0); | ||
| 105 | +end. | ||
| 106 | + |
pascal-api-examples/vad/remove_silence.pas
0 → 100644
{ Copyright (c)  2024  Xiaomi Corporation }
{
This file shows how to use the VAD API from sherpa-onnx
to remove silences from a wave file.

It reads ./lei-jun-test.wav, feeds it to the voice activity detector in
fixed-size windows, concatenates the samples of every detected speech
segment and writes the result to ./lei-jun-test-no-silence.wav.

The process exit code is 1 when the input has an unexpected sample rate
or when the output file cannot be written.
}
program main;

{$mode delphi}

uses
  sherpa_onnx,
  SysUtils;

const
  InputWaveFilename = './lei-jun-test.wav';
  OutputWaveFilename = './lei-jun-test-no-silence.wav';

var
  Wave: TSherpaOnnxWave;

  Config: TSherpaOnnxVadModelConfig;
  Vad: TSherpaOnnxVoiceActivityDetector;
  Offset: Integer;
  WindowSize: Integer;
  SpeechSegment: TSherpaOnnxSpeechSegment;

  SampleRate: Integer;

  AllSpeechSegment: array of TSherpaOnnxSpeechSegment;
  AllSamples: array of Single;
  N: Integer;
  I: Integer;

{ Removes every segment currently queued in Vad, appends it to
  AllSpeechSegment and prints its start/end time in seconds.
  Shared by the streaming loop and by the final pass after Vad.Flush. }
procedure CollectPendingSegments;
var
  Segment: TSherpaOnnxSpeechSegment;
  Start: Single;
  Duration: Single;
begin
  while not Vad.IsEmpty do
  begin
    Segment := Vad.Front();
    Vad.Pop();

    SetLength(AllSpeechSegment, Length(AllSpeechSegment) + 1);
    AllSpeechSegment[High(AllSpeechSegment)] := Segment;

    Start := Segment.Start / SampleRate;
    Duration := Length(Segment.Samples) / SampleRate;
    WriteLn(Format('%.3f -- %.3f', [Start, Start + Duration]));
  end;
end;

begin
  SampleRate := 16000; {Please don't change it unless you know the details}

  Wave := SherpaOnnxReadWave(InputWaveFilename);
  if Wave.SampleRate <> SampleRate then
  begin
    WriteLn(Format('Expected sample rate: %d. Given: %d',
      [SampleRate, Wave.SampleRate]));

    { Report the failure to the calling shell instead of exiting with 0 }
    ExitCode := 1;
    Exit;
  end;

  WindowSize := 512; {Please don't change it unless you know the details}
  Initialize(Config);

  Config.SileroVad.Model := './silero_vad.onnx';
  Config.SileroVad.MinSpeechDuration := 0.25;
  Config.SileroVad.MinSilenceDuration := 0.5;
  Config.SileroVad.Threshold := 0.5;
  Config.SileroVad.WindowSize := WindowSize;
  Config.NumThreads:= 1;
  Config.Debug:= True;
  Config.Provider:= 'cpu';
  Config.SampleRate := SampleRate;

  AllSpeechSegment := nil;
  AllSamples := nil;

  Vad := TSherpaOnnxVoiceActivityDetector.Create(Config, 20);
  try
    { Feed the wave one full window at a time; a trailing partial window
      (fewer than WindowSize samples) is not passed to the detector. }
    Offset := 0;
    while Offset + WindowSize <= Length(Wave.Samples) do
    begin
      Vad.AcceptWaveform(Wave.Samples, Offset, WindowSize);
      Inc(Offset, WindowSize);

      CollectPendingSegments;
    end;

    Vad.Flush;
    CollectPendingSegments;

    { First pass: total number of speech samples }
    N := 0;
    for SpeechSegment in AllSpeechSegment do
      Inc(N, Length(SpeechSegment.Samples));

    SetLength(AllSamples, N);

    { Second pass: copy the segments back to back }
    N := 0;
    for SpeechSegment in AllSpeechSegment do
    begin
      for I := Low(SpeechSegment.Samples) to High(SpeechSegment.Samples) do
      begin
        AllSamples[N] := SpeechSegment.Samples[I];
        Inc(N);
      end;
    end;

    { Only claim success when the writer reports it }
    if SherpaOnnxWriteWave(OutputWaveFilename, AllSamples, SampleRate) then
      WriteLn('Saved to ' + OutputWaveFilename)
    else
    begin
      WriteLn('Failed to write ' + OutputWaveFilename);
      ExitCode := 1;
    end;
  finally
    { Release the detector even if an exception was raised above }
    FreeAndNil(Vad);
  end;
end.
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared library if it is not installed yet, compiles
# circular_buffer.pas with the Free Pascal compiler and runs the resulting
# test program.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# All relative paths below (the .pas source and the produced binary) are
# relative to this script, so make the script independent of the caller's cwd.
cd -- "$SCRIPT_DIR"

# Build only when none of the platform-specific C API libraries is installed.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

fpc \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./circular_buffer.pas

# Let the dynamic loader find the shared library at run time (Linux / macOS).
export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./circular_buffer
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared library if it is not installed yet, downloads
# the Silero VAD model and the test wave when missing, compiles
# remove_silence.pas with the Free Pascal compiler and runs it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Downloads, the .pas source and the produced binary are all addressed
# relative to this script, so make the script independent of the caller's cwd.
cd -- "$SCRIPT_DIR"

# Build only when none of the platform-specific C API libraries is installed.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# -f makes curl fail on an HTTP error instead of saving the error page under
# the expected file name (which would then be mistaken for a valid download).
if [[ ! -f ./silero_vad.onnx ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [[ ! -f ./lei-jun-test.wav ]]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

fpc \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./remove_silence.pas

# Let the dynamic loader find the shared library at run time (Linux / macOS).
export LD_LIBRARY_PATH="$LIB_DIR:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$LIB_DIR:$DYLD_LIBRARY_PATH"

./remove_silence
| @@ -95,6 +95,8 @@ void CircularBuffer::Push(const float *p, int32_t n) { | @@ -95,6 +95,8 @@ void CircularBuffer::Push(const float *p, int32_t n) { | ||
| 95 | "capacity to: %d", | 95 | "capacity to: %d", |
| 96 | n, size, n + size, capacity, new_capacity); | 96 | n, size, n + size, capacity, new_capacity); |
| 97 | Resize(new_capacity); | 97 | Resize(new_capacity); |
| 98 | + | ||
| 99 | + capacity = new_capacity; | ||
| 98 | } | 100 | } |
| 99 | 101 | ||
| 100 | int32_t start = tail_ % capacity; | 102 | int32_t start = tail_ % capacity; |
| @@ -2,9 +2,11 @@ | @@ -2,9 +2,11 @@ | ||
| 2 | 2 | ||
| 3 | unit sherpa_onnx; | 3 | unit sherpa_onnx; |
| 4 | 4 | ||
| 5 | -{$mode objfpc} | 5 | +{$IFDEF FPC} |
| 6 | + {$mode objfpc} | ||
| 7 | + {$modeSwitch advancedRecords} { to support records with methods } | ||
| 8 | +{$ENDIF} | ||
| 6 | 9 | ||
| 7 | -{$modeSwitch advancedRecords} { to support records with methods } | ||
| 8 | (* {$LongStrings ON} *) | 10 | (* {$LongStrings ON} *) |
| 9 | 11 | ||
| 10 | interface | 12 | interface |
| @@ -45,18 +47,21 @@ type | @@ -45,18 +47,21 @@ type | ||
| 45 | ModelingUnit: AnsiString; | 47 | ModelingUnit: AnsiString; |
| 46 | BpeVocab: AnsiString; | 48 | BpeVocab: AnsiString; |
| 47 | function ToString: AnsiString; | 49 | function ToString: AnsiString; |
| 50 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig); | ||
| 48 | end; | 51 | end; |
| 49 | 52 | ||
| 50 | TSherpaOnnxFeatureConfig = record | 53 | TSherpaOnnxFeatureConfig = record |
| 51 | SampleRate: Integer; | 54 | SampleRate: Integer; |
| 52 | FeatureDim: Integer; | 55 | FeatureDim: Integer; |
| 53 | function ToString: AnsiString; | 56 | function ToString: AnsiString; |
| 57 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFeatureConfig); | ||
| 54 | end; | 58 | end; |
| 55 | 59 | ||
| 56 | TSherpaOnnxOnlineCtcFstDecoderConfig = record | 60 | TSherpaOnnxOnlineCtcFstDecoderConfig = record |
| 57 | Graph: AnsiString; | 61 | Graph: AnsiString; |
| 58 | MaxActive: Integer; | 62 | MaxActive: Integer; |
| 59 | function ToString: AnsiString; | 63 | function ToString: AnsiString; |
| 64 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineCtcFstDecoderConfig); | ||
| 60 | end; | 65 | end; |
| 61 | 66 | ||
| 62 | TSherpaOnnxOnlineRecognizerConfig = record | 67 | TSherpaOnnxOnlineRecognizerConfig = record |
| @@ -75,6 +80,7 @@ type | @@ -75,6 +80,7 @@ type | ||
| 75 | RuleFars: AnsiString; | 80 | RuleFars: AnsiString; |
| 76 | BlankPenalty: Single; | 81 | BlankPenalty: Single; |
| 77 | function ToString: AnsiString; | 82 | function ToString: AnsiString; |
| 83 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineRecognizerConfig); | ||
| 78 | end; | 84 | end; |
| 79 | 85 | ||
| 80 | TSherpaOnnxOnlineRecognizerResult = record | 86 | TSherpaOnnxOnlineRecognizerResult = record |
| @@ -97,6 +103,7 @@ type | @@ -97,6 +103,7 @@ type | ||
| 97 | TSherpaOnnxOnlineRecognizer = class | 103 | TSherpaOnnxOnlineRecognizer = class |
| 98 | private | 104 | private |
| 99 | Handle: Pointer; | 105 | Handle: Pointer; |
| 106 | + _Config: TSherpaOnnxOnlineRecognizerConfig; | ||
| 100 | public | 107 | public |
| 101 | constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig); | 108 | constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig); |
| 102 | destructor Destroy; override; | 109 | destructor Destroy; override; |
| @@ -108,6 +115,7 @@ type | @@ -108,6 +115,7 @@ type | ||
| 108 | procedure Reset(Stream: TSherpaOnnxOnlineStream); | 115 | procedure Reset(Stream: TSherpaOnnxOnlineStream); |
| 109 | function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean; | 116 | function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean; |
| 110 | function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; | 117 | function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; |
| 118 | + property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config; | ||
| 111 | end; | 119 | end; |
| 112 | 120 | ||
| 113 | TSherpaOnnxOfflineTransducerModelConfig = record | 121 | TSherpaOnnxOfflineTransducerModelConfig = record |
| @@ -134,6 +142,7 @@ type | @@ -134,6 +142,7 @@ type | ||
| 134 | Task: AnsiString; | 142 | Task: AnsiString; |
| 135 | TailPaddings: Integer; | 143 | TailPaddings: Integer; |
| 136 | function ToString: AnsiString; | 144 | function ToString: AnsiString; |
| 145 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig); | ||
| 137 | end; | 146 | end; |
| 138 | 147 | ||
| 139 | TSherpaOnnxOfflineTdnnModelConfig = record | 148 | TSherpaOnnxOfflineTdnnModelConfig = record |
| @@ -145,12 +154,14 @@ type | @@ -145,12 +154,14 @@ type | ||
| 145 | Model: AnsiString; | 154 | Model: AnsiString; |
| 146 | Scale: Single; | 155 | Scale: Single; |
| 147 | function ToString: AnsiString; | 156 | function ToString: AnsiString; |
| 157 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig); | ||
| 148 | end; | 158 | end; |
| 149 | 159 | ||
| 150 | TSherpaOnnxOfflineSenseVoiceModelConfig = record | 160 | TSherpaOnnxOfflineSenseVoiceModelConfig = record |
| 151 | Model: AnsiString; | 161 | Model: AnsiString; |
| 152 | Language: AnsiString; | 162 | Language: AnsiString; |
| 153 | UseItn: Boolean; | 163 | UseItn: Boolean; |
| 164 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSenseVoiceModelConfig); | ||
| 154 | function ToString: AnsiString; | 165 | function ToString: AnsiString; |
| 155 | end; | 166 | end; |
| 156 | 167 | ||
| @@ -169,6 +180,7 @@ type | @@ -169,6 +180,7 @@ type | ||
| 169 | BpeVocab: AnsiString; | 180 | BpeVocab: AnsiString; |
| 170 | TeleSpeechCtc: AnsiString; | 181 | TeleSpeechCtc: AnsiString; |
| 171 | SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; | 182 | SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; |
| 183 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); | ||
| 172 | function ToString: AnsiString; | 184 | function ToString: AnsiString; |
| 173 | end; | 185 | end; |
| 174 | 186 | ||
| @@ -183,6 +195,7 @@ type | @@ -183,6 +195,7 @@ type | ||
| 183 | RuleFsts: AnsiString; | 195 | RuleFsts: AnsiString; |
| 184 | RuleFars: AnsiString; | 196 | RuleFars: AnsiString; |
| 185 | BlankPenalty: Single; | 197 | BlankPenalty: Single; |
| 198 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineRecognizerConfig); | ||
| 186 | function ToString: AnsiString; | 199 | function ToString: AnsiString; |
| 187 | end; | 200 | end; |
| 188 | 201 | ||
| @@ -205,18 +218,83 @@ type | @@ -205,18 +218,83 @@ type | ||
| 205 | TSherpaOnnxOfflineRecognizer = class | 218 | TSherpaOnnxOfflineRecognizer = class |
| 206 | private | 219 | private |
| 207 | Handle: Pointer; | 220 | Handle: Pointer; |
| 221 | + _Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 208 | public | 222 | public |
| 209 | constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig); | 223 | constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig); |
| 210 | destructor Destroy; override; | 224 | destructor Destroy; override; |
| 211 | function CreateStream: TSherpaOnnxOfflineStream; | 225 | function CreateStream: TSherpaOnnxOfflineStream; |
| 212 | procedure Decode(Stream: TSherpaOnnxOfflineStream); | 226 | procedure Decode(Stream: TSherpaOnnxOfflineStream); |
| 213 | function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; | 227 | function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; |
| 228 | + property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; | ||
| 214 | end; | 229 | end; |
| 215 | 230 | ||
| 216 | -{ It supports reading a single channel wave with 16-bit encoded samples. | ||
| 217 | - Samples are normalized to the range [-1, 1]. | ||
| 218 | -} | ||
| 219 | -function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave; | 231 | + TSherpaOnnxSileroVadModelConfig = record |
| 232 | + Model: AnsiString; | ||
| 233 | + Threshold: Single; | ||
| 234 | + MinSilenceDuration: Single; | ||
| 235 | + MinSpeechDuration: Single; | ||
| 236 | + WindowSize: Integer; | ||
| 237 | + function ToString: AnsiString; | ||
| 238 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSileroVadModelConfig); | ||
| 239 | + end; | ||
| 240 | + | ||
{ User-facing configuration of the voice activity detector.
  The Initialize operator sets SampleRate = 16000, NumThreads = 1,
  Provider = 'cpu' and Debug = False. }
TSherpaOnnxVadModelConfig = record
  SileroVad: TSherpaOnnxSileroVadModelConfig; { nested Silero VAD settings }
  SampleRate: Integer;
  NumThreads: Integer;
  Provider: AnsiString;
  Debug: Boolean;
  { Returns a one-line textual dump of all fields }
  function ToString: AnsiString;
  class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig);
end;
| 250 | + | ||
| 251 | + TSherpaOnnxSamplesArray = array of Single; | ||
| 252 | + | ||
{ Object wrapper around the circular buffer of the sherpa-onnx library.
  Every method forwards to the corresponding SherpaOnnxCircularBuffer*
  import, passing Handle. }
TSherpaOnnxCircularBuffer = class
private
  Handle: Pointer; { value returned by SherpaOnnxCreateCircularBuffer }
public
  constructor Create(Capacity: Integer);
  destructor Destroy; override;
  { Appends Samples to the buffer }
  procedure Push(Samples: array of Single);
  { Returns a copy of N samples starting at StartIndex }
  function Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
  procedure Pop(N: Integer);
  procedure Reset;
  function Size: Integer;
  function Head: Integer;
end;
| 266 | + | ||
{ One speech segment as returned by TSherpaOnnxVoiceActivityDetector.Front }
TSherpaOnnxSpeechSegment = record
  Samples: array of Single; { audio samples of the segment }
  Start: Integer; { presumably the index of the first sample in the input stream - confirm against the C API }
end;
| 271 | + | ||
{ Object wrapper around the voice activity detector of the sherpa-onnx
  library. Detected segments are queued inside the detector and are read
  with Front and removed with Pop while IsEmpty is False. }
TSherpaOnnxVoiceActivityDetector = class
private
  Handle: Pointer; { opaque detector handle }
  _Config: TSherpaOnnxVadModelConfig; { copy of the configuration passed to Create }
public
  constructor Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single);
  destructor Destroy; override;
  { Feeds the whole Samples array to the detector }
  procedure AcceptWaveform(Samples: array of Single); overload;
  { Feeds N samples of Samples, starting at index Offset }
  procedure AcceptWaveform(Samples: array of Single; Offset: Integer; N: Integer); overload;
  function IsEmpty: Boolean;
  function IsDetected: Boolean;
  procedure Pop;
  procedure Clear;
  function Front: TSherpaOnnxSpeechSegment;
  procedure Reset;
  procedure Flush;
  { Read-only access to the stored configuration }
  property Config: TSherpaOnnxVadModelConfig Read _Config;
end;
| 290 | + | ||
| 291 | + { It supports reading a single channel wave with 16-bit encoded samples. | ||
| 292 | + Samples are normalized to the range [-1, 1]. | ||
| 293 | + } | ||
| 294 | + function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave; | ||
| 295 | + | ||
| 296 | + function SherpaOnnxWriteWave(Filename: AnsiString; | ||
| 297 | + Samples: array of Single; SampleRate: Integer): Boolean; | ||
| 220 | 298 | ||
| 221 | implementation | 299 | implementation |
| 222 | 300 | ||
| @@ -294,15 +372,15 @@ type | @@ -294,15 +372,15 @@ type | ||
| 294 | DecodingMethod: PAnsiChar; | 372 | DecodingMethod: PAnsiChar; |
| 295 | MaxActivePaths: cint32; | 373 | MaxActivePaths: cint32; |
| 296 | EnableEndpoint: cint32; | 374 | EnableEndpoint: cint32; |
| 297 | - Rule1MinTrailingSilence: Single; | ||
| 298 | - Rule2MinTrailingSilence: Single; | ||
| 299 | - Rule3MinUtteranceLength: Single; | 375 | + Rule1MinTrailingSilence: cfloat; |
| 376 | + Rule2MinTrailingSilence: cfloat; | ||
| 377 | + Rule3MinUtteranceLength: cfloat; | ||
| 300 | HotwordsFile: PAnsiChar; | 378 | HotwordsFile: PAnsiChar; |
| 301 | - HotwordsScore: Single; | 379 | + HotwordsScore: cfloat; |
| 302 | CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig; | 380 | CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig; |
| 303 | RuleFsts: PAnsiChar; | 381 | RuleFsts: PAnsiChar; |
| 304 | RuleFars: PAnsiChar; | 382 | RuleFars: PAnsiChar; |
| 305 | - BlankPenalty: Single; | 383 | + BlankPenalty: cfloat; |
| 306 | end; | 384 | end; |
| 307 | 385 | ||
| 308 | PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig; | 386 | PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig; |
| @@ -330,7 +408,7 @@ type | @@ -330,7 +408,7 @@ type | ||
| 330 | end; | 408 | end; |
| 331 | SherpaOnnxOfflineLMConfig = record | 409 | SherpaOnnxOfflineLMConfig = record |
| 332 | Model: PAnsiChar; | 410 | Model: PAnsiChar; |
| 333 | - Scale: Single; | 411 | + Scale: cfloat; |
| 334 | end; | 412 | end; |
| 335 | SherpaOnnxOfflineSenseVoiceModelConfig = record | 413 | SherpaOnnxOfflineSenseVoiceModelConfig = record |
| 336 | Model: PAnsiChar; | 414 | Model: PAnsiChar; |
| @@ -361,14 +439,100 @@ type | @@ -361,14 +439,100 @@ type | ||
| 361 | DecodingMethod: PAnsiChar; | 439 | DecodingMethod: PAnsiChar; |
| 362 | MaxActivePaths: cint32; | 440 | MaxActivePaths: cint32; |
| 363 | HotwordsFile: PAnsiChar; | 441 | HotwordsFile: PAnsiChar; |
| 364 | - HotwordsScore: Single; | 442 | + HotwordsScore: cfloat; |
| 365 | RuleFsts: PAnsiChar; | 443 | RuleFsts: PAnsiChar; |
| 366 | RuleFars: PAnsiChar; | 444 | RuleFars: PAnsiChar; |
| 367 | - BlankPenalty: Single; | 445 | + BlankPenalty: cfloat; |
| 368 | end; | 446 | end; |
| 369 | 447 | ||
| 370 | PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig; | 448 | PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig; |
| 371 | 449 | ||
{ C-compatible counterpart of TSherpaOnnxSileroVadModelConfig, built from
  ctypes fields. NOTE(review): field order and types presumably have to match
  the struct of the same name in the C API - confirm before reordering. }
SherpaOnnxSileroVadModelConfig = record
  Model: PAnsiChar;
  Threshold: cfloat;
  MinSilenceDuration: cfloat;
  MinSpeechDuration: cfloat;
  WindowSize: cint32;
end;
{ C-compatible counterpart of TSherpaOnnxVadModelConfig; a pointer to it is
  what SherpaOnnxCreateVoiceActivityDetector receives.
  NOTE(review): layout presumably mirrors the C struct - confirm before
  reordering fields. }
SherpaOnnxVadModelConfig = record
  SileroVad: SherpaOnnxSileroVadModelConfig;
  SampleRate: cint32;
  NumThreads: cint32;
  Provider: PAnsiChar;
  Debug: cint32; { integer on the C side; the Pascal-level record uses Boolean }
end;
| 464 | + PSherpaOnnxVadModelConfig = ^SherpaOnnxVadModelConfig; | ||
| 465 | + | ||
{ Segment record pointed to by the result of
  SherpaOnnxVoiceActivityDetectorFront and released with
  SherpaOnnxDestroySpeechSegment. }
SherpaOnnxSpeechSegment = record
  Start: cint32;
  Samples: pcfloat; { pointer to the sample data }
  N: cint32; { presumably the number of samples behind Samples - confirm }
end;
| 471 | + | ||
| 472 | + PSherpaOnnxSpeechSegment = ^SherpaOnnxSpeechSegment; | ||
| 473 | + | ||
| 474 | +function SherpaOnnxCreateVoiceActivityDetector(Config: PSherpaOnnxVadModelConfig; | ||
| 475 | + BufferSizeInSeconds: cfloat): Pointer; cdecl; | ||
| 476 | + external SherpaOnnxLibName; | ||
| 477 | + | ||
| 478 | +procedure SherpaOnnxDestroyVoiceActivityDetector(Vad: Pointer); cdecl; | ||
| 479 | + external SherpaOnnxLibName; | ||
| 480 | + | ||
| 481 | +procedure SherpaOnnxVoiceActivityDetectorAcceptWaveform(Vad: Pointer; | ||
| 482 | + Samples: pcfloat; N: cint32); cdecl; | ||
| 483 | + external SherpaOnnxLibName; | ||
| 484 | + | ||
| 485 | +function SherpaOnnxVoiceActivityDetectorEmpty(Vad: Pointer): cint32; cdecl; | ||
| 486 | + external SherpaOnnxLibName; | ||
| 487 | + | ||
| 488 | +function SherpaOnnxVoiceActivityDetectorDetected(Vad: Pointer): cint32; cdecl; | ||
| 489 | + external SherpaOnnxLibName; | ||
| 490 | + | ||
| 491 | +procedure SherpaOnnxVoiceActivityDetectorPop(Vad: Pointer); cdecl; | ||
| 492 | + external SherpaOnnxLibName; | ||
| 493 | + | ||
| 494 | +procedure SherpaOnnxVoiceActivityDetectorClear(Vad: Pointer); cdecl; | ||
| 495 | + external SherpaOnnxLibName; | ||
| 496 | + | ||
| 497 | +function SherpaOnnxVoiceActivityDetectorFront(Vad: Pointer): PSherpaOnnxSpeechSegment; cdecl; | ||
| 498 | + external SherpaOnnxLibName; | ||
| 499 | + | ||
| 500 | +procedure SherpaOnnxDestroySpeechSegment(P: PSherpaOnnxSpeechSegment); cdecl; | ||
| 501 | + external SherpaOnnxLibName; | ||
| 502 | + | ||
{ Import of the C API function that resets a voice activity detector.
  The argument is the detector handle, declared as an untyped Pointer like
  in every other SherpaOnnxVoiceActivityDetector* import (it is not a
  speech segment). }
procedure SherpaOnnxVoiceActivityDetectorReset(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;
| 505 | + | ||
{ Import of the C API function that flushes a voice activity detector.
  The argument is the detector handle, declared as an untyped Pointer like
  in every other SherpaOnnxVoiceActivityDetector* import (it is not a
  speech segment). }
procedure SherpaOnnxVoiceActivityDetectorFlush(Vad: Pointer); cdecl;
  external SherpaOnnxLibName;
| 508 | + | ||
| 509 | +function SherpaOnnxCreateCircularBuffer(Capacity: cint32): Pointer; cdecl; | ||
| 510 | + external SherpaOnnxLibName; | ||
| 511 | + | ||
| 512 | +procedure SherpaOnnxDestroyCircularBuffer(Buffer: Pointer) ; cdecl; | ||
| 513 | + external SherpaOnnxLibName; | ||
| 514 | + | ||
| 515 | +procedure SherpaOnnxCircularBufferPush(Buffer: Pointer; Samples: pcfloat; N: cint32); cdecl; | ||
| 516 | + external SherpaOnnxLibName; | ||
| 517 | + | ||
| 518 | +function SherpaOnnxCircularBufferGet(Buffer: Pointer; StartIndex: cint32; N: cint32): pcfloat ; cdecl; | ||
| 519 | + external SherpaOnnxLibName; | ||
| 520 | + | ||
| 521 | +procedure SherpaOnnxCircularBufferFree(P: pcfloat); cdecl; | ||
| 522 | + external SherpaOnnxLibName; | ||
| 523 | + | ||
| 524 | +procedure SherpaOnnxCircularBufferPop(Buffer: Pointer; N: cint32); cdecl; | ||
| 525 | + external SherpaOnnxLibName; | ||
| 526 | + | ||
| 527 | +function SherpaOnnxCircularBufferSize(Buffer: Pointer): cint32; cdecl; | ||
| 528 | + external SherpaOnnxLibName; | ||
| 529 | + | ||
| 530 | +function SherpaOnnxCircularBufferHead(Buffer: Pointer): cint32; cdecl; | ||
| 531 | + external SherpaOnnxLibName; | ||
| 532 | + | ||
| 533 | +procedure SherpaOnnxCircularBufferReset(Buffer: Pointer); cdecl; | ||
| 534 | + external SherpaOnnxLibName; | ||
| 535 | + | ||
| 372 | function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl; | 536 | function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl; |
| 373 | external SherpaOnnxLibName; | 537 | external SherpaOnnxLibName; |
| 374 | 538 | ||
| @@ -437,9 +601,20 @@ procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar); cdecl; | @@ -437,9 +601,20 @@ procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar); cdecl; | ||
| 437 | function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; | 601 | function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; |
| 438 | external SherpaOnnxLibName name 'SherpaOnnxReadWave'; | 602 | external SherpaOnnxLibName name 'SherpaOnnxReadWave'; |
| 439 | 603 | ||
| 604 | +function SherpaOnnxWriteWaveWrapper(Samples: pcfloat; N: cint32; | ||
| 605 | + SampleRate: cint32; Filename: PAnsiChar): cint32; cdecl; | ||
| 606 | + external SherpaOnnxLibName name 'SherpaOnnxWriteWave'; | ||
| 607 | + | ||
| 440 | procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl; | 608 | procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl; |
| 441 | external SherpaOnnxLibName name 'SherpaOnnxFreeWave'; | 609 | external SherpaOnnxLibName name 'SherpaOnnxFreeWave'; |
| 442 | 610 | ||
{ Writes Samples to the wave file Filename using the given SampleRate.
  Returns True when the underlying library call returns 1. }
function SherpaOnnxWriteWave(Filename: AnsiString;
  Samples: array of Single; SampleRate: Integer): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxWriteWaveWrapper(pcfloat(Samples), Length(Samples),
    SampleRate, PAnsiChar(Filename));
  Result := (Status = 1);
end;
| 617 | + | ||
| 443 | function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave; | 618 | function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave; |
| 444 | var | 619 | var |
| 445 | PFilename: PAnsiChar; | 620 | PFilename: PAnsiChar; |
| @@ -611,6 +786,7 @@ begin | @@ -611,6 +786,7 @@ begin | ||
| 611 | C.BlankPenalty := Config.BlankPenalty; | 786 | C.BlankPenalty := Config.BlankPenalty; |
| 612 | 787 | ||
| 613 | Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C); | 788 | Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C); |
| 789 | + Self._Config := Config; | ||
| 614 | end; | 790 | end; |
| 615 | 791 | ||
| 616 | destructor TSherpaOnnxOnlineRecognizer.Destroy; | 792 | destructor TSherpaOnnxOnlineRecognizer.Destroy; |
| @@ -877,6 +1053,7 @@ begin | @@ -877,6 +1053,7 @@ begin | ||
| 877 | C.BlankPenalty := Config.BlankPenalty; | 1053 | C.BlankPenalty := Config.BlankPenalty; |
| 878 | 1054 | ||
| 879 | Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C); | 1055 | Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C); |
| 1056 | + Self._Config := Config; | ||
| 880 | end; | 1057 | end; |
| 881 | 1058 | ||
| 882 | destructor TSherpaOnnxOfflineRecognizer.Destroy; | 1059 | destructor TSherpaOnnxOfflineRecognizer.Destroy; |
| @@ -984,5 +1161,255 @@ begin | @@ -984,5 +1161,255 @@ begin | ||
| 984 | [Self.Text, TokensStr, TimestampStr]); | 1161 | [Self.Text, TokensStr, TimestampStr]); |
| 985 | end; | 1162 | end; |
| 986 | 1163 | ||
{ Returns a one-line textual dump of every field of the record;
  floating-point fields are printed with two decimals. }
function TSherpaOnnxSileroVadModelConfig.ToString: AnsiString;
const
  Layout =
    'TSherpaOnnxSileroVadModelConfig(' +
    'Model := %s, ' +
    'Threshold := %.2f, ' +
    'MinSilenceDuration := %.2f, ' +
    'MinSpeechDuration := %.2f, ' +
    'WindowSize := %d' +
    ')';
begin
  Result := Format(Layout,
    [Self.Model,
     Self.Threshold,
     Self.MinSilenceDuration,
     Self.MinSpeechDuration,
     Self.WindowSize]);
end;
| 1177 | + | ||
{ Default values for a freshly initialized Silero VAD configuration.
  Model is not assigned here. }
class operator TSherpaOnnxSileroVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSileroVadModelConfig);
begin
  Dest.WindowSize := 512;

  Dest.Threshold := 0.5;

  Dest.MinSpeechDuration := 0.25;
  Dest.MinSilenceDuration := 0.5;
end;
| 1185 | + | ||
{ Returns a one-line textual dump of every field of the record, including
  the nested SileroVad configuration. }
function TSherpaOnnxVadModelConfig.ToString: AnsiString;
const
  Layout =
    'TSherpaOnnxVadModelConfig(' +
    'SileroVad := %s, ' +
    'SampleRate := %d, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s' +
    ')';
begin
  Result := Format(Layout,
    [Self.SileroVad.ToString,
     Self.SampleRate,
     Self.NumThreads,
     Self.Provider,
     Self.Debug.ToString]);
end;
| 1199 | + | ||
{ Default values for a freshly initialized VAD configuration.
  The nested SileroVad field is not assigned here. }
class operator TSherpaOnnxVadModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig);
begin
  Dest.Provider := 'cpu';
  Dest.NumThreads := 1;
  Dest.Debug := False;

  Dest.SampleRate := 16000;
end;
| 1207 | + | ||
{ Default values for a freshly initialized feature configuration:
  FeatureDim = 80 and SampleRate = 16000. }
class operator TSherpaOnnxFeatureConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFeatureConfig);
begin
  Dest.FeatureDim := 80;
  Dest.SampleRate := 16000;
end;
| 1213 | + | ||
{ Default value for a freshly initialized CTC FST decoder configuration:
  MaxActive = 3000. Graph is not assigned here. }
class operator TSherpaOnnxOnlineCtcFstDecoderConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineCtcFstDecoderConfig);
begin
  Dest.MaxActive := 3000;
end;
| 1218 | + | ||
{ Default values for a freshly initialized online recognizer configuration.
  Only the fields listed below are assigned. }
class operator TSherpaOnnxOnlineRecognizerConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineRecognizerConfig);
begin
  { decoding }
  Dest.DecodingMethod := 'greedy_search';
  Dest.HotwordsScore := 1.5;
  Dest.BlankPenalty := 0;

  { endpointing }
  Dest.EnableEndpoint := False;
  Dest.Rule1MinTrailingSilence := 2.4;
  Dest.Rule2MinTrailingSilence := 1.2;
  Dest.Rule3MinUtteranceLength := 20;
end;
| 1229 | + | ||
{ Default values for a freshly initialized online model configuration:
  Provider = 'cpu', NumThreads = 1, Debug = False. }
class operator TSherpaOnnxOnlineModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOnlineModelConfig);
begin
  Dest.Provider := 'cpu';
  Dest.Debug := False;
  Dest.NumThreads := 1;
end;
| 1236 | + | ||
{ Default values for a freshly initialized Whisper model configuration:
  TailPaddings = -1 and Task = 'transcribe'. }
class operator TSherpaOnnxOfflineWhisperModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
begin
  Dest.TailPaddings := -1;
  Dest.Task := 'transcribe';
end;
| 1242 | + | ||
{ Default value for a freshly initialized offline LM configuration:
  Scale = 1.0. Model is not assigned here. }
class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
begin
  Dest.Scale := 1.0;
end;
| 1247 | + | ||
{ Default value for a freshly initialized SenseVoice model configuration:
  UseItn = True. Model and Language are not assigned here. }
class operator TSherpaOnnxOfflineSenseVoiceModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSenseVoiceModelConfig);
begin
  Dest.UseItn := True;
end;
| 1252 | + | ||
{ Default values for a freshly initialized offline model configuration:
  Provider = 'cpu', NumThreads = 1, Debug = False. }
class operator TSherpaOnnxOfflineModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
begin
  Dest.Provider := 'cpu';
  Dest.NumThreads := 1;
  Dest.Debug := False;
end;
| 1259 | + | ||
{ Default values for a freshly initialized offline recognizer configuration.
  Only the fields listed below are assigned. }
class operator TSherpaOnnxOfflineRecognizerConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineRecognizerConfig);
begin
  Dest.MaxActivePaths := 4;
  Dest.DecodingMethod := 'greedy_search';

  Dest.BlankPenalty := 0;
  Dest.HotwordsScore := 1.5;
end;
| 1267 | + | ||
{ Creates the underlying library buffer with the requested Capacity and
  stores the returned handle for all later calls. }
constructor TSherpaOnnxCircularBuffer.Create(Capacity: Integer);
begin
  Self.Handle := SherpaOnnxCreateCircularBuffer(Capacity);
end;
| 1272 | + | ||
{ Releases the underlying library buffer.
  The handle may still be nil when the constructor did not complete, so the
  library is only called for a non-nil handle. }
destructor TSherpaOnnxCircularBuffer.Destroy;
begin
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyCircularBuffer(Self.Handle);
    Self.Handle := nil;
  end;

  { Chain to the ancestor destructor, as is conventional for overridden
    destructors }
  inherited Destroy;
end;
| 1278 | + | ||
{ Passes all elements of Samples to SherpaOnnxCircularBufferPush. }
procedure TSherpaOnnxCircularBuffer.Push(Samples: array of Single);
begin
  { The open array is handed to the library as a pointer plus element count }
  SherpaOnnxCircularBufferPush(Self.Handle, pcfloat(Samples), Length(Samples));
end;
| 1283 | + | ||
{ Copies N samples starting at StartIndex out of the native circular buffer
  into a newly allocated Pascal dynamic array.

  StartIndex and N are forwarded unchanged to SherpaOnnxCircularBufferGet.
  The pointer returned by the native call is always handed back to
  SherpaOnnxCircularBufferFree, even if the copy raises.

  Returns an empty array when N <= 0 or when the native call yields nil. }
function TSherpaOnnxCircularBuffer.Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
var
  P: pcfloat;
  I: Integer;
begin
  Result := nil;

  // A non-positive count has nothing to copy; do not pass it to native code
  // or to SetLength.
  if N <= 0 then
    Exit;

  P := SherpaOnnxCircularBufferGet(Self.Handle, StartIndex, N);

  // Guard against dereferencing a nil pointer below.
  if P = nil then
    Exit;

  try
    SetLength(Result, N);

    for I := Low(Result) to High(Result) do
      Result[I] := P[I];
  finally
    // The native side owns P; release it on every path.
    SherpaOnnxCircularBufferFree(P);
  end;
end;
| 1300 | + | ||
{ Forwards N to SherpaOnnxCircularBufferPop for this buffer's handle. }
procedure TSherpaOnnxCircularBuffer.Pop(N: Integer);
begin
  SherpaOnnxCircularBufferPop(Handle, N);
end;
| 1305 | + | ||
{ Calls SherpaOnnxCircularBufferReset on this buffer's handle. }
procedure TSherpaOnnxCircularBuffer.Reset;
begin
  SherpaOnnxCircularBufferReset(Handle);
end;
| 1310 | + | ||
{ Returns the value reported by SherpaOnnxCircularBufferSize for this
  buffer's handle. }
function TSherpaOnnxCircularBuffer.Size: Integer;
begin
  Result := SherpaOnnxCircularBufferSize(Handle);
end;
| 1315 | + | ||
{ Returns the value reported by SherpaOnnxCircularBufferHead for this
  buffer's handle. }
function TSherpaOnnxCircularBuffer.Head: Integer;
begin
  Result := SherpaOnnxCircularBufferHead(Handle);
end;
| 1320 | + | ||
{ Creates the native voice activity detector.

  Config is stored in _Config and then translated field by field into the
  C-level SherpaOnnxVadModelConfig record that is passed to
  SherpaOnnxCreateVoiceActivityDetector. BufferSizeInSeconds is forwarded
  unchanged. The resulting native handle is kept in Handle. }
constructor TSherpaOnnxVoiceActivityDetector.Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single);
var
  CConfig: SherpaOnnxVadModelConfig;
begin
  Self._Config := Config;

  Initialize(CConfig);

  // General model settings.
  CConfig.SampleRate := Config.SampleRate;
  CConfig.NumThreads := Config.NumThreads;
  CConfig.Provider := PAnsiChar(Config.Provider);
  // The C record stores the Debug flag as an integer.
  CConfig.Debug := Ord(Config.Debug);

  // Silero VAD settings.
  CConfig.SileroVad.Model := PAnsiChar(Config.SileroVad.Model);
  CConfig.SileroVad.Threshold := Config.SileroVad.Threshold;
  CConfig.SileroVad.MinSilenceDuration := Config.SileroVad.MinSilenceDuration;
  CConfig.SileroVad.MinSpeechDuration := Config.SileroVad.MinSpeechDuration;
  CConfig.SileroVad.WindowSize := Config.SileroVad.WindowSize;

  Self.Handle := SherpaOnnxCreateVoiceActivityDetector(@CConfig, BufferSizeInSeconds);
end;
| 1342 | + | ||
{ Releases the native voice activity detector and clears the stored handle. }
destructor TSherpaOnnxVoiceActivityDetector.Destroy;
begin
  SherpaOnnxDestroyVoiceActivityDetector(Handle);

  // Clear the handle so it no longer refers to the destroyed native object.
  Handle := nil;
end;
| 1348 | + | ||
{ Feeds every element of Samples to the native detector.

  The array is passed to SherpaOnnxVoiceActivityDetectorAcceptWaveform as a
  pointer plus element count. }
procedure TSherpaOnnxVoiceActivityDetector.AcceptWaveform(Samples: array of Single);
begin
  SherpaOnnxVoiceActivityDetectorAcceptWaveform(Handle, pcfloat(Samples), Length(Samples));
end;
| 1353 | + | ||
{ Feeds N elements of Samples, starting at index Offset, to the native
  detector.

  The requested window must lie entirely inside Samples: Offset and N must
  be non-negative and Offset + N must not exceed Length(Samples). When the
  window is invalid, a diagnostic line is written to standard output and
  nothing is forwarded to native code. }
procedure TSherpaOnnxVoiceActivityDetector.AcceptWaveform(Samples: array of Single; Offset: Integer; N: Integer);
begin
  // Negative values would pass an "Offset + N > Length" test and make the
  // native code read outside the array. Comparing N against the remaining
  // length also avoids integer overflow in Offset + N.
  if (Offset < 0) or (N < 0) or (N > Length(Samples) - Offset) then
  begin
    WriteLn(Format('Invalid arguments!. Array length: %d, Offset: %d, N: %d',
      [Length(Samples), Offset, N]
      ));
    Exit;
  end;

  SherpaOnnxVoiceActivityDetectorAcceptWaveform(Self.Handle,
    pcfloat(Samples) + Offset, N);
end;
| 1367 | + | ||
{ Returns True exactly when SherpaOnnxVoiceActivityDetectorEmpty reports 1
  for this detector's handle. }
function TSherpaOnnxVoiceActivityDetector.IsEmpty: Boolean;
begin
  Result := (SherpaOnnxVoiceActivityDetectorEmpty(Handle) = 1);
end;
| 1372 | + | ||
{ Returns True exactly when SherpaOnnxVoiceActivityDetectorDetected reports 1
  for this detector's handle. }
function TSherpaOnnxVoiceActivityDetector.IsDetected: Boolean;
begin
  Result := (SherpaOnnxVoiceActivityDetectorDetected(Handle) = 1);
end;
| 1377 | + | ||
{ Calls SherpaOnnxVoiceActivityDetectorPop on this detector's handle. }
procedure TSherpaOnnxVoiceActivityDetector.Pop;
begin
  SherpaOnnxVoiceActivityDetectorPop(Handle);
end;
| 1382 | + | ||
{ Calls SherpaOnnxVoiceActivityDetectorClear on this detector's handle. }
procedure TSherpaOnnxVoiceActivityDetector.Clear;
begin
  SherpaOnnxVoiceActivityDetectorClear(Handle);
end;
| 1387 | + | ||
{ Returns a Pascal-owned copy of the segment obtained from
  SherpaOnnxVoiceActivityDetectorFront.

  The native segment's Start value and its N samples are copied into the
  result. The native segment is then released with
  SherpaOnnxDestroySpeechSegment, even if the copy raises.

  If the native call yields nil, a segment with Start = 0 and no samples is
  returned instead of dereferencing the nil pointer. }
function TSherpaOnnxVoiceActivityDetector.Front: TSherpaOnnxSpeechSegment;
var
  P: PSherpaOnnxSpeechSegment;
  I: Integer;
begin
  // Start from a well-defined result for the nil path.
  Result.Start := 0;
  Result.Samples := nil;

  P := SherpaOnnxVoiceActivityDetectorFront(Self.Handle);
  if P = nil then
    Exit;

  try
    Result.Start := P^.Start;
    SetLength(Result.Samples, P^.N);

    for I := Low(Result.Samples) to High(Result.Samples) do
      Result.Samples[I] := P^.Samples[I];
  finally
    // The native side owns P; release it on every path.
    SherpaOnnxDestroySpeechSegment(P);
  end;
end;
| 1403 | + | ||
{ Calls SherpaOnnxVoiceActivityDetectorReset on this detector's handle. }
procedure TSherpaOnnxVoiceActivityDetector.Reset;
begin
  SherpaOnnxVoiceActivityDetectorReset(Handle);
end;
| 1408 | + | ||
{ Calls SherpaOnnxVoiceActivityDetectorFlush on this detector's handle. }
procedure TSherpaOnnxVoiceActivityDetector.Flush;
begin
  SherpaOnnxVoiceActivityDetectorFlush(Handle);
end;
| 1413 | + | ||
| 987 | end. | 1414 | end. |
| 988 | 1415 |
-
请 注册 或 登录 后发表评论